In [1]:
import numpy as np
import requests
from bs4 import BeautifulSoup
import pandas as pd
import altair as alt
import geopandas as gpd
import matplotlib.pyplot as plt
import json
import altair as alt
import dtmapi
import statsmodels.api as sm
from statsmodels.formula.api import ols
from fuzzywuzzy import fuzz, process



### IOM DTM

In [2]:
idp_admin_data = dtmapi.get_idp_admin2_data(CountryName='Sudan',  FromReportingDate='2021-01-01', ToReportingDate='2025-01-01', to_pandas=True)

In [3]:
# read project/data_analysis/sudan/DTM_sdn_SMU - Bi Weekly 13 _ 17122024_V02_Public.xlsx sheet MASTER LIST (ADMIN1)
idp_adm1_2024 = pd.read_excel('sudan/DTM_sdn_SMU - Bi Weekly 13 _ 17122024_V02_Public.xlsx', sheet_name='MASTER LIST (ADMIN1)', skiprows=2)
#idp_adm2_2024 = pd.read_excel('sudan/DTM_sdn_SMU - Bi Weekly 13 _ 17122024_V02_Public.xlsx', sheet_name='MASTER LIST (ADMIN2)', skiprows=2)

idp_adm1_2023 = pd.read_excel('sudan/DTM_sdn_Report_Snapshot 13_20231212_V02_Public.xlsx', sheet_name='MASTER LIST (ADMIN1)', skiprows=2)
#idp_adm2_2023 = pd.read_excel('sudan/DTM_sdn_Report_Snapshot 13_20231212_V02_Public.xlsx', sheet_name='MASTER LIST (ADMIN2)', skiprows=2)

rename_dict = {
    'STATE OF DISPLACEMET': 'adm1_name',
    'STATE CODE': 'adm1_pcode',
    'LOCALITY OF DISPLACEMENT': 'adm2_name',
    'LOCALITY_CODE': 'adm2_pcode',
    'LOCALITY_ CODE': 'adm2_pcode',
    'HHs': 'affected_idps_hh',
    'IDPs': 'affected_idps_ind'
}

idp_adm1_2023.rename(columns=rename_dict, inplace=True)
#idp_adm2_2023.rename(columns=rename_dict, inplace=True)
idp_adm1_2024.rename(columns=rename_dict, inplace=True)
#idp_adm2_2024.rename(columns=rename_dict, inplace=True)

In [4]:
idp_origin_adm1_2024 = idp_adm1_2024.iloc[1:]
#idp_origin_adm2_2024 = idp_adm2_2024.iloc[1:]
idp_origin_adm1_2023 = idp_adm1_2023.iloc[1:]
#idp_origin_adm2_2023 = idp_adm2_2023.iloc[1:]

In [5]:
state_columns = [' Aj Jazirah', ' Central Darfur', ' East Darfur', ' Khartoum',
       ' North Darfur', ' North Kordofan', ' South Darfur', ' South Kordofan',
       ' West Darfur', ' West Kordofan', ' White Nile']

nationality_columns = [' SUDANESE', ' NON SUDANESE']

idp_adm1_2023_state = idp_origin_adm1_2023.melt(
        id_vars=['adm1_name', 'adm1_pcode',  'affected_idps_ind', 'affected_idps_hh'],
        value_vars=state_columns,
        var_name='state_origin',
        value_name='affected_idps_state'
    )

In [6]:
state_columns = ['Aj Jazirah', 'Blue Nile', 'Central Darfur', 'East Darfur', 'Gedaref',
       'Kassala', 'Khartoum', 'North Darfur', 'North Kordofan', 'Northern',
       'Red Sea', 'River Nile', 'Sennar', 'South Darfur', 'South Kordofan',
       'West Darfur', 'West Kordofan', 'White Nile']

idp_adm1_2024_state = idp_origin_adm1_2024.melt(
        id_vars=['adm1_name', 'adm1_pcode',  'affected_idps_ind', 'affected_idps_hh'],
        value_vars=state_columns,
        var_name='state_origin',
        value_name='affected_idps_state'
    )

In [7]:
#filter by adm1_name grand total
idp_origin_2023 = idp_adm1_2023_state[idp_adm1_2023_state['adm1_name'] == 'Grand Total']
idp_origin_2024 = idp_adm1_2024_state[idp_adm1_2024_state['adm1_name'] == 'Grand Total']

### IPC 

GeoJson data

In [8]:
ipc = requests.get('https://api.ipcinfo.org/analyses?country=SD&type=A&periods=C&key=3bdbeef9-fc4c-4315-a665-cc56deb11be6').json()
ipc

[{'id': '14192889',
  'title': 'Sudan - Acute Food Insecurity July 2019',
  'link': 'http://www.ipcinfo.org/ipc-country-analysis/details-map/en/c/1152151/?iso3=SDN',
  'country': 'SD',
  'year': 2019,
  'condition': 'A',
  'created': '2020-03-31',
  'modified': '2025-01-03'},
 {'id': '18151797',
  'title': 'Sudan - Acute Food Insecurity June 2020',
  'link': 'http://www.ipcinfo.org/ipc-country-analysis/details-map/en/c/1152718/',
  'country': 'SD',
  'year': 2020,
  'condition': 'A',
  'created': '2020-06-30',
  'modified': '2025-01-03'},
 {'id': '19191589',
  'title': 'Sudan - Acute Food Insecurity November 2020 (Proj Update)',
  'link': 'http://www.ipcinfo.org/ipc-country-analysis/details-map/en/c/1152920/?iso3=SDN',
  'country': 'SD',
  'year': 2020,
  'condition': 'A',
  'created': '2020-11-17',
  'modified': '2025-01-03'},
 {'id': '24004104',
  'title': 'Sudan - Acute Food Insecurity April 2021',
  'link': 'http://www.ipcinfo.org/ipc-country-analysis/details-map/en/c/1154879/?iso3

In [9]:
#including obly current data and no projections
for item in ipc:
    response = requests.get(f"https://api.ipcinfo.org/areas/{item['id']}/C?format=geojson&key=3bdbeef9-fc4c-4315-a665-cc56deb11be6")
    if response.status_code == 200:
        try:
            ipc_geoson = response.json()
            with open(f"ipc_geoson_{item['id']}.json", 'w') as f:
                json.dump(ipc_geoson, f)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON for item {item['id']}: {e}")
        except Exception as e:
            print(f"Error saving file for item {item['id']}: {e}")
    else:
        print(f"Error fetching data for item {item['id']}: HTTP {response.status_code}")

Error fetching data for item 19191589: HTTP 404
Error fetching data for item 64768802: HTTP 404
Error fetching data for item 74795267: HTTP 404


In [10]:
ipc_projection = requests.get('https://api.ipcinfo.org/areas/74795267/P?format=geojson&key=3bdbeef9-fc4c-4315-a665-cc56deb11be6')


try:
    ipc_projection_json = ipc_projection.json()
    with open('ipc_projection_74795267_2024_proj.json', 'w') as f:
        json.dump(ipc_projection_json, f)
except json.JSONDecodeError as e:
    print(f"Error decoding JSON for ipc_projection: {e}")
except Exception as e:
    print(f"Error saving file for ipc_projection: {e}")

In [11]:
ipc_2024 = requests.get(f"https://api.ipcinfo.org/areas/68887616/C?format=json&key=3bdbeef9-fc4c-4315-a665-cc56deb11be6").json()
ipc_2024_proj = requests.get(f"https://api.ipcinfo.org/areas/68887616/C?format=json&key=3bdbeef9-fc4c-4315-a665-cc56deb11be6").json()

In [12]:
# read project/data_analysis/sudan/ipc_sdn_area_long.csv
ipc_sdn_adm2 = pd.read_csv('sudan/ipc_sdn_area_long.csv')
ipc_sdn_adm2 = ipc_sdn_adm2.iloc[1:]
ipc_sdn_adm1 = pd.read_csv('sudan/ipc_sdn_level1_long.csv')
ipc_sdn_adm1 = ipc_sdn_adm1.iloc[1:]

In [13]:
# filter ipc_sdn_adm2 by Validity period current and year containing 2024 or 2023
ipc_sdn_adm2 = ipc_sdn_adm2[(ipc_sdn_adm2['Validity period'] == 'current') & (ipc_sdn_adm2['Date of analysis'].str.contains('2024')) | 
                            (ipc_sdn_adm2['Validity period'] == 'current') & (ipc_sdn_adm2['Date of analysis'].str.contains('2023'))]
ipc_sdn_adm2 = ipc_sdn_adm2[ipc_sdn_adm2['Phase'] == '3+']


# filter ipc_sdn_adm2 by Validity period current and year containing 2024 or 2023
ipc_sdn_adm1 = ipc_sdn_adm1[(ipc_sdn_adm1['Validity period'] == 'current') & (ipc_sdn_adm1['Date of analysis'].str.contains('2024')) | 
                            (ipc_sdn_adm1['Validity period'] == 'current') & (ipc_sdn_adm1['Date of analysis'].str.contains('2023'))]
ipc_sdn_adm1 = ipc_sdn_adm1[ipc_sdn_adm1['Phase'] == '3+']

In [14]:
ipc_sdn_adm2 = ipc_sdn_adm2[~ipc_sdn_adm2['Area'].str.contains('IDPs')]
ipc_sdn_adm2 = ipc_sdn_adm2[~ipc_sdn_adm2['Area'].str.contains('IDP')]
ipc_sdn_adm2 = ipc_sdn_adm2[~ipc_sdn_adm2['Area'].str.contains('Refugees')]
ipc_sdn_adm2 = ipc_sdn_adm2[~ipc_sdn_adm2['Area'].str.contains('refugees')]


ipc_sdn_adm1 = ipc_sdn_adm1[~ipc_sdn_adm1['Level 1'].str.contains('IDPs')]
ipc_sdn_adm1 = ipc_sdn_adm1[~ipc_sdn_adm1['Level 1'].str.contains('IDP')]
ipc_sdn_adm1 = ipc_sdn_adm1[~ipc_sdn_adm1['Level 1'].str.contains('Refugees')]
ipc_sdn_adm1 = ipc_sdn_adm1[~ipc_sdn_adm1['Level 1'].str.contains('refugees')]

### ACLED

In [15]:
acled = pd.read_csv('sudan/ACLED_2020-01-01-2024-12-31-Sudan.csv')

In [16]:
acled = acled[acled['year'] != 2020]
acled['event_date'] = pd.to_datetime(acled['event_date'])
acled['month_year'] = acled['event_date'].dt.to_period('M')
acled['week'] = acled['event_date'].dt.to_period('W').apply(lambda r: r.start_time)

In [17]:
#Monthly Data
acled_monthly= acled.groupby(['month_year']).agg({'fatalities':'sum', 'event_date':'count'}).reset_index()

#remove riots
#acled_monthly = acled_monthly[acled_monthly['event_type'] != 'Riots']
#acled_monthly['event_type'] = acled_monthly['event_type'].replace('Explosions/Remote violence', 'Explosions')

acled_monthly.to_csv('sudan/acled_monthly.csv', index=False)
acled_monthly['month_year'] = acled_monthly['month_year'].astype(str)

#Weekly Data
acled_weekly = acled.groupby(['week', 'event_type']).agg({'fatalities':'sum', 'event_date':'count'}).reset_index()
acled_weekly['week'] = acled_weekly['week'].astype(str)

In [18]:
# Line plot of total fatalities by month
line = alt.Chart(acled_monthly).mark_line(color='red').encode(
    x='month_year:O',
    y='fatalities:Q',
    tooltip=['month_year', 'fatalities']
).properties(
    title='Total Fatalities and Incidents by Month'
).interactive()

# Bar chart of total incidents by month
bar = alt.Chart(acled_monthly).mark_bar(color='blue').encode(
    x='month_year:O',
    y='event_date:Q',
    tooltip=['month_year', 'event_date']
).interactive()

# Combine both charts into one with a single y-axis
combined_chart = alt.layer(bar, line)

combined_chart


### COMBINED STUFF

### ACLED + IPC

In [19]:
acled_admin1 = acled[(acled['month_year'] >= '2023-04') ]
acled_admin1 = acled_admin1.groupby('admin1').agg({'fatalities': 'sum', 'event_id_cnty': 'count'}).reset_index()
acled_admin1.reset_index(drop=True, inplace=True)

acled_admin2 = acled[(acled['month_year'] >= '2023-04')]
acled_admin2= acled_admin2.groupby('admin2').agg({'fatalities': 'sum', 'event_id_cnty': 'count'}).reset_index()
acled_admin2.reset_index(drop=True, inplace=True)

In [20]:
ipc_sdn_adm2_2024 = ipc_sdn_adm2[ipc_sdn_adm2['Date of analysis'].str.contains('Apr 2024')]

In [21]:
# Compare admin2 values in acled_admin2 and admin2Name values in idp_admin2_data
acled_admin_values = set(acled_admin2['admin2'])
ipc_admin_values = set(ipc_sdn_adm2_2024['Area'])

# Find common and unique values
common_admin = acled_admin_values.intersection(ipc_admin_values)
unique_to_acled = list(acled_admin_values.difference(ipc_admin_values))
unique_to_ipc = list(ipc_admin_values.difference(acled_admin_values))

print("Common admin2 values:", common_admin)
print("Unique to ACLED admin2 values:", unique_to_acled)
print("Unique to IPC admin2 values:", unique_to_ipc)

Common admin2 values: {'Sinja', 'Atbara', 'Kadugli', 'Heiban', 'Delami', 'Sennar', 'Sawakin', 'Dilling', 'Assalaya', 'Haya', 'Babanusa', 'Beida', 'Kereneik', 'Tendalti', 'Keilak', 'Basundah', 'Shendi', 'Kulbus', 'Dongola', 'Halfa', 'Abassiya', 'Geisan', 'An Nuhud', 'Sheikan', 'Tawkar', 'Yassin', 'Sirba', 'Adila', 'Delgo', 'Kosti', 'Dordieb', 'Barbar', 'Rabak', 'Shiaria'}
Unique to ACLED admin2 values: ['Tulus', 'Al Lait', 'Kateila', 'Nyala Janoub', 'Um Kadadah', 'Janub Al Jazirah', 'As Suki', 'Karrari', 'Um Dam Haj Ahmed', 'Damso', 'Melit', 'Al Fasher', 'Gereida', 'Omdurman', 'Um Bada', 'Reifi Telkok', 'Dinder', 'Um Baru', 'Ed Damazine', 'Soudari', 'Kelemando', 'Rashad', 'Bahri', 'Bendasi', 'Mershing', 'Wad Al Mahi', 'Abyei', 'Al Gitaina', 'Jebel Moon', 'Kernoi', 'Mukjar', 'Tawila', 'Um Rimta', 'Jubayt Elmaaadin', 'Khartoum', 'El Geneina', 'Abu Karinka', 'Al Buhaira', 'Al Quoz', 'Rehaid Albirdi', 'Al Khiwai', 'Al Malha', 'Gebrat Al Sheikh', 'Saraf Omra', 'As Sunut', 'Al Idia', 'Al Fird

In [22]:
# Mapping based on similarity
def map_admin2(acled_list, idp_list, threshold=80):
    mapping = {}
    for idp in idp_list:
        match, score = process.extractOne(idp, acled_list, scorer=fuzz.ratio)
        if score >= threshold:
            mapping[idp] = match
        else:
            mapping[idp] = None
    return mapping

mapping_result = map_admin2(unique_to_acled, unique_to_ipc)

# Display mapping results
#for idp, acled in mapping_result.items():
    #print(f"{idp} -> {acled if acled else 'No match found'}")

#apply mapping to acled_admin2
ipc_sdn_adm2_2024['Area'] = ipc_sdn_adm2_2024['Area'].replace(mapping_result)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ipc_sdn_adm2_2024['Area'] = ipc_sdn_adm2_2024['Area'].replace(mapping_result)


In [23]:
#Adm 1
acled_admin1['admin1'] = acled_admin1['admin1'].replace('Al Jazirah', 'Aj Jazirah')
ipc_sdn_adm1_2024 = ipc_sdn_adm1[ipc_sdn_adm1['Date of analysis'].str.contains('Apr 2024')]

acled_ipc_adm1 = pd.merge(acled_admin1, ipc_sdn_adm1_2024, left_on=['admin1'], right_on=['Level 1'], how='inner')

In [24]:
#Adm 2
acled_admin2['admin2'] = acled_admin2['admin2'].replace(mapping_result)
ipc_sdn_adm2_2024 = ipc_sdn_adm2[ipc_sdn_adm2['Date of analysis'].str.contains('Apr 2024')]

acled_ipc_adm2 = pd.merge(acled_admin2, ipc_sdn_adm2_2024, left_on=['admin2'], right_on=['Area'], how='inner')

In [25]:
#correlation between fatalities and Percentage
acled_ipc_adm1['fatalities'].corr(acled_ipc_adm1['Number'])

0.6468157976019642

In [26]:
acled_ipc_adm2['fatalities'].corr(acled_ipc_adm2['Number'])

0.5825001498104534

In [27]:
# Create the chart with the selection and regression line
chart = alt.Chart(acled_ipc_adm1).mark_circle().encode(
    x=alt.X('fatalities:Q', scale=alt.Scale(type='log')),
    y=alt.Y('Number:Q', scale=alt.Scale(type='log')),
    color='admin1:N',
    size=alt.Size('fatalities:Q', scale=alt.Scale(range=[10, 100])),
    tooltip=['admin1', 'fatalities', 'Number']
).properties(
    title='Fatalities vs IPC by Admin1 in Sudan'
).interactive()

#chart

In [28]:
#save acled_ipc_adm2 to csv
acled_ipc_adm2.to_csv('sudan/acled_ipc_adm2.csv', index=False)

In [29]:
# Create the chart with the selection and regression line
chart = alt.Chart(acled_ipc_adm2).mark_circle().encode(
    x=alt.X('event_id_cnty:Q', scale=alt.Scale(type='log'), title='Incidents (Log)'),
    y=alt.Y('Number:Q', scale=alt.Scale(type='log'), title='People in IPC Phase 3+ (Log)'),
    color=alt.Color('Level 1:N', legend=alt.Legend(title="Admin 1 Area")),
    size=alt.Size('fatalities:Q'),
    tooltip=['admin2', 'fatalities', 'Number']
).properties(
    title='Armed Violence and acute food insecurity'
).interactive()

chart.to_json()

with open('CC9_fig1.json', 'w') as f:
    f.write(chart)

### ACLED + IOM

In [30]:
acled_admin1 = acled[(acled['month_year'] >= '2023-04') ]
acled_admin1 = acled_admin1.groupby('admin1').agg({'fatalities': 'sum', 'event_id_cnty': 'count'}).reset_index()
acled_admin1.reset_index(drop=True, inplace=True)

acled_admin2 = acled[(acled['month_year'] >= '2023-04')]
acled_admin2= acled_admin2.groupby('admin2').agg({'fatalities': 'sum', 'event_id_cnty': 'count'}).reset_index()
acled_admin2.reset_index(drop=True, inplace=True)

In [31]:
# Compare admin2 values in acled_admin2 and admin2Name values in idp_admin2_data
acled_admin2_values = set(acled_admin2['admin2'])
idp_admin2_values = set(idp_admin_data['admin2Name'])

# Find common and unique values
common_admin2 = acled_admin2_values.intersection(idp_admin2_values)
unique_to_acled = acled_admin2_values.difference(idp_admin2_values)
unique_to_idp = idp_admin2_values.difference(acled_admin2_values)

print("Common admin2 values:", common_admin2)
print("Unique to ACLED admin2 values:", unique_to_acled)
print("Unique to IDP admin2 values:", unique_to_idp)

Common admin2 values: {'Tulus', 'Al Lait', 'Sawakin', 'Kateila', 'Nyala Janoub', 'Um Kadadah', 'Janub Al Jazirah', 'As Suki', 'Tendalti', 'Karrari', 'Um Dam Haj Ahmed', 'Damso', 'Melit', 'Al Fasher', 'Gereida', 'Um Bada', 'Reifi Telkok', 'Um Baru', 'Ed Damazine', 'Soudari', 'Kelemando', 'Bahri', 'Bendasi', 'Mershing', 'Wad Al Mahi', 'Abyei', 'Al Gitaina', 'Jebel Moon', 'Kernoi', 'Mukjar', 'Sheikan', 'Tawila', 'Um Rimta', 'Tawkar', 'Yassin', 'Khartoum', 'Abu Karinka', 'Al Buhaira', 'Atbara', 'Sinja', 'Al Quoz', 'Rehaid Albirdi', 'Al Khiwai', 'Al Malha', 'Babanusa', 'Haya', 'Gebrat Al Sheikh', 'Saraf Omra', 'As Sunut', 'Al Idia', 'Al Firdous', 'Sharg Aj Jabal', 'Kubum', 'Bara', 'Ad Diwaim', 'Al Hasahisa', 'Al Butanah', 'Abu Kershola', 'Kas', 'Sharg An Neel', 'Port Sudan', 'Ghubaish', 'Um Algura', 'Al Golid', 'Reifi Kassla', 'Wasat Al Gedaref', 'Al Dibab', 'Al Matama', 'Wad Bandah', 'Al Lagowa', 'Al Fao', 'Kadugli', 'As Serief', 'Al Radoum', 'Um Dukhun', 'Reifi Gharb Kassala', 'Reifi Arom

In [32]:
# Mapping based on similarity
def map_admin2(acled_list, idp_list, threshold=80):
    mapping = {}
    for idp in idp_list:
        if isinstance(idp, float) and pd.isna(idp):
            continue
        idp = str(idp)
        match, score = process.extractOne(idp, [str(a) for a in acled_list], scorer=fuzz.ratio)
        if score >= threshold:
            mapping[idp] = match
        else:
            mapping[idp] = None
    return mapping

mapping_result = map_admin2(unique_to_acled, unique_to_idp)

# Display mapping results
#for idp, acled in mapping_result.items():
    #print(f"{idp} -> {acled if acled else 'No match found'}")
#mapping_result


In [33]:
#apply mapping to acled_admin2
idp_admin_data['admin2Name'] = idp_admin_data['admin2Name'].replace(mapping_result)

In [34]:
idp_admin2_2024 = idp_admin_data[(idp_admin_data['yearReportingDate'] == 2024) & (idp_admin_data['monthReportingDate'] == 12) & (idp_admin_data['operation'] == 'Armed Clashes in Sudan (Overview)') & (idp_admin_data['roundNumber'] == 18)]
idp_admin2_2023 = idp_admin_data[(idp_admin_data['yearReportingDate'] == 2023) & (idp_admin_data['monthReportingDate'] == 12) & (idp_admin_data['operation'] == 'Armed Clashes in Sudan (Overview)')]
idp_admin2_2022 = idp_admin_data[(idp_admin_data['yearReportingDate'] == 2022) & (idp_admin_data['monthReportingDate'] == 3) & (idp_admin_data['operation'] == 'Darfur conflict')]
idp_admin2_2021 = idp_admin_data[(idp_admin_data['yearReportingDate'] == 2021) & (idp_admin_data['monthReportingDate'] == 12) & (idp_admin_data['operation'] == 'Darfur conflict')]

idp_admin2 = pd.concat([idp_admin2_2024, idp_admin2_2023, idp_admin2_2022, idp_admin2_2021])

In [35]:
idp_admin2['admin2Name'] = idp_admin2['admin2Name'].replace(mapping_result)

In [36]:
idp_admin2_yearly = idp_admin2.groupby(['yearReportingDate']).agg({'numPresentIdpInd':'sum'}).reset_index()
idp_admin2_yearly

Unnamed: 0,yearReportingDate,numPresentIdpInd
0,2021,3086553
1,2022,3779487
2,2023,9052822
3,2024,11532774


In [37]:
idp_origin_2023 = idp_origin_2023.drop(columns=['adm1_name', 'adm1_pcode', 'affected_idps_ind', 'affected_idps_hh'])
idp_origin_2024 = idp_origin_2024.drop(columns=['adm1_name', 'adm1_pcode', 'affected_idps_ind', 'affected_idps_hh'])

idp_origin_2023['state_origin'] = idp_origin_2023['state_origin'].str.strip()

In [38]:
# merge idp_adm1_2023 with idp_acled_adm1_2023 on adm1_name and state_origin
acled_idp_2024_origin = pd.merge(idp_origin_2024, acled_admin1, left_on=['state_origin'], right_on=['admin1'], how='inner')
acled_idp_2024 = pd.merge(idp_admin2_2024, acled_admin2, left_on=['admin2Name'], right_on=['admin2'], how='inner')

In [47]:
alt.Chart(acled_idp_2024).mark_circle().encode(
    x=alt.X('event_id_cnty:Q', scale=alt.Scale(type='log')),
    y=alt.Y('numPresentIdpInd:Q', scale=alt.Scale(type='log')),
    size='fatalities:Q'
).properties(
    title='Total Fatalities by Month'
).interactive()




In [40]:
acled_idp_2024['numPresentIdpInd'].corr(acled_idp_2024['fatalities'])

0.20483374160946596

In [41]:
#save as csv
acled_idp_2024.to_csv('sudan/acled_idp_2024.csv', index=False)

In [42]:
#remove admin2Name reifi gharb kassala  and Jubayt Elmaaadin
#idp_acled = idp_acled[idp_acled['admin2Name'] != 'Reifi Gharb Kassala']
#idp_acled = idp_acled[idp_acled['admin2Name'] != 'Jubayt Elmaaadin']

In [43]:
alt.Chart(acled_idp_2024_origin).mark_circle().encode(
    x=alt.X('event_id_cnty:Q', scale=alt.Scale(type='log')),
    y=alt.Y('affected_idps_state:Q', scale=alt.Scale(type='log')),
    size='fatalities:Q'
).properties(
    title='Total Fatalities by Month'
).interactive()


In [44]:
acled_idp_2024_origin['affected_idps_state'].corr(acled_idp_2024_origin['fatalities'])

0.8294799756613697

In [45]:
acled_idp_2024_origin
#save as csv
acled_idp_2024_origin.to_csv('sudan/acled_idp_2024_origin.csv', index=False)