In [759]:
import numpy as np
import requests
from bs4 import BeautifulSoup
import pandas as pd
import altair as alt
import geopandas as gpd
import matplotlib.pyplot as plt
import json
import altair as alt
import dtmapi
import statsmodels.api as sm
from statsmodels.formula.api import ols
from fuzzywuzzy import fuzz, process
from hdx.utilities.easy_logging import setup_logging
from hdx.api.configuration import Configuration
from hdx.data.dataset import Dataset
import os

### IOM DTM

In [760]:
datasets_ipc = Dataset.read_from_hdx("sudan-displacement-situation-countrywide-idps-iom-dtm")
resources = datasets_ipc.get_resources()

for res in resources:
    url, path = res.download('sudan/')
    print(f"Resource URL {url} downloaded to {path}")

Resource URL https://data.humdata.org/dataset/44594ae2-dde9-417f-acae-523bc012c162/resource/d641cae6-ba35-416d-9a4f-731d886315d6/download/dtm_sdn_smu-bi-weekly-13-_-17122024_v02_public_hdx.xlsx downloaded to sudan/Sudan - Countrywide Mobility Update 13 (Bi-Weekly)2.xlsx
Resource URL https://data.humdata.org/dataset/44594ae2-dde9-417f-acae-523bc012c162/resource/4ea6ae19-4080-405b-bf8a-3003e8ed5341/download/dtm_sdn_smu-bi-weekly-12-_-03122024_v02_public_hdx.xlsx downloaded to sudan/Sudan - Countrywide Mobility Update 12 (Bi-Weekly)2.xlsx
Resource URL https://data.humdata.org/dataset/44594ae2-dde9-417f-acae-523bc012c162/resource/2a0a6ab7-722a-4aea-bd31-dd0d949ea7dd/download/dtm_sdn_smu-bi-weekly-11-_-20112024_v02_public_hdx.xlsx downloaded to sudan/Sudan - Countrywide Mobility Update 11 (Bi-Weekly)2.xlsx
Resource URL https://data.humdata.org/dataset/44594ae2-dde9-417f-acae-523bc012c162/resource/7a7d1e2f-6411-4e0a-b51c-f1a05a1ed5ca/download/dtm_sdn_smo-bi-monthly-003-_06112024_v02_public_h

In [761]:
idp_admin_data = dtmapi.get_idp_admin2_data(CountryName='Sudan',  FromReportingDate='2021-01-01', ToReportingDate='2025-01-01', to_pandas=True)

In [762]:
# read data 
idp_adm1_2024 = pd.read_excel('sudan/dtm/Sudan - Countrywide Mobility Update 13 (Bi-Weekly).xlsx', sheet_name='MASTER LIST (ADMIN1)', skiprows=2)
idp_adm2_2024 = pd.read_excel('sudan/dtm/Sudan - Countrywide Mobility Update 13 (Bi-Weekly).xlsx', sheet_name='MASTER LIST (ADMIN2)', skiprows=2)

rename_dict = {
    'STATE OF DISPLACEMET': 'adm1_name',
    'STATE CODE': 'adm1_pcode',
    'LOCALITY OF DISPLACEMENT': 'adm2_name',
    'LOCALITY_CODE': 'adm2_pcode',
    'LOCALITY_ CODE': 'adm2_pcode',
    'HHs': 'affected_idps_hh',
    'IDPs': 'affected_idps_ind'
}


idp_adm1_2024.rename(columns=rename_dict, inplace=True)
#idp_adm1_2023.rename(columns=rename_dict, inplace=True)
#idp_adm2_2023.rename(columns=rename_dict, inplace=True)
#idp_adm2_2024.rename(columns=rename_dict, inplace=True)

In [763]:
idp_origin_adm1_2024 = idp_adm1_2024.iloc[1:]
#idp_origin_adm1_2023 = idp_adm1_2023.iloc[1:]
#idp_origin_adm2_2023 = idp_adm2_2023.iloc[1:]
#idp_origin_adm2_2024 = idp_adm2_2024.iloc[1:]

In [764]:
idp_origin_adm1_2024.loc[19] = idp_origin_adm1_2024.iloc[:, 2:].sum()
idp_origin_adm1_2024
idp_origin_adm1_2024.loc[19, 'adm1_name'] = 'Total'

In [765]:
state_columns = idp_adm1_2024.columns[4:-2].tolist()
idp_adm1_2024_state = idp_origin_adm1_2024.melt(
        id_vars=['adm1_name', 'adm1_pcode',  'affected_idps_ind', 'affected_idps_hh'],
        value_vars=state_columns,
        var_name='state_origin',
        value_name='affected_idps_state'
    )

In [766]:
idp_origin_2024 = idp_adm1_2024_state[idp_adm1_2024_state['adm1_name'] == 'Total']

### IPC 

GeoJson data

In [767]:


api_key = '3bdbeef9-fc4c-4315-a665-cc56deb11be6'
base_url = 'https://api.ipcinfo.org/'

# Create directory if it doesn't exist
os.makedirs('sudan/ipc', exist_ok=True)

# Fetch IPC analyses
ipc = requests.get(f'{base_url}analyses?country=SD&type=A&periods=C&key={api_key}').json()

for item in ipc:
    # Try fetching current data ('C') first
    response = requests.get(f"{base_url}areas/{item['id']}/C?format=geojson&key={api_key}")
    
    if response.status_code == 200:
        save_name = f"sudan/ipc/ipc_geojson_C_{item['id']}.json"
    else:
        print(f"Current data not available for item {item['id']}, fetching projections (P)")
        response = requests.get(f"{base_url}areas/{item['id']}/P?format=geojson&key={api_key}")
        save_name = f"sudan/ipc/ipc_geojson_P_{item['id']}.json"
    
    if response.status_code == 200:
        try:
            ipc_geojson = response.json()
            with open(save_name, 'w') as f:
                json.dump(ipc_geojson, f)
            print(f"Saved {save_name}")
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON for item {item['id']}: {e}")
        except Exception as e:
            print(f"Error saving file for item {item['id']}: {e}")
    else:
        print(f"Failed to fetch data for item {item['id']}: HTTP {response.status_code}")


Saved sudan/ipc/ipc_geojson_C_14192889.json
Saved sudan/ipc/ipc_geojson_C_18151797.json
Current data not available for item 19191589, fetching projections (P)
Saved sudan/ipc/ipc_geojson_P_19191589.json
Saved sudan/ipc/ipc_geojson_C_24004104.json
Saved sudan/ipc/ipc_geojson_C_25857808.json
Saved sudan/ipc/ipc_geojson_C_58836462.json
Current data not available for item 64768802, fetching projections (P)
Failed to fetch data for item 64768802: HTTP 404
Saved sudan/ipc/ipc_geojson_C_68887616.json
Current data not available for item 74795267, fetching projections (P)
Saved sudan/ipc/ipc_geojson_P_74795267.json


In [768]:
#ipc_2024 = requests.get(f"https://api.ipcinfo.org/areas/68887616/C?format=json&key=").json()
#ipc_2024_proj = requests.get(f"https://api.ipcinfo.org/areas/68887616/C?format=json&key=").json()

HDX API

In [769]:
datasets_ipc = Dataset.read_from_hdx("sudan-acute-food-insecurity-country-data")
resources = datasets_ipc.get_resources()

ipc_sdn_adm1 = [res for res in resources if res.get('description') == 'All IPC level 1 data in long form with HXL tags']
ipc_sdn_adm2 = [res for res in resources if res.get('description') == 'All IPC area data in long form with HXL tags']


url1 = ipc_sdn_adm1[0]['alt_url']
url2 = ipc_sdn_adm2[0]['alt_url']

# Download the file
response = requests.get(url1)
with open('sudan/ipc_sdn_adm1.csv', 'wb') as f:
	f.write(response.content)
	
response = requests.get(url2)
with open('sudan/ipc_sdn_adm2.csv', 'wb') as f:
	f.write(response.content)



In [770]:
ipc_sdn_adm2 = pd.read_csv('sudan/ipc_sdn_adm2.csv')
ipc_sdn_adm2 = ipc_sdn_adm2.iloc[1:]
ipc_sdn_adm1 = pd.read_csv('sudan/ipc_sdn_adm1.csv')
ipc_sdn_adm1 = ipc_sdn_adm1.iloc[1:]

In [771]:
ipc_sdn_adm2 = ipc_sdn_adm2[~ipc_sdn_adm2['Area'].str.contains('IDPs')]
ipc_sdn_adm2 = ipc_sdn_adm2[~ipc_sdn_adm2['Area'].str.contains('IDP')]
ipc_sdn_adm2 = ipc_sdn_adm2[~ipc_sdn_adm2['Area'].str.contains('Refugees')]
ipc_sdn_adm2 = ipc_sdn_adm2[~ipc_sdn_adm2['Area'].str.contains('refugees')]

ipc_sdn_adm1 = ipc_sdn_adm1[~ipc_sdn_adm1['Level 1'].str.contains('IDPs')]
ipc_sdn_adm1 = ipc_sdn_adm1[~ipc_sdn_adm1['Level 1'].str.contains('IDP')]
ipc_sdn_adm1 = ipc_sdn_adm1[~ipc_sdn_adm1['Level 1'].str.contains('Refugees')]
ipc_sdn_adm1 = ipc_sdn_adm1[~ipc_sdn_adm1['Level 1'].str.contains('refugees')]

### ACLED

In [772]:
acled = pd.read_csv('sudan/ACLED_2020-01-01-2024-12-31-Sudan.csv')

In [773]:
acled = acled[acled['year'] != 2020]
acled['event_date'] = pd.to_datetime(acled['event_date'])
acled['month_year'] = acled['event_date'].dt.to_period('M')
acled['week'] = acled['event_date'].dt.to_period('W').apply(lambda r: r.start_time)

In [774]:
#Monthly Data
acled_monthly= acled.groupby(['month_year','event_type']).agg({'fatalities':'sum', 'event_date':'count'}).reset_index()

#remove riots
acled_monthly = acled_monthly[acled_monthly['event_type'] != 'Riots']
acled_monthly = acled_monthly[acled_monthly['event_type'] != 'Strategic developments']
acled_monthly['event_type'] = acled_monthly['event_type'].replace('Explosions/Remote violence', 'Explosions')

acled_monthly.to_csv('sudan/acled_monthly.csv', index=False)
acled_monthly['month_year'] = acled_monthly['month_year'].astype(str)

#Weekly Data
#acled_weekly = acled.groupby(['week', 'event_type']).agg({'fatalities':'sum', 'event_date':'count'}).reset_index()
#acled_weekly['week'] = acled_weekly['week'].astype(str)

In [775]:
# Line plot of total fatalities by month
line = alt.Chart(acled_monthly).mark_line(color='red').encode(
    x='month_year:O',
    y='fatalities:Q',
    color='event_type:O',
    tooltip=['month_year', 'fatalities', 'event_type']
).properties(
    title='Total Fatalities and Incidents by Month'
).interactive()

# Bar chart of total incidents by month
bar = alt.Chart(acled_monthly).mark_bar(color='blue').encode(
    x='month_year:O',
    y='event_date:Q',
    color='event_type:O',
    tooltip=['month_year', 'fatalities', 'event_type']
).interactive()

# Combine both charts into one with a single y-axis
combined_chart = alt.layer(bar, line)

combined_chart


### COMBINED STUFF

### IPC + ACLED

In [776]:
acled_admin1 = acled[(acled['month_year'] >= '2023-04') & (acled['month_year'] <= '2024-05')]
acled_admin1 = acled_admin1.groupby('admin1').agg({'fatalities': 'sum', 'event_id_cnty': 'count'}).reset_index()
acled_admin1.reset_index(drop=True, inplace=True)

acled_admin2 = acled[(acled['month_year'] >= '2023-04')& (acled['month_year'] <= '2024-05')]
acled_admin2= acled_admin2.groupby('admin2').agg({'fatalities': 'sum', 'event_id_cnty': 'count'}).reset_index()
acled_admin2.reset_index(drop=True, inplace=True)

In [777]:
# Old Filtering
#ipc_sdn_adm2 = ipc_sdn_adm2[(ipc_sdn_adm2['Validity period'] == 'current') & (ipc_sdn_adm2['Date of analysis'].str.contains('2024')) | 
 #                           (ipc_sdn_adm2['Validity period'] == 'current') & (ipc_sdn_adm2['Date of analysis'].str.contains('2023'))]
#ipc_sdn_adm2 = ipc_sdn_adm2[ipc_sdn_adm2['Phase'] == '3+']


#ipc_sdn_adm1 = ipc_sdn_adm1[(ipc_sdn_adm1['Validity period'] == 'current') & (ipc_sdn_adm1['Date of analysis'].str.contains('2024')) | 
 #                           (ipc_sdn_adm1['Validity period'] == 'current') & (ipc_sdn_adm1['Date of analysis'].str.contains('2023'))]
#ipc_sdn_adm1 = ipc_sdn_adm1[ipc_sdn_adm1['Phase'] == '3+']

In [778]:
ipc_sdn_adm2_2024 = ipc_sdn_adm2[(ipc_sdn_adm2['Validity period'] == 'current') & (ipc_sdn_adm2['Date of analysis'].str.contains('Apr 2024'))]
ipc_sdn_adm2_2024 = ipc_sdn_adm2_2024[ipc_sdn_adm2_2024['Phase']== '3+']

#splitting areas
ipc_sdn_adm2_2024 = ipc_sdn_adm2_2024.assign(Area=ipc_sdn_adm2_2024['Area'].str.split(r'[,&]')).explode('Area').reset_index(drop=True)
ipc_sdn_adm2_2024['Area'] = ipc_sdn_adm2_2024['Area'].str.strip()


ipc_sdn_adm1_2024 = ipc_sdn_adm1[(ipc_sdn_adm1['Validity period'] == 'current') & (ipc_sdn_adm1['Date of analysis'].str.contains('Apr 2024'))]
ipc_sdn_adm1_2024 = ipc_sdn_adm1_2024[ipc_sdn_adm1_2024['Phase']== '3+']

In [779]:
# Compare admin2 values in acled_admin2 and admin2Name values in idp_admin2_data
acled_admin_values = set(acled_admin2['admin2'])
ipc_admin_values = set(ipc_sdn_adm2_2024['Area'])

# Find common and unique values
common_admin = acled_admin_values.intersection(ipc_admin_values)
unique_to_acled = list(acled_admin_values.difference(ipc_admin_values))
unique_to_ipc = list(ipc_admin_values.difference(acled_admin_values))

print("Common admin2 values:", common_admin)
print("Unique to ACLED admin2 values:", unique_to_acled)
print("Unique to IPC admin2 values:", unique_to_ipc)

Common admin2 values: {'Nitega', 'Khartoum', 'Abu Zabad', 'Wad Bandah', 'Basundah', 'Beliel', 'Um Rawaba', 'Dilling', 'Soudari', 'Wadi Salih', 'Haya', 'Al Qurashi', 'An Nuhud', 'Bendasi', 'Azum', 'Um Dam Haj Ahmed', 'Halfa', 'Al Khiwai', 'Um Bada', 'Bahri', 'Sheikan', 'Karrari', 'Al Idia', 'Kas', 'Delgo', 'Shiaria', 'Kelemando', 'Al Wihda', 'Yassin', 'Bara', 'Rabak', 'Abu Jubayhah', 'Barbar', 'Beida', 'At Tina', 'Nyala Janoub', 'Kereneik', 'Kadugli', 'Tawila', 'Geisan', 'Kebkabiya', 'Nyala Shimal', 'Melit', 'Atbara', 'Talawdi', 'Gereida', 'Babanusa', 'Ghadeer', 'Keilak', 'Abassiya', 'Shendi', 'Mukjar', 'Sawakin', 'Al Dibab', 'Um Dukhun', 'Heiban', 'Tendalti', 'Kubum', 'Um Rimta', 'Sirba', 'Zalingi', 'Kateila', 'Kutum', 'Saraf Omra', 'Dongola', 'Um Dafoug', 'Sinja', 'Al Meiram', 'Delami', 'Adila', 'Mershing', 'Sennar', 'Ghubaish', 'Kulbus', 'Buram', 'Kosti'}
Unique to ACLED admin2 values: ['Abu Hamad', 'Sharg Aj Jabal', 'El Roseires', 'Al Fao', 'Dinder', 'Al Lait', 'Um Kadadah', 'Al Kur

In [780]:
# Mapping based on similarity
def map_admin2(list1, list2, threshold=80):
    mapping = {}
    for item in list1:
        match, score = process.extractOne(item, list2, scorer=fuzz.ratio)
        if score >= threshold:
            mapping[item] = match
        else:
            mapping[item] = None
    return mapping

mapping_result = map_admin2(unique_to_ipc, unique_to_acled )

# Display mapping results
#for idp, acled in mapping_result.items():
    #print(f"{idp} -> {acled if acled else 'No match found'}")

#apply mapping to acled_admin2
acled_admin2['admin2'] = acled_admin2['admin2'].replace(mapping_result)

In [781]:
#Adm 1
acled_admin1['admin1'] = acled_admin1['admin1'].replace('Al Jazirah', 'Aj Jazirah')
acled_ipc_adm1 = pd.merge(acled_admin1, ipc_sdn_adm1_2024, left_on=['admin1'], right_on=['Level 1'], how='inner')

In [782]:
#Adm 2
acled_ipc_adm2 = pd.merge(acled_admin2, ipc_sdn_adm2_2024, left_on=['admin2'], right_on=['Area'], how='inner')

In [783]:
#correlation between fatalities and Percentage
print(acled_ipc_adm1['event_id_cnty'].corr(acled_ipc_adm1['Number']))
print(acled_ipc_adm1['fatalities'].corr(acled_ipc_adm1['Number']))
print(acled_ipc_adm2['event_id_cnty'].corr(acled_ipc_adm2['Number']))
print(acled_ipc_adm2['fatalities'].corr(acled_ipc_adm2['Number']))

0.6452683691222223
0.48353427830695067
0.6106016302268746
0.6721237302616113


In [784]:
# Create the chart with the selection and regression line
chart = alt.Chart(acled_ipc_adm1).mark_circle().encode(
    x=alt.X('fatalities:Q', scale=alt.Scale(type='log')),
    y=alt.Y('Number:Q', scale=alt.Scale(type='log')),
    color='admin1:N',
    size=alt.Size('fatalities:Q', scale=alt.Scale(range=[10, 100])),
    tooltip=['admin1', 'fatalities', 'Number']
).properties(
    title='Fatalities vs IPC by Admin1 in Sudan'
).interactive()

chart

In [785]:
#save acled_ipc_adm2 to csv
acled_ipc_adm2.to_csv('sudan/acled_ipc_adm2.csv', index=False)

### IPC + IDP

In [787]:
idp_admin2_2024_may = idp_admin_data[(idp_admin_data['yearReportingDate'] == 2024) & (idp_admin_data['monthReportingDate'] == 5) & (idp_admin_data['operation'] == 'Armed Clashes in Sudan (Overview)') ]
idp_admin2_2024_may = idp_admin2_2024_may.dropna(subset=['admin2Name'])

idp_admin1_2024_may = idp_admin_data[(idp_admin_data['yearReportingDate'] == 2024) & (idp_admin_data['monthReportingDate'] == 5) & (idp_admin_data['operation'] == 'Armed Clashes in Sudan (Overview)') ]
idp_admin1_2024_may = idp_admin1_2024_may.groupby('admin1Name').agg({'numPresentIdpInd':'sum'}).reset_index()

In [788]:
ipc_sdn_adm2_2024 = ipc_sdn_adm2[(ipc_sdn_adm2['Validity period'] == 'current') & (ipc_sdn_adm2['Date of analysis'].str.contains('Apr 2024'))]
ipc_sdn_adm2_2024 = ipc_sdn_adm2_2024[ipc_sdn_adm2_2024['Phase']== '3+']
ipc_sdn_adm2_2024 = ipc_sdn_adm2_2024.assign(Area=ipc_sdn_adm2_2024['Area'].str.split(r'[,&]')).explode('Area').reset_index(drop=True)
ipc_sdn_adm2_2024['Area'] = ipc_sdn_adm2_2024['Area'].str.strip()

ipc_sdn_adm1_2024 = ipc_sdn_adm1[(ipc_sdn_adm1['Validity period'] == 'current') & (ipc_sdn_adm1['Date of analysis'].str.contains('Apr 2024'))]
ipc_sdn_adm1_2024 = ipc_sdn_adm1_2024[ipc_sdn_adm1_2024['Phase']== '3+']

In [789]:
# Compare admin2 values in idp and admin2Name values in idp_admin2_data
idp_admin_values = set(idp_admin2_2024_may['admin2Name'])
ipc_admin_values = set(ipc_sdn_adm2_2024['Area'])

# Find common and unique values
common_admin = idp_admin_values.intersection(ipc_admin_values)
unique_to_idp = list(idp_admin_values.difference(ipc_admin_values))
unique_to_ipc = list(ipc_admin_values.difference(idp_admin_values))

print("Common admin2 values:", common_admin)
print("Unique to IDP admin2 values:", unique_to_idp)
print("Unique to IPC admin2 values:", unique_to_ipc)

Common admin2 values: {'Damso', 'Halfa', 'Tawkar', 'Kelemando', 'Al Wihda', 'Yassin', 'Kernoi', 'Barbar', 'Beida', 'At Tina', 'Nyala Janoub', 'Abassiya', 'Ar Rashad', 'Al Leri', 'Merwoe', 'Zalingi', 'Saraf Omra', 'Baw', 'Sennar', 'Kulbus', 'Nitega', 'Khartoum', 'Um Rawaba', 'Beliel', 'Dilling', 'As Serief', 'Azum', 'Gharb Bara', 'Um Dam Haj Ahmed', 'Al Khiwai', 'Bahri', 'Kas', 'Bara', 'Tendalti', 'Kubum', 'Um Rimta', 'Sirba', 'Delami', 'Buram', 'Abu Zabad', 'Soudari', 'Wadi Salih', 'Um Bada', 'Karrari', "Hala'ib", 'Kereneik', 'Tawila', 'Kadugli', 'Geisan', 'Melit', 'Ghadeer', 'Babanusa', 'Sawakin', 'Sinkat', 'Dongola', 'Sinja', 'Guli', 'Adila', 'Ghubaish', 'Wad Bandah', 'Kosti', 'Basundah', 'Haya', 'Al Qurashi', 'An Nuhud', 'Bendasi', 'Tulus', 'Sheikan', 'Al Idia', 'Delgo', 'Rabak', 'Abu Jubayhah', 'Dordieb', 'Kebkabiya', 'Nyala Shimal', 'Talawdi', 'Atbara', 'Gereida', 'Keilak', 'Shendi', 'Mukjar', 'Al Dibab', 'Um Dukhun', 'Assalaya', 'Kateila', 'Kutum', 'Um Dafoug', 'Al Meiram', 'Mers

In [790]:
# Mapping based on similarity
def map_admin2(list1, list2, threshold=80):
    mapping = {}
    for item in list1:
        match, score = process.extractOne(item, list2, scorer=fuzz.ratio)
        if score >= threshold:
            mapping[item] = match
        else:
            mapping[item] = None
    return mapping

mapping_result = map_admin2(unique_to_ipc, unique_to_idp)

#apply mapping 
#idp_admin2_2024_may['admin2Name'] = idp_admin2_2024_may['admin2Name'].replace(mapping_result)

In [791]:
#Adm 2
idp_ipc_adm2 = pd.merge(idp_admin2_2024_may, ipc_sdn_adm2_2024, left_on=['admin2Name'], right_on=['Area'], how='inner')
idp_ipc_adm1 = pd.merge(idp_admin1_2024_may, ipc_sdn_adm1_2024, left_on=['admin1Name'], right_on=['Level 1'], how='inner')

In [792]:
#correlation between IDP and IPC 3+
print(idp_ipc_adm1['numPresentIdpInd'].corr(idp_ipc_adm1['Number']))

0.17711988087371816


In [793]:
chart = alt.Chart(idp_ipc_adm1).mark_circle().encode(
    x=alt.X('numPresentIdpInd:Q', scale=alt.Scale(type='log')),
    y=alt.Y('Number:Q', scale=alt.Scale(type='log')),
    color='admin1Name:N',
    size=alt.Size('numPresentIdpInd:Q', scale=alt.Scale(range=[10, 100])),
    tooltip=['admin1Name', 'numPresentIdpInd', 'Number']
).properties(
    title='IDP vs IPC by Admin1 in Sudan'
).interactive()

chart

In [808]:
acled_idp_ipc_adm2 = pd.merge(acled_ipc_adm2, idp_ipc_adm2, left_on=['Area'], right_on=['Area'], how='outer')

acled_idp_ipc_adm2 = acled_idp_ipc_adm2[['Area', 'Level 1_x','fatalities', 'event_id_cnty', 'numPresentIdpInd', 'Number_x']]
acled_idp_ipc_adm2.rename(columns={'Number_x':'phase3plus'}, inplace=True)
acled_idp_ipc_adm2.rename(columns={'Level 1_x':'Level 1'}, inplace=True)

#save as csv
acled_idp_ipc_adm2.to_csv('sudan/acled_idp_ipc_adm2.csv', index=False)

In [795]:
acled_idp_ipc_adm1 = pd.merge(acled_ipc_adm1, idp_ipc_adm1, left_on=['Level 1'], right_on=['Level 1'], how='outer')
acled_idp_ipc_adm1 = pd.merge(acled_idp_ipc_adm1, idp_origin_2024, left_on=['Level 1'], right_on=['state_origin'], how='outer')

acled_idp_ipc_adm1 = acled_idp_ipc_adm1[['Level 1', 'fatalities', 'event_id_cnty', 'numPresentIdpInd', 'Number_x', 'affected_idps_state']]
acled_idp_ipc_adm1.rename(columns={'Number_x':'phase3plus'}, inplace=True)

acled_idp_ipc_adm1.to_csv('sudan/acled_idp_ipc_adm1.csv', index=False)

In [796]:
chart = alt.Chart(acled_idp_ipc_adm1).mark_circle().encode(
    x=alt.X('affected_idps_state:Q', scale=alt.Scale(type='log')),
    y=alt.Y('phase3plus:Q', scale=alt.Scale(type='log')),
    size=alt.Size('fatalities:Q'),
    tooltip=[ 'Level 1','affected_idps_state', 'phase3plus']
).properties(
    title='IDP vs IPC by Admin1 in Sudan'
).interactive()

chart

In [812]:
acled_idp_ipc_adm2

Unnamed: 0,Area,Level 1,fatalities,event_id_cnty,numPresentIdpInd,phase3plus
0,Abassiya,South Kordofan,108.0,13.0,56528.0,54287
1,Abu Jubayhah,South Kordofan,0.0,2.0,56258.0,91277
2,Abu Zabad,West Kordofan,12.0,10.0,17517.0,151440
3,Adila,East Darfur,0.0,9.0,53737.0,85845
4,Al Dibab,West Kordofan,0.0,2.0,21146.0,146748
...,...,...,...,...,...,...
87,Al Leri,,,,6641.0,
88,Ar Rashad,,,,20931.0,
89,Guli,,,,26634.0,
90,Baw,,,,50471.0,


In [817]:
# Create the chart with the selection and regression line
chart = alt.Chart(acled_idp_ipc_adm2).mark_circle().encode(
    x=alt.X('phase3plus:Q', scale=alt.Scale(type='log'), title='People in IPC Phase 3+ (Log)'),
    y=alt.Y('numPresentIdpInd:Q', scale=alt.Scale(type='log'), title=None),
    color=alt.Color('Level 1:N', legend=alt.Legend(title="Admin 1 Area")),
    size=alt.Size('fatalities:Q'),
    tooltip=['Area', 'fatalities', 'phase3plus']
).properties(
    title='Food Insecurity, IDPs and Conflict in Sudan, Adm 2'
).interactive()#.to_json()


chart

In [813]:
# Create the chart with the selection and regression line
chart = alt.Chart(acled_idp_ipc_adm2).mark_circle().encode(
    x=alt.X('phase3plus:Q', scale=alt.Scale(type='log'), title='Incidents (Log)'),
    y=alt.Y(':Q', scale=alt.Scale(type='log'), title='People in IPC Phase 3+ (Log)'),
    color=alt.Color('Level 1:N', legend=alt.Legend(title="Admin 1 Area")),
    size=alt.Size('fatalities:Q'),
    tooltip=['Area', 'fatalities', 'phase3plus']
).properties(
    title='Armed Violence and acute food insecurity'
).interactive()#.to_json()

#with open('CC9_fig1.json', 'w') as f:
   # f.write(chart)

chart