In [118]:
import warnings
warnings.filterwarnings("ignore")

import os
import itertools
import numpy as np
import networkx as nx
import pandas as pd
import geopandas as gpd

from collections import Counter
from datetime import datetime
from pyproj import Proj, transform

from rasterio.transform import xy
from shapely.geometry import Point, Polygon, LineString, MultiLineString

import tqdm.notebook as tqdm

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
%config InlineBackend.figure_format = 'retina'

In [1]:
data_path = '/home/gleb/Desktop/thesis/data/'

countries_path = data_path + 'world-administrative-boundaries/'
basins_6_path = data_path + 'HydroBASINS Africa 6 level/'
basins_12_path = data_path + 'HydroBASINS Africa 12 level/'
conflict_path = data_path + 'conflicts/'
dams_path = data_path + 'GRanD_Version_1_3/'
sheds_file_3 = data_path + 'hyd_af_dem_3s/af_dem_3s.tif'
sheds_file_15 = data_path + 'hyd_af_dem_15s/hyd_af_dem_15s.tif'
rivers_file = data_path + 'HydroRIVERS_v10_af/HydroRIVERS_v10_af.gdb'

output_path = '/home/gleb/Desktop/thesis/outcomes/'

In [172]:
bas_6 = gpd.read_file(output_path + '5rivers_fully_prepared_data.shp')

df = gpd.read_file(conflict_path + '1997-01-01-2023-09-30.csv', sep=';', dtype={'timestamp': 'object'})
gdf_conf = gpd.GeoDataFrame(df, geometry = gpd.points_from_xy(df.longitude, df.latitude))
gdf_conf.set_crs('epsg:4326', inplace=True)
# gdf_conf.to_crs('epsg:4326', inplace=True)

del df
regions = ['Eastern Africa', 'Middle Africa', 'Northern Africa', 'Southern Africa', 'Western Africa']
gdf_conf = gdf_conf[gdf_conf['region'].apply(lambda x: True if x in regions else False)]
gdf_conf['year'] = gdf_conf['year'].apply(lambda x: int(x))
gdf_conf = gdf_conf[gdf_conf['year'] >= 1999]
gdf_conf.reset_index(drop=True, inplace=True)

In [173]:
# All conflicts:
all_conflicts_gdf = gdf_conf.copy()

# Riots:
riots_gdf = gdf_conf[gdf_conf.event_type == 'Riots'].copy()
riots_gdf.reset_index(drop=True, inplace=True)

# Filter by battles:
battles_gdf = gdf_conf[gdf_conf.event_type == 'Battles'].copy()
battles_gdf.reset_index(drop=True, inplace=True)

del gdf_conf

In [174]:
battles_gdf.columns

Index(['event_id_cnty', 'event_date', 'year', 'time_precision',
       'disorder_type', 'event_type', 'sub_event_type', 'actor1',
       'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2',
       'interaction', 'civilian_targeting', 'iso', 'region', 'country',
       'admin1', 'admin2', 'admin3', 'location', 'latitude', 'longitude',
       'geo_precision', 'source', 'source_scale', 'notes', 'fatalities',
       'tags', 'timestamp', 'geometry'],
      dtype='object')

In [175]:
interaction_of_interest = set(['33', '34', '35', '38', '44', '45', '48'])
battles_gdf['interest'] = battles_gdf['interaction'].apply(lambda x: 1 if x in interaction_of_interest else 0)
subset = battles_gdf[battles_gdf['interaction'].apply(lambda x: 1 if x in interaction_of_interest else 0) == 1].copy()
subset.reset_index(inplace=True, drop=True)

print('Battles with associated actors:', len(subset) - subset['assoc_actor_1'].isnull().sum())
print('Battles with multiple associated actors:', subset['assoc_actor_1'].apply(lambda x: 1 if ';'
                                                                                    in str(x) else 0).sum())

Battles with associated actors: 14144
Battles with multiple associated actors: 414


In [176]:
max_assoc_actors1 = pd.DataFrame(subset['assoc_actor_1'].apply(lambda x: str(x).count(';'))).max()[0]
max_assoc_actors2 = pd.DataFrame(subset['assoc_actor_2'].apply(lambda x: str(x).count(';'))).max()[0]
print('Max actors in column Assoc Actor 1 = ' + str(max_assoc_actors1) + 
      '. In column Assoc Actor 2 = ' + str(max_assoc_actors2))

Max actors in column Assoc Actor 1 = 6. In column Assoc Actor 2 = 5


In [177]:
subset[['assoc_actor_1_1', 'assoc_actor_1_2',
        'assoc_actor_1_3', 'assoc_actor_1_4',
        'assoc_actor_1_5', 'assoc_actor_1_6',
        'assoc_actor_1_7']] = subset['assoc_actor_1'].str.split(';', expand=True)
subset['assoc_actor_1_1'] = subset['assoc_actor_1_1'].replace('', None)

subset[['assoc_actor_2_1','assoc_actor_2_2',
        'assoc_actor_2_3', 'assoc_actor_2_4',
        'assoc_actor_2_5', 'assoc_actor_2_6']] = subset['assoc_actor_2'].str.split(';', expand=True)
subset['assoc_actor_2_1'] = subset['assoc_actor_2_1'].replace('', None)

In [209]:
a = subset[['event_id_cnty', 'year', 'country', 'geometry', 'notes', 'admin1', 'admin2', 'admin3', 'location', 'latitude', 'longitude',
       'geo_precision', 'event_date', 'time_precision', 'civilian_targeting', 'iso', 'region',
            'actor1',
            'assoc_actor_1_1', 'assoc_actor_1_2','assoc_actor_1_3','assoc_actor_1_4',
            'assoc_actor_1_5', 'assoc_actor_1_6','assoc_actor_1_7',
            'actor2',
            'assoc_actor_2_1','assoc_actor_2_2','assoc_actor_2_3','assoc_actor_2_4',
            'assoc_actor_2_5', 'assoc_actor_2_6']].copy()

# Start separating by hydrobasins and years from here:

In [210]:
bas_6.head(2)

Unnamed: 0,HYBAS_ID,NEXT_DOWN,NEXT_SINK,MAIN_BAS,DIST_SINK,DIST_MAIN,SUB_AREA,UP_AREA,PFAF_ID,ENDO,...,z_fobki,z_hyeah,z_oxjpe,z_vcjei,z_nlvsk,z_ahjvn,z_zgjij,lon,lat,geometry
0,1060000010,0,1060000010,1060000010,0.0,0.0,1890.8,1890.8,111011,0,...,3419600,2045.549124,15126.4,4.051191,3.30065,0.648159,0.0,32.246994,29.876606,"POLYGON ((32.36667 29.67917, 32.36667 29.67500..."
1,1060000010,0,1060000010,1060000010,0.0,0.0,1890.8,1890.8,111011,0,...,3419600,2045.549124,15126.4,4.051191,3.30065,0.648159,0.0,32.246994,29.876606,"POLYGON ((32.36667 29.67917, 32.36667 29.67500..."


In [255]:
joined_df = gpd.sjoin(a, bas_6.drop_duplicates(subset=['HYBAS_ID'], keep='first')[['HYBAS_ID', 'geometry']], how="left")
joined_df.drop(['index_right'], axis=1, inplace=True)
joined_df = joined_df[~joined_df['HYBAS_ID'].isna()]
joined_df.reset_index(drop=True, inplace=True)

Unnamed: 0,event_id_cnty,year,country,geometry,notes,admin1,admin2,admin3,location,latitude,...,assoc_actor_1_6,assoc_actor_1_7,actor2,assoc_actor_2_1,assoc_actor_2_2,assoc_actor_2_3,assoc_actor_2_4,assoc_actor_2_5,assoc_actor_2_6,HYBAS_ID
0,DRC30734,2023,Democratic Republic of Congo,POINT (29.24960 -1.52190),"On 28 September 2023, the APCLS elements clash...",Nord-Kivu,Nyiragongo,Nyiragongo,Nyiragongo,-1.5219,...,,,UPC: Union of Congolese Patriots,,,,,,,1061196250
1,DRC30735,2023,Democratic Republic of Congo,POINT (29.29790 -1.55100),"On 28 September 2023, the APCLS elements clash...",Nord-Kivu,Nyiragongo,Bukumu,Kanyamahoro,-1.551,...,,,UPC: Union of Congolese Patriots,,,,,,,1061196250
2,DRC30736,2023,Democratic Republic of Congo,POINT (25.18750 0.48580),"On 28 September 2023, Mbole militias clashed w...",Tshopo,Kisangani,,Lubunga,0.4858,...,,,Lengola Communal Militia (Democratic Republic ...,Lengola Communal Group (Democratic Republic of...,,,,,,1061131410
3,SUD22721,2023,Sudan,POINT (32.56320 15.50350),"On 28 September 2023, an unspecified armed gro...",Khartoum,Khartoum,,Khartoum - Al Azhari,15.5035,...,,,Unidentified Armed Group (Sudan),,,,,,,1060601190
4,SOM42454,2023,Somalia,POINT (47.43080 6.76970),"On 28 September 2023, pastoralists from Habar ...",Mudug,Gaalkacyo,,Gaalkacyo,6.7697,...,,,Leelkayse Clan Militia (Somalia),Pastoralists (Somalia),,,,,,1060965780
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13877,SUD529,1999,Sudan,POINT (22.44540 13.44130),Mesalit tribesmen and a group of Arab nomads b...,West Darfur,El Geneina,,El Geneina,13.4413,...,,,Rizeigat Ethnic Militia (Sudan),,,,,,,1060736640
13878,SOM57,1999,Somalia,POINT (50.20000 9.33330),HornAfrik. Yusef launches attacks against Jama.,Bari,Bandarbeyla,,Dhuudo,9.3333,...,,,Unidentified Armed Group (Somalia),,,,,,,1060007000
13879,SIE2874,1999,Sierra Leone,POINT (-12.50500 7.52640),"Around 15 January 1999 (month of), Kamajor Mil...",Southern,Bonthe,Yawbeko,Bonthe,7.5264,...,,,Kamajor Militia (CDF Unit),,,,,,,1060025060
13880,SUD521,1999,Sudan,POINT (31.58240 4.85100),Marchars SSDF Faction battled with another (un...,Equatoria,Central Equatoria,,Juba,4.851,...,,,SSDF: South Sudan Defence Forces,,,,,,,1060965920


In [114]:
all_actors = set(a.actor1)|set(a.assoc_actor_1_1)|set(a.assoc_actor_1_2)|set(
    a.assoc_actor_1_3)|set(a.assoc_actor_1_4)|set(a.assoc_actor_1_5)|set(a.assoc_actor_1_6)|set(
        a.assoc_actor_1_7)|set(a.actor2)|set(
    a.assoc_actor_2_1)|set(a.assoc_actor_2_2)|set(a.assoc_actor_2_3)|set(a.assoc_actor_2_4)|set(
        a.assoc_actor_2_5)|set(a.assoc_actor_2_6)

all_actors = sorted(x for x in all_actors if x is not None)
print(len(all_actors))
all_actors = sorted({s.lstrip() for s in sorted_set})
print(len(all_actors))

4589
4427


In [115]:
# Initiate a graph
E = nx.Graph()
A = nx.Graph()

# Fill it with nodes
all_actors = set(a.actor1)|set(a.assoc_actor_1_1)|set(a.assoc_actor_1_2)|set(
    a.assoc_actor_1_3)|set(a.assoc_actor_1_4)|set(a.actor2)|set(
    a.assoc_actor_2_1)|set(a.assoc_actor_2_2)|set(a.assoc_actor_2_3)|set(a.assoc_actor_2_4)
all_actors = {x for x in all_actors if not isinstance(x, float) or not math.isnan(x)}
all_actors.discard(None)
E.add_nodes_from(all_actors)
A.add_nodes_from(all_actors)

In [119]:
cols_allies_1 = ['actor1','assoc_actor_1_1', 'assoc_actor_1_2','assoc_actor_1_3','assoc_actor_1_4',
                 'assoc_actor_1_5','assoc_actor_1_6','assoc_actor_1_7']
cols_allies_2 = ['actor2','assoc_actor_2_1','assoc_actor_2_2','assoc_actor_2_3','assoc_actor_2_4',
                 'assoc_actor_2_5','assoc_actor_2_6']

enemies = list(itertools.product(cols_allies_1, cols_allies_2))
allies_1 = list(itertools.combinations(cols_allies_1, 2))
allies_2 = list(itertools.combinations(cols_allies_2, 2))
allies_lists = [allies_1, allies_2]

In [127]:
# Fill the battles graph edges:
for idx, row in a.iterrows():
    for pair in enemies:
        if not pd.isna(row[pair[0]]) and not pd.isna(row[pair[1]]):
            if E.has_edge(row[pair[0]], row[pair[1]]):
                edge = E.get_edge_data(row[pair[0]], row[pair[1]])
                edge = edge.get('battles', 0)
                updated_value = edge + 1
                E[row[pair[0]]][row[pair[1]]]['battles'] = updated_value
            else:
                E.add_edge(row[pair[0]], row[pair[1]], battles=1)

# Fill the alliances graph edges:
for idx, row in a.iterrows():
    for lst in allies_lists:
        for pair in lst:
            if not pd.isna(row[pair[0]]) and not pd.isna(row[pair[1]]):
                if A.has_edge(row[pair[0]], row[pair[1]]):
                    edge = A.get_edge_data(row[pair[0]], row[pair[1]])
                    edge = edge.get('fights_together', 0)
                    updated_value = edge + 1
                    A[row[pair[0]]][row[pair[1]]]['fights_together'] = updated_value
                else:
                    A.add_edge(row[pair[0]], row[pair[1]], fights_together=1)

In [130]:
# # Choose only multiple battles and allies
df_enemies = nx.to_pandas_edgelist(E)
df_enemies = df_enemies[df_enemies['battles'] >= 2].copy()
df_enemies.reset_index(inplace=True, drop=True)
E2 = nx.from_pandas_edgelist(df_enemies,source='source',target='target', edge_attr=['battles'])

df_allies = nx.to_pandas_edgelist(A)
df_allies = df_allies[df_allies['fights_together'] >= 2].copy()
df_allies.reset_index(inplace=True, drop=True)
A2 = nx.from_pandas_edgelist(df_allies,source='source',target='target', edge_attr=['fights_together'])

In [133]:
# pos = nx.spring_layout(E2)  # Define layout (you can try different layouts)
# edge_labels = {(u, v): d['battles'] for u, v, d in E2.edges(data=True)}

# plt.figure(figsize=(45,30))
# nx.draw(E2, pos, with_labels=True, node_size=500, node_color='lightblue', font_size=10,
#         font_color='black', font_weight='bold')
# nx.draw_networkx_edge_labels(E2, pos, edge_labels=edge_labels, font_size=8, font_color='red')
# plt.title('Battles on African Continent')
# plt.show()

In [134]:
# pos = nx.spring_layout(A2)  # Define layout (you can try different layouts)
# edge_labels = {(u, v): d['fights_together'] for u, v, d in A2.edges(data=True)}

# plt.figure(figsize=(45,30))
# nx.draw(A2, pos, with_labels=True, node_size=500, node_color='lightblue', font_size=10,
#         font_color='black', font_weight='bold')
# nx.draw_networkx_edge_labels(A2, pos, edge_labels=edge_labels, font_size=8, font_color='red')
# plt.title('Fights Together on African Continent')
# plt.show()