In [1]:
import pandas as pd
import numpy as np
import matplotlib

In [2]:
# Load the source CSV into a DataFrame (path is relative to the working directory).
acled_csv_path = "data/ACLED2021-2024.csv"
data = pd.read_csv(acled_csv_path)

38130 rows × 32 columns - Original Dataset Dimensions

In [3]:
# Reformatting / cleaning

# Columns removed from the working frame before any further processing.
columns_to_drop = ['time_precision', 'assoc_actor_1', 'assoc_actor_2', 'iso', 'region', 'admin3', 'location',
                   'latitude', 'longitude', 'geo_precision', 'source_scale', 'timestamp', 'tags',
                   'population_best', 'event_id_cnty']

# Build the cleaned frame in one non-mutating chain (no inplace=True):
#   1. drop the unused columns,
#   2. parse event_date -- unparseable values are coerced to NaT,
#   3. discard the rows whose date could not be parsed,
#   4. take an explicit copy so the per-column assignments below operate on an
#      independent frame and cannot trigger SettingWithCopyWarning.
data = (
    data
    .drop(columns=columns_to_drop)
    .assign(event_date=lambda frame: pd.to_datetime(frame['event_date'], errors='coerce'))
    .dropna(subset=['event_date'])
    .copy()
)

# Addressing NA values - categorical and numerical columns
categorical_columns = ['disorder_type', 'event_type', 'sub_event_type', 'actor1', 'actor2', 'civilian_targeting',
                       'country', 'admin1', 'admin2', 'source', 'notes']

# Missing categorical values get an explicit placeholder label.
for column in categorical_columns:
    data[column] = data[column].fillna('Not specified')

numerical_columns = ['fatalities', 'inter1', 'inter2', 'interaction']

# Missing numerical values are replaced with the column median.
# NOTE(review): inter1 / inter2 / interaction look like category codes rather than
# measurements; a median fill could produce a code that does not exist -- confirm.
for column in numerical_columns:
    data[column] = data[column].fillna(data[column].median())

# Remaining NA count per column (expected to be all zeros after the fills above).
print(data.isna().sum())

event_date            0
year                  0
disorder_type         0
event_type            0
sub_event_type        0
actor1                0
inter1                0
actor2                0
inter2                0
interaction           0
civilian_targeting    0
country               0
admin1                0
admin2                0
source                0
notes                 0
fatalities            0
dtype: int64


0 Missing Values within Dataset :)

In [4]:
# Remove fully duplicated rows and report how many were discarded.
initial_row_count = len(data.index)
data = data.drop_duplicates()
final_row_count = len(data.index)
print(f"Removed {initial_row_count - final_row_count} duplicates")

Removed 40 duplicates


38090 rows × 17 columns - After cleaning

In [5]:
# Display the cleaned DataFrame (the rendered output is truncated to the first and last rows).
data

Unnamed: 0,event_date,year,disorder_type,event_type,sub_event_type,actor1,inter1,actor2,inter2,interaction,civilian_targeting,country,admin1,admin2,source,notes,fatalities
0,2024-04-19,2024,Political violence,Explosions/Remote violence,Shelling/artillery/missile attack,Hamas Movement,3,Not specified,0,30,Not specified,Israel,HaDarom,Ashqelon,N12; Times of Israel,"Interception: On 19 April 2024, Hamas militant...",0
1,2024-04-19,2024,Demonstrations,Protests,Peaceful protest,Protesters (Israel),6,Not specified,0,60,Not specified,Israel,Haifa,Hadera,Ynet,"On 19 April 2024, thousands formed a human cha...",0
2,2024-04-19,2024,Demonstrations,Riots,Violent demonstration,Rioters (Israel),5,Rioters (Israel),5,55,Not specified,Israel,HaMerkaz,Ramla,Haaretz; Jerusalem Post; N12; Times of Israel;...,"On 19 April 2024, dozens of relatives of hosta...",0
3,2024-04-19,2024,Demonstrations,Protests,Peaceful protest,Protesters (Israel),6,Not specified,0,60,Not specified,Israel,HaMerkaz,Rehovot,Ynet,"On 19 April 2024, thousands formed a human cha...",0
4,2024-04-19,2024,Political violence,Battles,Armed clash,Hezbollah,3,Military Forces of Israel (2022-),1,13,Not specified,Israel,HaZafon,Zefat,Jerusalem Post; Ma'ariv; Times of Israel,"On 19 April 2024, Hezbollah forces in Lebanon ...",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38125,2021-04-20,2021,Strategic developments,Strategic developments,Other,Military Forces of Israel (2009-2021),8,Civilians (Palestine),7,78,Not specified,Palestine,Gaza Strip,North Gaza,PLO Negotiations Affairs Department,"Other: On 20 April 2021, Israeli military forc...",0
38126,2021-04-20,2021,Political violence,Riots,Mob violence,Rioters (Israel),5,Not specified,0,50,Not specified,Palestine,West Bank,Hebron,PLO Negotiations Affairs Department,"On 20 April 2021, Israeli settlers uprooted 20...",0
38127,2021-04-20,2021,Demonstrations,Protests,Peaceful protest,Protesters (Palestine),6,Not specified,0,60,Not specified,Palestine,Gaza Strip,Deir El Balah,Ma'an News Agency,"On 20 April 2021, a large protest organized by...",0
38128,2021-04-20,2021,Political violence,Riots,Mob violence,Rioters (Israel),5,Civilians (Palestine),7,57,Civilian targeting,Palestine,West Bank,Ramallah and Al Bireh,PLO Negotiations Affairs Department,"On 20 April 2021, Israeli settlers from the Ba...",0


In [6]:
# Grouping together actor1 and actor2 name variants

# Ordered (substrings, label) rules: the first rule with any substring contained
# in the actor name wins, so the order of this table is significant.
_ACTOR_LABEL_RULES = (
    (('Military Forces of Israel',), 'Military Forces of Israel'),
    (('Police Forces of Israel',), 'Police Forces of Israel'),
    (('Hamas Movement',), 'Hamas Movement'),
    # Israeli police are already captured by the rule above, so only government
    # actors reach this label; the label text is kept unchanged so existing
    # outputs stay the same.
    # NOTE(review): if police were meant to be merged with government (as on the
    # Palestinian side), remove the 'Police Forces of Israel' rule above -- confirm.
    (('Government of Israel',), 'Government and Police Forces of Israel'),
    (('Police Forces of Palestine', 'Government of Palestine'), 'Government and Police Forces of Palestine'),
    (('PIJ:', 'Islamic Jihad'), 'Palestinian Islamic Jihad'),
    (('Hezbollah',), 'Hezbollah'),
    (('Al Aqsa',), 'Al Aqsa Martyrs Brigade'),
    (('Katibat',), 'Katibat Groups (Palestine)'),
    (('PFLP:',), 'Popular Front for the Liberation of Palestine'),
    (('DFLP:',), 'Democratic Front for the Liberation of Palestine'),
    (('Military Forces of Iran',), 'Iranian Revolutionary Guard Corps'),
    (('Islamic State',), 'Islamic State'),
)

# Ordered (substring, fallback) rules for generic actor categories: names that
# mention Israel or Palestine are kept verbatim, every other name collapses to
# the fallback label.
_INTERNATIONAL_FALLBACK_RULES = (
    ('Civilians', 'Civilians (International)'),
    ('Unidentified Armed Group', 'Unidentified Armed Group (International)'),
    ('Military Forces of', 'Military Forces of International Forces'),
)


def consolidate_names(name):
    """Map a raw actor name onto a consolidated actor label.

    Matching is by substring and rules are evaluated in a fixed order:

    1. ``_ACTOR_LABEL_RULES`` -- named organisations; first match wins.
    2. ``_INTERNATIONAL_FALLBACK_RULES`` -- civilians, unidentified armed
       groups and military forces; names containing 'Israel' or 'Palestine'
       are returned unchanged, all others get an "International" label.
    3. Any name containing 'Settlers' becomes 'Settlers (Israel)'.
    4. Everything else (including protesters and rioters) is returned unchanged.

    Parameters
    ----------
    name : str
        Raw actor name. Non-string values (e.g. NaN or None) are returned
        unchanged instead of raising ``TypeError`` on the substring test.

    Returns
    -------
    str
        The consolidated label, or ``name`` itself when no rule applies.
    """
    # Guard against missing values that were not replaced beforehand.
    if not isinstance(name, str):
        return name

    for substrings, label in _ACTOR_LABEL_RULES:
        if any(substring in name for substring in substrings):
            return label

    mentions_israel_or_palestine = 'Israel' in name or 'Palestine' in name
    for substring, international_label in _INTERNATIONAL_FALLBACK_RULES:
        if substring in name:
            return name if mentions_israel_or_palestine else international_label

    if 'Settlers' in name:
        return 'Settlers (Israel)'

    # Protesters, rioters and all remaining actors keep their specific name.
    return name

# Derive a consolidated "<actor>_grouped" column for each of the two actor columns.
for actor_column in ('actor1', 'actor2'):
    data[f'{actor_column}_grouped'] = data[actor_column].apply(consolidate_names)

In [7]:
# Occurrence count of every consolidated label, computed separately per actor column.
actor1_counts, actor2_counts = (
    data[grouped_column].value_counts()
    for grouped_column in ('actor1_grouped', 'actor2_grouped')
)

def consolidate_small_groups(name, counts, threshold=10):
    """Fold a rarely occurring actor label into a country-level "Other" bucket.

    Parameters
    ----------
    name : str
        Actor label to evaluate (the grouped label, not the raw actor name).
    counts : mapping
        Occurrence count per label, looked up as ``counts[name]`` (for example
        the result of ``Series.value_counts()`` or a plain dict).
    threshold : int, default 10
        A label is treated as rare when its count is strictly below this value.

    Returns
    -------
    str
        'Other (Israel)' or 'Other (Palestine)' for a rare label that mentions
        the respective country ('Israel' is checked first); otherwise ``name``
        unchanged -- including rare labels that mention neither country.

    Raises
    ------
    KeyError
        Propagated from ``counts[name]`` when ``name`` has no entry in a dict
        or Series ``counts``.
    """
    is_rare = counts[name] < threshold
    if is_rare and 'Israel' in name:
        return 'Other (Israel)'
    if is_rare and 'Palestine' in name:
        return 'Other (Palestine)'
    # Frequent labels, and rare labels without a country marker, are kept as-is.
    return name

# Collapse rare labels in each grouped column, using the label counts computed beforehand.
for grouped_column, label_counts in (('actor1_grouped', actor1_counts),
                                     ('actor2_grouped', actor2_counts)):
    data[grouped_column] = data[grouped_column].apply(
        lambda label: consolidate_small_groups(label, label_counts)
    )

# Show the refreshed label frequencies to confirm the re-categorization.
for grouped_column in ('actor1_grouped', 'actor2_grouped'):
    print(data[grouped_column].value_counts())


actor1_grouped
Military Forces of Israel                        12260
Rioters (Palestine)                               8269
Protesters (Israel)                               4367
Rioters (Israel)                                  3515
Protesters (Palestine)                            2369
Unidentified Armed Group (Palestine)              1937
Hamas Movement                                    1440
Settlers (Israel)                                  906
Hezbollah                                          814
Katibat Groups (Palestine)                         556
Al Aqsa Martyrs Brigade                            457
Police Forces of Israel                            423
Palestinian Islamic Jihad                          321
Lions' Den                                         100
Unidentified Armed Group (Israel)                   59
Government and Police Forces of Israel              49
Mujahideen Brigades                                 44
Government and Police Forces of Palestine         

In [8]:
#One Hot Encoding
#categorical_cols = ['disorder_type', 'event_type', 'actor1', 'event_type'] 