In [1]:
import pandas as pd
import numpy as np
pd.options.display.max_rows = 999

In [2]:
# Reading in Data
acled = pd.read_csv('data/ACLED.csv')
acled.head()

Unnamed: 0,data_id,iso,event_id_cnty,event_id_no_cnty,event_date,year,time_precision,event_type,sub_event_type,actor1,...,location,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,timestamp,iso3
0,6708242,368,IRQ22736,22736,31-Dec-19,2019,1,Protests,Peaceful protest,Protesters (Iraq),...,Qurna,31.0167,47.4333,1,Al Mirbad,National,"On Dec 31, protesters in Qurna came out to den...",0,1578503874,IRQ
1,6716953,760,SYR73663,73663,31-Dec-19,2019,1,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Syria (2000-),...,Hazarin,35.5998,36.5267,1,SOHR,Other,"On 31 December 2019, regime forces shelled Haz...",0,1578515228,SYR
2,6716955,760,SYR73713,73713,31-Dec-19,2019,1,Explosions/Remote violence,Shelling/artillery/missile attack,Opposition Rebels (Syria),...,Aslieh,35.1967,36.4892,1,SOHR,Other,"On 31 December 2019, opposition rebels shelled...",0,1578515228,SYR
3,6716700,760,SYR73184,73184,31-Dec-19,2019,1,Battles,Armed clash,Unidentified Armed Group (Syria),...,Ar-Ra'ee,36.6125,37.4464,1,SOHR,Other,"On 31 December 2019, unidentified gunmen assas...",1,1578515227,SYR
4,6716960,760,SYR73664,73664,31-Dec-19,2019,1,Explosions/Remote violence,Shelling/artillery/missile attack,Military Forces of Syria (2000-),...,Kafr Nobol,35.6147,36.5603,1,SOHR,Other,"On 31 December 2019, regime forces shelled Kaf...",0,1578515228,SYR


In [3]:
# Converting Date column to panas datetime format
acled['event_date'] = pd.to_datetime(acled['event_date'])
acled.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 91910 entries, 0 to 91909
Data columns (total 31 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   data_id           91910 non-null  int64         
 1   iso               91910 non-null  int64         
 2   event_id_cnty     91910 non-null  object        
 3   event_id_no_cnty  91910 non-null  int64         
 4   event_date        91910 non-null  datetime64[ns]
 5   year              91910 non-null  int64         
 6   time_precision    91910 non-null  int64         
 7   event_type        91910 non-null  object        
 8   sub_event_type    91910 non-null  object        
 9   actor1            91910 non-null  object        
 10  assoc_actor_1     19824 non-null  object        
 11  inter1            91910 non-null  int64         
 12  actor2            55510 non-null  object        
 13  assoc_actor_2     9890 non-null   object        
 14  inter2            9191

In [4]:
# Reading in Tor Data
tor_filepaths = [
    'data/Egypt-Tor-Metrics.csv',
    'data/Iraq-Tor-Metrics.csv',
    'data/Jordan-Tor-Metrics.csv',
    'data/Lebanon-Tor-Metrics.csv',
    'data/Saudi-Arabia-Tor-Metrics.csv',
    'data/Syria-Tor-Metrics.csv'
    ]

tor_data = [csv for csv in tor_filepaths]
tor_metrics = pd.concat(map(pd.read_csv, tor_filepaths))
tor_metrics['date'] = pd.to_datetime(tor_metrics['date'])

In [5]:
tor_metrics['country'].unique()

array(['eg', 'iq', 'jo', 'lb', 'sa', 'sy'], dtype=object)

In [6]:
# Mapping country names to match ACLED Data
tor_metrics['country'] = tor_metrics['country'].map({'eg': 'Egypt', 'iq': 'Iraq', 'jo':'Jordan', 'lb': 'Lebanon', 'sa': 'Saudi Arabia', 'sy': 'Syria'})

In [7]:
tor_metrics.head()

Unnamed: 0,date,country,users,frac
0,2017-01-01,Egypt,564,56
1,2017-01-02,Egypt,654,54
2,2017-01-03,Egypt,598,59
3,2017-01-04,Egypt,528,57
4,2017-01-05,Egypt,508,59


In [8]:
# Merging datasets
acled_and_tor = pd.merge_ordered(acled, 
                         tor_metrics, 
                         how='left',
                         left_on=['event_date', 'country'],
                         right_on=['date', 'country'],
                         fill_method='ffill')

In [9]:
# Filtering for US/UN Actions
us_actions = acled_and_tor[(acled_and_tor['actor1'].str.contains('United States|United Nations')) |
                           (acled_and_tor['actor2'].str.contains('United States|United Nations')) |
                           (acled_and_tor['assoc_actor_1'].str.contains('United States|United Nations')) |
                           (acled_and_tor['assoc_actor_2'].str.contains('United States|United Nations'))] 

In [10]:
# Dropping US/UN Actions
acled_and_tor.drop(us_actions.index, inplace=True)

In [11]:
acled_and_tor.isnull().sum()

data_id                 0
iso                     0
event_id_cnty           0
event_id_no_cnty        0
event_date              0
year                    0
time_precision          0
event_type              0
sub_event_type          0
actor1                  0
assoc_actor_1       72001
inter1                  0
actor2              36159
assoc_actor_2       79202
inter2                  0
interaction             0
region                  0
country                 0
admin1                  0
admin2                 20
admin3              19411
location                0
latitude                0
longitude               0
geo_precision           0
source                  0
source_scale            0
notes                   0
fatalities              0
timestamp               0
iso3                    0
date                    0
users                   0
frac                    0
dtype: int64

In [12]:
# Filling Unknow for missing actor data
acled_and_tor['actor2'].fillna('Unknown', inplace=True)
acled_and_tor['assoc_actor_1'].fillna('Unknown', inplace=True)
acled_and_tor['assoc_actor_2'].fillna('Unknown', inplace=True)
acled_and_tor['admin2'].fillna('Unknown', inplace=True)

In [13]:
# Filling 0 for missing Tor Data
acled_and_tor['users'].fillna(0, inplace=True)
acled_and_tor['frac'].fillna(0, inplace=True)

In [14]:
acled_and_tor.columns

Index(['data_id', 'iso', 'event_id_cnty', 'event_id_no_cnty', 'event_date',
       'year', 'time_precision', 'event_type', 'sub_event_type', 'actor1',
       'assoc_actor_1', 'inter1', 'actor2', 'assoc_actor_2', 'inter2',
       'interaction', 'region', 'country', 'admin1', 'admin2', 'admin3',
       'location', 'latitude', 'longitude', 'geo_precision', 'source',
       'source_scale', 'notes', 'fatalities', 'timestamp', 'iso3', 'date',
       'users', 'frac'],
      dtype='object')

In [15]:
# Dropping Tor Date and admin3 Column
acled_and_tor.drop(columns=['date', 'admin3'], inplace=True)

In [16]:
# Extracting Month Column and Creating Seperate Column
acled_and_tor['month'] = acled_and_tor['event_date'].dt.month

In [35]:
# Reading in Economic Data
econ_data = pd.read_csv('data/econ-data-imputed.csv')
econ_data.head()

Unnamed: 0,Country,Code,Year,Month,Date,Consumer Price Index (CPI),Inflation monthly percent change in the CPI,Exchange rate USD,GDP per capita Purchasing Power Parity,Labor force million people,...,Security threats index 0 (low) - 10 (high),State legitimacy index 0 (high) - 10 (low),Public services index 0 (high) - 10 (low),Human rights and rule of law index 0 (high) - 10 (low),Refugees and displaced persons index 0 (low) - 10 (high),Military spending percent of GDP,Labor force million people.1,Labor force participation rate,Percent urban population,Population density people per square km
0,Egypt,EGY,2017,1,1/1/17,227.5,4.31,18.6331,10672.8,30.66,...,8.1,8.2,4.9,9.8,7.3,1.42,30.66,48.01,42.71,97.0
1,Egypt,EGY,2017,2,2/1/17,233.7,2.73,17.2295,10672.8,30.66,...,8.1,8.2,4.9,9.8,7.3,1.42,30.66,48.01,42.71,97.0
2,Egypt,EGY,2017,3,3/1/17,238.5,2.05,17.6674,10672.8,30.66,...,8.1,8.2,4.9,9.8,7.3,1.42,30.66,48.01,42.71,97.0
3,Egypt,EGY,2017,4,4/1/17,242.7,1.76,18.0983,10672.8,30.66,...,8.1,8.2,4.9,9.8,7.3,1.42,30.66,48.01,42.71,97.0
4,Egypt,EGY,2017,5,5/1/17,246.5,1.57,18.0884,10672.8,30.66,...,8.1,8.2,4.9,9.8,7.3,1.42,30.66,48.01,42.71,97.0


In [36]:
econ_data['Date'] = pd.to_datetime(econ_data['Date'])

In [37]:
conflict = pd.merge(acled_and_tor, econ_data,
                            how='left',
                            left_on=['country', 'year', 'month'],
                            right_on=['Country', 'Year', 'Month'])

In [38]:
conflict.isnull().sum()

data_id                                                         0
iso                                                             0
event_id_cnty                                                   0
event_id_no_cnty                                                0
event_date                                                      0
year                                                            0
time_precision                                                  0
event_type                                                      0
sub_event_type                                                  0
actor1                                                          0
assoc_actor_1                                                   0
inter1                                                          0
actor2                                                          0
assoc_actor_2                                                   0
inter2                                                          0
interactio

In [30]:
print(conflict.shape)
print(acled_and_tor.shape)


(88927, 87)
(88927, 33)


In [44]:
conflict['missing'] = [1 for i in conflict['Consumer Price Index (CPI)'] if i == np.nan]

ValueError: Length of values does not match length of index

In [18]:
#conflict.to_csv('data/conflict.csv', index=False)

In [19]:
data.head()

NameError: name 'data' is not defined

In [20]:
data.isnull().sum()

NameError: name 'data' is not defined