In [1]:
import pandas as pd
import numpy as np
import glob
import datetime

In [2]:
icews_main = pd.read_csv('../data/icews_main.csv', parse_dates=['event_date'],dtype={
       'event_id':'int','intensity':'float','story_id':'int','sentence_number':'int'})

In [3]:
#extract year to separate column
icews_main['event_year'] = icews_main['event_date'].dt.year

In [4]:
#filter for country Mexico
mexico_main = icews_main.loc[icews_main["country"].isin(['Mexico'])]
mexico_main.shape

(195581, 22)

In [5]:
# check to see if 2017 is in the data frame due to cameo code issues
#(step 1 to compare with second step once cameo codes are filtered)
mex_test = mexico_main.loc[mexico_main["event_year"].isin([2017])]
mex_test.shape

(3498, 22)

In [6]:
mex_180 = mexico_main.loc[mexico_main["cameo_code"].isin([
180,181,182,183,184,185,186,190,191,192,193,194,195,202,1822,1823])]
mexico_main.shape

(195581, 22)

In [7]:
#CAMEO codes to show which events are present in the dataframe
# DOUBLE CHECK TO MAKE SURE THIS IS RIGHT
explore = mex_180['cameo_code'].unique()
explore

array([ 192,  193,  190,  180,  181,  202,  182,  191, 1822,  186,  194,
        195, 1823,  183,  185,  184], dtype=int64)

In [8]:
# check to see if 2017 is in the data frame due to cameo code issues
mex_test = mex_180.loc[mex_180["event_year"].isin([2017])]
mex_test.shape

(375, 22)

In [9]:
# looking to see that apostrophes have been removed 
mex_180["cameo_code"].value_counts()

180     7545
190     4136
193     3036
181     1353
182      450
192      384
186      207
1823     149
1822     148
183      142
191      136
202       49
195       19
194        4
184        3
185        2
Name: cameo_code, dtype: int64

In [10]:
mex_180["event_text"].value_counts()

Use unconventional violence                            7545
Use conventional military force                        4136
fight with small arms and light weapons                3036
Abduct, hijack, or take hostage                        1353
Physically assault                                      450
Occupy territory                                        384
Assassinate                                             207
Kill by physical assault                                149
Torture                                                 148
Conduct suicide, car, or other non-military bombing     142
Impose blockade, restrict movement                      136
Engage in mass killings                                  49
Employ aerial weapons                                    19
fight with artillery and tanks                            4
Use as human shield                                       3
Attempt to assassinate                                    2
Name: event_text, dtype: int64

In [11]:
#viewing different events to see major sources of event activities
mex_occupy = mex_180.loc[mex_180["event_text"].isin(['Occupy territory'])]
mex_occupy.head(10)

Unnamed: 0.1,Unnamed: 0,event_id,event_date,source_name,source_sectors,source_country,event_text,cameo_code,intensity,target_name,...,story_id,sentence_number,publisher,city,district,provice,country,lat,lon,event_year
325,325,927232,1995-01-06,Citizen (Mexico),"Social,General Population / Civilian / Social",Mexico,Occupy territory,192,-9.5,Mexico,...,28238835,1,The Associated Press Political Service,Amatenango del Valle,Amatenango del Valle,Estado de Chiapas,Mexico,16.5167,-92.45,1995
326,326,927233,1995-01-06,Citizen (Mexico),"Social,General Population / Civilian / Social",Mexico,Occupy territory,192,-9.5,Government (Mexico),...,28238835,4,The Associated Press Political Service,Amatenango del Valle,Amatenango del Valle,Estado de Chiapas,Mexico,16.5167,-92.45,1995
46952,46952,981506,1995-07-10,Leftists (Mexico),Parties,Mexico,Occupy territory,192,-9.5,Mexico,...,28188848,3,The Associated Press Political Service,Macuspana,,Estado de Tabasco,Mexico,17.7615,-92.5965,1995
46961,46961,981523,1995-07-10,Leftists (Mexico),Parties,Mexico,Occupy territory,192,-9.5,Mexico,...,28189328,3,The Associated Press Political Service,Macuspana,,Estado de Tabasco,Mexico,17.7615,-92.5965,1995
49799,49799,984524,1995-07-13,Police (Mexico),"Police,Government",Mexico,Occupy territory,192,-9.5,Mexico,...,31905802,1,Reuters - Noticias Latinoamericanas,,,,Mexico,19.4285,-99.1277,1995
121172,121172,1061309,1995-11-07,Citizen (Mexico),"General Population / Civilian / Social,Social",Mexico,Occupy territory,192,-9.5,Mexico,...,28788967,1,Reuters News,Mexico City,,Distrito Federal,Mexico,19.4285,-99.1277,1995
173122,173122,1118389,1996-02-06,Institutional Revolutionary Party,"Ideological,Center Left,Parties,(National) Maj...",Mexico,Occupy territory,192,-9.5,National Action Party,...,28727042,5,Reuters News,La Paz,,Estado de Tamaulipas,Mexico,25.9824,-98.0682,1996
266752,266752,1220733,1996-06-27,Citizen (Mexico),"Social,General Population / Civilian / Social",Mexico,Occupy territory,192,-9.5,Media (Mexico),...,29294990,1,The Associated Press Political Service,Villahermosa,,Distrito Federal,Mexico,19.4833,-99.1,1996
275883,275883,1230611,1996-07-11,Mexican Army,"Army,Government,Military",Mexico,Occupy territory,192,-9.5,Mexico,...,28685766,6,BBC Monitoring Latin America,,,Huasteca,Mexico,22.0,-98.25,1996
305753,305753,1263930,1996-08-31,Armed Rebel (Revolutionary Armed Forces of Col...,"Far Left,Dissident,Ideological,Insurgents",Colombia,Occupy territory,192,-9.5,Military (Mexico),...,28609034,3,Reuters News,Las Delicias,,Estado de Tamaulipas,Mexico,25.1114,-98.674,1996


In [12]:
#explore source sectors for consolidation
mex_180["source_sectors"].unique()

array(['Social,General Population / Civilian / Social',
       'Unidentified Forces', 'Parties',
       'Parties,(National) Major Party,Ideological,Center Left',
       'Dissident,Criminals / Gangs', 'Criminals / Gangs,Dissident',
       '(National) Major Party,Parties,Center Left,Ideological',
       'Government,Police',
       'Dissident,Protestors / Popular Opposition / Mobs',
       'Police,Government',
       'General Population / Civilian / Social,Social',
       'Parties,(National) Major Party,Far Left,Ideological',
       'Protestors / Popular Opposition / Mobs,Dissident',
       'Global,International Government Organization,Global Defense / Security IGOs',
       nan, 'Dissident,Rebel',
       'Ideological,Center Left,(National) Major Party,Parties',
       'Far Left,Ideological,(National) Major Party,Parties',
       'Government,Military', 'Dissident,Elite,Criminals / Gangs',
       'Ideological,Center Left,Parties,(National) Major Party',
       'Military,Government', 'Ideol

COMBINE AND CLEAN SOURCE SECTORS

In [13]:
source_sectors = {
    "Government,Police": "Government/Police",
    "Police,Government": "Government/Police",
    "Army,Government,Military": "Government/Military",
    "Military,Government,Army": "Government/Military",
    "Government,Army,Military": "Government/Military",
    "Government,Military,Army": "Government/Military",
    "Army,Military,Government": "Government/Military",
    "Military,Navy,Government": "Government/Military",
    "Government,Military,Navy": "Government/Military",
    "Marines,Military,Government": "Government/Military",
    "Military,Government,Navy": "Government/Military",
    "Government,Navy,Military": "Government/Military",
    "Military,Marines,Government": "Government/Military",
    "Government,Military,Marines": "Government/Military",
    "Military,Government,Marines": "Government/Military"}

In [23]:
mex_180['source_sectors'] = mex_180['source_sectors'].map(source_sectors)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mex_180['source_sectors'] = mex_180['source_sectors'].map(source_sectors)


In [15]:
#explore source sectors for consolidation
mex_180["source_sectors"].unique()

array([325, 326, 336, ..., 16659004, 16659005, 16659006], dtype=object)

In [16]:
#mex_180.to_csv('../data/icews_mexico_180.csv')

In [17]:
# what to do with events without a longitude latitude

In [18]:
# Mex = ['Mexico']
# source_country = mex_violence[~mex_violence.source_country.isin(Mex)]
# source_country.shape

In [19]:
# source_country.tail(10)

In [20]:
# # see unique source countries in the df
# source_country["source_country"].unique()

In [21]:
# # filter so that the only source country is mexico
# mexico = mex_violence[mex_violence.source_country == 'Mexico']
# mexico.shape

In [22]:
# #fix 2017 cameo codes to remove apostrophes
# cameo = {
#     '180':180,
#     '181':181,
#     '182':182,
#     '183':183,
#     '184':184,
#     '185':185,
#     '186':186,
#     '190':190,
#     '191':191,
#     '192':192,
#     '193':193,
#     '194':194,
#     '195':195,
#     '202':202,
#     '1822':1822,
#     '128':128,
#     '1823':1823}
# mexico_main.cameo_code = mexico_main.cameo_code.replace(cameo)