# Notebook 2: Cleaning and narrowing dataset to cartel related events

In [1]:
import pandas as pd
import numpy as np
import datetime
#import geopandas as gp

In [2]:
#using full icews data set to screen for cartel events and make sure I'm not filtering anything out accidentally
cartel_df = pd.read_csv('../data/icews_main.csv', parse_dates=['event_date'],dtype={
       'event_id':'int','intensity':'float','story_id':'int','sentence_number':'int'})

In [None]:
#standardize events involving cartels, consolidating cartel factions to
#reflect umbrella organizations
cartels = {
    "Armed Gang (Sinaloa Cartel)": 'Sinaloa Cartel',
    "Armed Gang (Juárez Cartel)":'Juárez Cartel',
    "Organized Crime (Gulf Cartel)":'Gulf Cartel',
    "Armed Gang (Gulf Cartel)":'Gulf Cartel',
    "Drug Gang (Los Zetas Cartel)":'Los Zetas Cartel',
    "Hitman (Gulf Cartel)":'Gulf Cartel',
    "Hitman (Los Zetas Cartel)":'Los Zetas Cartel',
    "Criminal (Los Zetas Cartel)":'Los Zetas Cartel',
    "Drug Gang (Gulf Cartel)":'Gulf Cartel',
    "Armed Services Deserter (Los Zetas Cartel)":'Los Zetas Cartel',
    "Armed Gang (Knights Templar Cartel)":'Knights Templar Cartel',
    "La Familia Michoacana": 'La Familia Michoacana Cartel',
    "Drug Gang (La Familia Michoacana)": 'La Familia Michoacana Cartel',
    "La Línea": 'Juárez Cartel',
    "Criminal (La Línea)":'Juárez Cartel',
    "Barrio Azteca": 'Barrio Azteca Drug Gang',
    "Gente Nueva": "Sinaloa Cartel",
    "BeltrÃ¡n-Leyva Cartel": "Beltrán-Leyva Cartel",
    "Los Negros": "Beltrán-Leyva Cartel",
    "Mexicles": "Mexicles Drug Gang"
}
cartel_df = cartel_df.replace(cartels)

In [4]:
cartel_source = cartel_df.loc[cartel_df['source_name'].str.contains('Cartel')]
cartel_target = cartel_df.loc[cartel_df['target_name'].str.contains('Cartel')]
drug_gang_source = cartel_df.loc[cartel_df['source_name'].str.contains('Drug')]
drug_gang_target = cartel_df.loc[cartel_df['target_name'].str.contains('Drug')]

In [5]:
cartels = pd.concat([cartel_source, cartel_target, drug_gang_source, drug_gang_target])

In [6]:
cartels.shape

(13957, 21)

In [7]:
cartels['event_year'] = cartels['event_date'].dt.year

In [3]:
cartel_df["event_year"].value_counts()

KeyError: 'event_year'

In [8]:
cartels['event_year'].unique()

array([1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
       2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2017, 2018, 2019,
       2020, 1995], dtype=int64)

In [9]:
cartels['country'].unique()

array(['Mexico', 'Peru', 'Colombia', 'Cuba', 'Panama', 'United States',
       'Brazil', 'Venezuela', 'Guatemala', 'Italy', 'Canada', 'Spain',
       'China', 'El Salvador', 'Ecuador', 'Costa Rica', 'Honduras',
       'Nigeria', 'United Kingdom', 'Dominican Republic', 'Ireland',
       'South Africa', 'New Zealand', 'Philippines', 'Bolivia', 'Jamaica',
       'Thailand', 'Laos', 'Vietnam', 'Singapore', 'Chile', 'Japan',
       'Australia', 'Austria', 'United Arab Emirates', 'Belize',
       'Myanmar', 'Romania', 'Argentina', 'Tajikistan', 'Hong Kong',
       'Saudi Arabia', 'Latvia', 'Germany', 'Kosovo',
       'Trinidad and Tobago', 'Zambia', 'Russian Federation', 'Greece',
       'Iran', 'Indonesia', 'Cambodia', 'Czech Republic', 'Turkey',
       'Kazakhstan', 'Netherlands', 'Malaysia', 'Pakistan', 'India',
       'Senegal', 'Afghanistan', 'Georgia', 'Lebanon', 'Angola',
       'Bangladesh', 'Uruguay', 'France', 'South Korea', 'Nicaragua',
       'Slovakia', 'Iraq', 'Bulgaria', 'Serb

In [10]:
cartels['cameo_code'].unique()

array([ 180,   10,   42,   40,  173,   36,   71,  112,   43,   51,  181,
        841,  356,  111, 1121,  193,   20,  130,   90,  313,   44,   63,
        331,   12,  138,   60,   15, 1822,   32,   50,  874,   61, 1823,
        192,   72,  100,   13, 1821,  190,  353,  125,   14,  174,  120,
         64,  160,   30,  186,  154,   62,  163, 1621,   81,  141,   46,
         70,   80,  183,   41,  113,  195,  311,  142,   16, 1122, 1711,
        102,  182,  161,  833,  332, 1246,   74,   17,  214,   57,   22,
        213, 1211,  115,  150,  191,   25,  128,   91,   18, 1125,  172,
        842, 1031,  114,  231,   19,  194,  170,  202,  312,  139,  124,
        132, 1243, 1041,  233,   21,  171,  145,  143, 1661,  253,   35,
        184,  333,  256,   55, 1313,   45,   73,  861,   23,  137,   75,
       1053,  133, 1241,  153,  105,  351,  127, 1712,   26,   37,   11,
       1014,  106, 1056], dtype=int64)

In [11]:
cartels_v1 = cartels.loc[cartels["cameo_code"].isin([
180,181,182,183,184,185,186,190,191,192,193,194,195,202,1822,1823])]

In [12]:
cartels_v1.shape

(3066, 22)

In [13]:
cartels_v1['event_year'].unique()

array([1996, 1999, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2017, 2018, 2019, 2020, 1997, 2000, 2002, 1995, 1998,
       2001, 2006], dtype=int64)

In [14]:
cartels_v1['country'].unique()

array(['Mexico', 'Guatemala', 'Colombia', 'United States', 'Venezuela',
       'Peru', 'United Kingdom', 'Honduras', 'South Africa', 'Brazil',
       'Philippines', 'Thailand', 'Singapore', 'Australia', 'Canada',
       'Myanmar', 'Saudi Arabia', 'Hong Kong', 'Germany',
       'Trinidad and Tobago', 'Iran', 'Ireland', 'Czech Republic',
       'New Zealand', 'Nigeria', 'Senegal', 'China', 'Georgia', 'Lebanon',
       'India', 'Bangladesh', 'Afghanistan', 'United Arab Emirates',
       'Italy', 'Tajikistan', 'Kenya', 'Israel', 'Indonesia',
       'Netherlands', 'Kazakhstan', 'Occupied Palestinian Territory',
       'Russian Federation', 'Zambia', 'Japan', 'Haiti', 'Guinea-Bissau',
       'Greece', 'Egypt', 'Dominican Republic', 'Cambodia', 'Ukraine',
       'Malaysia', 'Turkey', 'Chile', 'Bulgaria', 'Jamaica', 'Argentina',
       'Kyrgyzstan', 'Tanzania', 'Switzerland', 'Namibia', 'Paraguay',
       'New Caledonia', 'Taiwan', 'Albania', 'Liberia', 'Pakistan',
       'Uruguay', 'Spain', '

In [15]:
cartels_v1.head()

Unnamed: 0.1,Unnamed: 0,event_id,event_date,source_name,source_sectors,source_country,event_text,cameo_code,intensity,target_name,...,story_id,sentence_number,publisher,city,district,state,country,lat,lon,event_year
385783,385783,1348307,1996-12-30,Beltrán-Leyva Cartel,,Mexico,Use unconventional violence,180,-9.0,Foreign Affairs (Japan),...,29239867,4,BBC Monitoring Latin America,Beltrán-Leyva Cartel,,Estado de Zacatecas,Mexico,22.134,-103.068,1996
1507433,1507433,2501317,1999-11-30,Juárez Cartel,"Dissident,Criminals / Gangs",Mexico,Use unconventional violence,180,-9.0,Citizen (Mexico),...,31812087,3,Servicio Universal de Noticias,Juarez,,Estado de Chihuahua,Mexico,31.7333,-106.483,1999
1509636,1509636,2503620,1999-12-02,Juárez Cartel,"Criminals / Gangs,Dissident",Mexico,Use unconventional violence,180,-9.0,Citizen (Mexico),...,30238340,6,Reuters News,Juarez,,Estado de Chihuahua,Mexico,31.7333,-106.483,1999
4503880,4503880,6555207,2003-10-14,Juárez Cartel,"Criminals / Gangs,Dissident",Mexico,"Abduct, hijack, or take hostage",181,-9.0,Men (Mexico),...,13698349,5,AP Spanish Worldstream,,,,Mexico,19.4285,-99.1277,2003
5330245,5330245,7779500,2004-09-13,Juárez Cartel,"Dissident,Criminals / Gangs",Mexico,Use unconventional violence,180,-9.0,Gulf Cartel,...,13883098,2,El Norte,Nuevo Leon,,Estado de Yucatan,Mexico,21.2667,-87.6167,2004


In [16]:
Mex = ['Mexico']
cartel_mex = cartels_v1[cartels_v1.country.isin(Mex)]

In [17]:
cartel_mex.head(15)

Unnamed: 0.1,Unnamed: 0,event_id,event_date,source_name,source_sectors,source_country,event_text,cameo_code,intensity,target_name,...,story_id,sentence_number,publisher,city,district,state,country,lat,lon,event_year
385783,385783,1348307,1996-12-30,Beltrán-Leyva Cartel,,Mexico,Use unconventional violence,180,-9.0,Foreign Affairs (Japan),...,29239867,4,BBC Monitoring Latin America,Beltrán-Leyva Cartel,,Estado de Zacatecas,Mexico,22.134,-103.068,1996
1507433,1507433,2501317,1999-11-30,Juárez Cartel,"Dissident,Criminals / Gangs",Mexico,Use unconventional violence,180,-9.0,Citizen (Mexico),...,31812087,3,Servicio Universal de Noticias,Juarez,,Estado de Chihuahua,Mexico,31.7333,-106.483,1999
1509636,1509636,2503620,1999-12-02,Juárez Cartel,"Criminals / Gangs,Dissident",Mexico,Use unconventional violence,180,-9.0,Citizen (Mexico),...,30238340,6,Reuters News,Juarez,,Estado de Chihuahua,Mexico,31.7333,-106.483,1999
4503880,4503880,6555207,2003-10-14,Juárez Cartel,"Criminals / Gangs,Dissident",Mexico,"Abduct, hijack, or take hostage",181,-9.0,Men (Mexico),...,13698349,5,AP Spanish Worldstream,,,,Mexico,19.4285,-99.1277,2003
5330245,5330245,7779500,2004-09-13,Juárez Cartel,"Dissident,Criminals / Gangs",Mexico,Use unconventional violence,180,-9.0,Gulf Cartel,...,13883098,2,El Norte,Nuevo Leon,,Estado de Yucatan,Mexico,21.2667,-87.6167,2004
5330248,5330248,7779503,2004-09-13,Juárez Cartel,"Dissident,Criminals / Gangs",Mexico,Use unconventional violence,180,-9.0,Gulf Cartel,...,13883171,3,Mural,Nuevo Leon,,Estado de Yucatan,Mexico,21.2667,-87.6167,2004
5330277,5330277,7779532,2004-09-13,Juárez Cartel,"Dissident,Criminals / Gangs",Mexico,Use unconventional violence,180,-9.0,Gulf Cartel,...,13883437,3,Reforma,Nuevo Leon,,Estado de Yucatan,Mexico,21.2667,-87.6167,2004
5401885,5401885,7891128,2004-10-09,Juárez Cartel,"Dissident,Criminals / Gangs",Mexico,Use unconventional violence,180,-9.0,Mexico,...,4840572,3,EFE News Service,Sinaloa,,Estado de Chiapas,Mexico,15.8931,-92.1298,2004
5680028,5680028,8293506,2005-01-19,Tijuana Cartel,"Dissident,Criminals / Gangs",Mexico,"Abduct, hijack, or take hostage",181,-9.0,Citizen (Mexico),...,14005527,2,Servicio Universal de Noticias,Tijuana,,Estado de Baja California,Mexico,32.5027,-117.004,2005
5725370,5725370,8363112,2005-02-04,Sinaloa Cartel,"Criminals / Gangs,Dissident",Mexico,Use unconventional violence,180,-9.0,Mexico,...,7094900,6,Associated Press Newswires,Nuevo Laredo,,Estado de Tamaulipas,Mexico,27.4763,-99.5164,2005


In [18]:
cartel_mex.shape

(1600, 22)

In [19]:
cartel_mex['event_year'].unique()

array([1996, 1999, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012,
       2013, 2014, 2018, 2019, 2020, 1997, 2000, 2002, 2017, 2006, 1998],
      dtype=int64)

In [20]:
cartel_mex['event_year'].value_counts()

2010    297
2011    219
2009    174
2008    172
2014    143
2007    137
2012    135
2013     65
2005     53
2019     50
2017     42
2006     42
2018     27
2004     17
1999      7
2020      4
1997      4
2002      4
2000      3
2003      2
1998      2
1996      1
Name: event_year, dtype: int64

In [21]:
# NOT WORKING
# years = [2001,2002,2003,'2004','2005',
#     '2006','2007','2008','2009','2010','2011',
#     '2012','2013','2014','2015','2016']
# cartel_mex = cartel_mex.loc[cartel_mex.event_year.isin(years)]
# cartel_mex.shape

In [22]:
# state = ['Central America']
# cartel_mex = cartels_v1[cartels_v1.provice.isin(Mex)]

AttributeError: 'DataFrame' object has no attribute 'provice'

In [23]:
cartel_mex['source_name'].unique()

array(['Beltrán-Leyva Cartel', 'Juárez Cartel', 'Tijuana Cartel',
       'Sinaloa Cartel', 'Los Zetas Cartel', 'Gulf Cartel',
       'La Familia Michoacana Cartel', 'Knights Templar Cartel',
       'Attacker (Mexico)', 'Citizen (Mexico)', 'Criminal (Mexico)',
       'Citizen (United States)', 'Drug Gang (Mexico)',
       'Joaquín Guzmán Loera', 'Mexico', 'Mob (Mexico)',
       'Military (Mexico)', 'Police (Mexico)', 'Barrio Azteca Drug Gang',
       'Military Personnel (United States)',
       'Military Personnel - Special (Mexico)', 'Mexican Army',
       'Edgar Valdez Villarreal', 'Marine Corp (Mexico)',
       'Armed Gang (Mexico)', 'Business (Mexico)',
       'Miguel Treviño Morales', 'Secret Agent (Mexico)', 'Mexican Navy',
       'Militia (Mexico)', 'Military Personnel (Mexico)', 'Men (Mexico)',
       'Drug Dealer (Mexico)', 'Drug Gang (Brazil)', 'Mexicles Drug Gang',
       'Drug Enforcement Administration', 'Drug Dealer (Brazil)',
       'Governor (Mexico)', 'Police (Brazil)',

In [24]:
cartel_mex.to_csv('../data/icews_cartels.csv')