### Loading Data

In [419]:
pd.set_option('display.precision',8)
pd.set_option('display.max_rows', 1500)
pd.set_option('display.max_columns', 1500)
pd.set_option('display.width', 10000)

1. Read in the dataset, and view a sample of rows with the `.sample` method:

In [1]:
import numpy as np  
import pandas as pd 
import folium

In [4]:
gterr = pd.read_csv('globalterrorismdb.csv',delimiter=';', low_memory=False)

In [5]:
gterr.sample(random_state=42)

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
188334,201807140017,2018,7,14,,0,,228,Yemen,10,...,,"""Yemen: Roundup of Political, Security, CT Dev...",,,START Primary Collection,-9,-9,0,-9,


2. Get the dimensions of the DataFrame with the `.shape` attribute:

In [6]:
gterr.shape

(201183, 135)

In [21]:
gterr.columns

Index(['eventid', 'iyear', 'imonth', 'iday', 'approxdate', 'extended', 'resolution', 'country', 'country_txt', 'region',
       ...
       'addnotes', 'scite1', 'scite2', 'scite3', 'dbsource', 'INT_LOG', 'INT_IDEO', 'INT_MISC', 'INT_ANY', 'related'], dtype='object', length=135)

3. List the data type of each column, the number of non-missing values, and memory
usage with the `.info` method:

In [8]:
gterr.info(max_cols=200)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201183 entries, 0 to 201182
Data columns (total 135 columns):
 #    Column              Non-Null Count   Dtype  
---   ------              --------------   -----  
 0    eventid             201183 non-null  int64  
 1    iyear               201183 non-null  int64  
 2    imonth              201183 non-null  int64  
 3    iday                201183 non-null  int64  
 4    approxdate          11754 non-null   object 
 5    extended            201183 non-null  int64  
 6    resolution          4668 non-null    object 
 7    country             201183 non-null  int64  
 8    country_txt         201183 non-null  object 
 9    region              201183 non-null  int64  
 10   region_txt          201183 non-null  object 
 11   provstate           201183 non-null  object 
 12   city                200757 non-null  object 
 13   latitude            196556 non-null  object 
 14   longitude           196555 non-null  object 
 15   specificity    

4. Get summary statistics for the numerical columns and transpose the DataFrame for
more readable output:

In [13]:
gterr.describe(include=[np.number]).T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
eventid,201183.0,200000000000.0,1340000000.0,197000000000.0,199000000000.0,201000000000.0,202000000000.0,202000000000.0
iyear,201183.0,2000.0,13.4,1970.0,1990.0,2010.0,2020.0,2020.0
imonth,201183.0,6.45,3.39,0.0,4.0,6.0,9.0,12.0
iday,201183.0,15.5,8.81,0.0,8.0,15.0,23.0,31.0
extended,201183.0,0.0499,0.218,0.0,0.0,0.0,0.0,1.0
country,201183.0,131.0,112.0,4.0,75.0,101.0,160.0,1000.0
region,201183.0,7.25,2.91,1.0,6.0,8.0,10.0,12.0
specificity,201182.0,1.47,0.989,1.0,1.0,1.0,1.0,5.0
vicinity,201183.0,0.0684,0.282,-9.0,0.0,0.0,0.0,1.0
crit1,201183.0,0.988,0.107,0.0,1.0,1.0,1.0,1.0


5. Get summary statistics for the object (string) columns:

In [14]:
gterr.describe(include=[np.object]).T

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  gterr.describe(include=[np.object]).T


Unnamed: 0,count,unique,top,freq
approxdate,11754,2946,"September 18-24, 2016",101
resolution,4668,3416,04/08/1998,18
country_txt,201183,205,Iraq,26755
region_txt,201183,12,Middle East & North Africa,56414
provstate,201183,2607,Baghdad,7949
city,200757,42357,Unknown,10721
latitude,196556,59533,33303567,4449
longitude,196555,59358,44371771,4449
location,63769,49150,"The attack took place in Baghdad, Baghdad, Iraq.",142
summary,135062,131656,"09/00/2016: Sometime between September 18, 201...",100


### Reducing memory by changing data types

1. After reading in our college dataset, we select a few columns of different data types
that will clearly show how much memory may be saved:

In [22]:
different_cols = [
    'eventid', 
    'iyear',
    'imonth',
    'iday',
    'approxdate',
    'extended',
    'resolution', 
    'country',
    'country_txt', 
    'region',
]

In [40]:
gterr2 = gterr.loc[:, different_cols]
gterr2.head()

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region
0,197000000001,1970,7,2,,0,,58,Dominican Republic,2
1,197000000002,1970,0,0,,0,,130,Mexico,1
2,197001000001,1970,1,0,,0,,160,Philippines,5
3,197001000002,1970,1,0,,0,,78,Greece,8
4,197001000003,1970,1,0,,0,,101,Japan,4


2. Inspect the data types of each column:

In [41]:
gterr2.dtypes

eventid         int64
iyear           int64
imonth          int64
iday            int64
approxdate     object
extended        int64
resolution     object
country         int64
country_txt    object
region          int64
dtype: object

3. Find the memory usage of each column with the `.memory_usage` method:

In [42]:
original_mem = gterr2.memory_usage(deep=True)
original_mem

Index               128
eventid         1609464
iyear           1609464
imonth          1609464
iday            1609464
approxdate      6907712
extended        1609464
resolution      6601236
country         1609464
country_txt    13021220
region          1609464
dtype: int64

4. There is no need to use 64 bits for the `iyear` since it is small number.
Let's convert it with the `.astype` method:

In [43]:
gterr2['iyear'] = gterr2['iyear'].astype(np.int8)

5. Use the `.dtypes` attribute to confirm the data type change:

In [44]:
gterr2.dtypes

eventid         int64
iyear            int8
imonth          int64
iday            int64
approxdate     object
extended        int64
resolution     object
country         int64
country_txt    object
region          int64
dtype: object

6. Find the memory usage of each column again and note the large reduction:

In [45]:
gterr2.memory_usage(deep=True)

Index               128
eventid         1609464
iyear            201183
imonth          1609464
iday            1609464
approxdate      6907712
extended        1609464
resolution      6601236
country         1609464
country_txt    13021220
region          1609464
dtype: int64

7. To save even more memory, you will want to consider changing object data types to categorical if they have a reasonably low cardinality (number of unique values). Let's first check the number of unique values for both the object columns:

In [32]:
gterr2.select_dtypes(include=['object']).nunique()

approxdate     2946
resolution     3416
country_txt     205
dtype: int64

8. The `country_txt` column is a good candidate to convert to categorical as less than one percent of its values are unique:

In [46]:
gterr2['country_txt'] = gterr2['country_txt'].astype('category')

In [47]:
gterr2.dtypes

eventid           int64
iyear              int8
imonth            int64
iday              int64
approxdate       object
extended          int64
resolution       object
country           int64
country_txt    category
region            int64
dtype: object

9. Compute the memory usage again:

In [48]:
new_mem = gterr2.memory_usage(deep=True)

10. Finally, let's compare the original memory usage with our updated memory usage.
The RELAFFIL column is, as expected, an eighth of its original size, while the
STABBR column has shrunk to just three percent of its original size:

In [49]:
new_mem / original_mem

Index          1.00
eventid        1.00
iyear          0.12
imonth         1.00
iday           1.00
approxdate     1.00
extended       1.00
resolution     1.00
country        1.00
country_txt    0.03
region         1.00
dtype: float64

In [358]:
df = pd.read_csv('globalterrorismdb.csv', delimiter=';', low_memory=False, decimal='.')

### Exploratory Analysis

In [359]:
type(df)

pandas.core.frame.DataFrame

In [360]:
len(df)

201183

In [361]:
df.shape

(201183, 135)

In [363]:
df.head()

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,region_txt,provstate,city,latitude,longitude,specificity,vicinity,location,summary,crit1,crit2,crit3,doubtterr,alternative,alternative_txt,multiple,success,suicide,attacktype1,attacktype1_txt,attacktype2,attacktype2_txt,attacktype3,attacktype3_txt,targtype1,targtype1_txt,targsubtype1,targsubtype1_txt,corp1,target1,natlty1,natlty1_txt,targtype2,targtype2_txt,targsubtype2,targsubtype2_txt,corp2,target2,natlty2,natlty2_txt,targtype3,targtype3_txt,targsubtype3,targsubtype3_txt,corp3,target3,natlty3,natlty3_txt,gname,gsubname,gname2,gsubname2,gname3,gsubname3,motive,guncertain1,guncertain2,guncertain3,individual,nperps,nperpcap,claimed,claimmode,claimmode_txt,claim2,claimmode2,claimmode2_txt,claim3,claimmode3,claimmode3_txt,compclaim,weaptype1,weaptype1_txt,weapsubtype1,weapsubtype1_txt,weaptype2,weaptype2_txt,weapsubtype2,weapsubtype2_txt,weaptype3,weaptype3_txt,weapsubtype3,weapsubtype3_txt,weaptype4,weaptype4_txt,weapsubtype4,weapsubtype4_txt,weapdetail,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,property,propextent,propextent_txt,propvalue,propcomment,ishostkid,nhostkid,nhostkidus,nhours,ndays,divert,kidhijcountry,ransom,ransomamt,ransomamtus,ransompaid,ransompaidus,ransomnote,hostkidoutcome,hostkidoutcome_txt,nreleased,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
0,197000000001,1970,7,2,,0,,58,Dominican Republic,2,Central America & Caribbean,National,Santo Domingo,18456792,-69951164,1.0,0,,,1,1,1,0,,,0,1,0,1,Assassination,,,,,14,Private Citizens & Property,68.0,Named Civilian,,Julio Guzman,58.0,Dominican Republic,,,,,,,,,,,,,,,,,MANO-D,,,,,,,0.0,,,0,,,,,,,,,,,,,13,Unknown,,,,,,,,,,,,,,,,1.0,,,0.0,,,0,,,,,0.0,,,,,,,0.0,,,,,,,,,,,,,PGIS,0,0,0,0,
1,197000000002,1970,0,0,,0,,130,Mexico,1,North America,Federal,Mexico city,19371887,-99086624,1.0,0,,,1,1,1,0,,,0,1,0,6,Hostage Taking (Kidnapping),,,,,7,Government (Diplomatic),45.0,"Diplomatic Personnel (outside of embassy, cons...",Belgian Ambassador Daughter,"Nadine Chaval, daughter",21.0,Belgium,,,,,,,,,,,,,,,,,23rd of September Communist League,,,,,,,0.0,,,0,7.0,,,,,,,,,,,,13,Unknown,,,,,,,,,,,,,,,,0.0,,,0.0,,,0,,,,,1.0,1.0,0.0,,,,Mexico,1.0,800000.0,,,,,,,,,,,,PGIS,0,1,1,1,
2,197001000001,1970,1,0,,0,,160,Philippines,5,Southeast Asia,Tarlac,Unknown,15478598,120599741,4.0,0,,,1,1,1,0,,,0,1,0,1,Assassination,,,,,10,Journalists & Media,54.0,Radio Journalist/Staff/Facility,Voice of America,Employee,217.0,United States,,,,,,,,,,,,,,,,,Unknown,,,,,,,0.0,,,0,,,,,,,,,,,,,13,Unknown,,,,,,,,,,,,,,,,1.0,,,0.0,,,0,,,,,0.0,,,,,,,0.0,,,,,,,,,,,,,PGIS,-9,-9,1,1,
3,197001000002,1970,1,0,,0,,78,Greece,8,Western Europe,Attica,Athens,3799749,23762728,1.0,0,,,1,1,1,0,,,0,1,0,3,Bombing/Explosion,,,,,7,Government (Diplomatic),46.0,Embassy/Consulate,,U.S. Embassy,217.0,United States,,,,,,,,,,,,,,,,,Unknown,,,,,,,0.0,,,0,,,,,,,,,,,,,6,Explosives,16.0,Unknown Explosive Type,,,,,,,,,,,,,Explosive,,,,,,,1,,,,,0.0,,,,,,,0.0,,,,,,,,,,,,,PGIS,-9,-9,1,1,
4,197001000003,1970,1,0,,0,,101,Japan,4,East Asia,Fukouka,Fukouka,33580412,130396361,1.0,0,,,1,1,1,-9,,,0,1,0,7,Facility/Infrastructure Attack,,,,,7,Government (Diplomatic),46.0,Embassy/Consulate,,U.S. Consulate,217.0,United States,,,,,,,,,,,,,,,,,Unknown,,,,,,,0.0,,,0,,,,,,,,,,,,,8,Incendiary,,,,,,,,,,,,,,,Incendiary,,,,,,,1,,,,,0.0,,,,,,,0.0,,,,,,,,,,,,,PGIS,-9,-9,1,1,


In [166]:
df.tail()

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,region_txt,provstate,city,latitude,longitude,specificity,vicinity,location,summary,crit1,crit2,crit3,doubtterr,alternative,alternative_txt,multiple,success,suicide,attacktype1,attacktype1_txt,attacktype2,attacktype2_txt,attacktype3,attacktype3_txt,targtype1,targtype1_txt,targsubtype1,targsubtype1_txt,corp1,target1,natlty1,natlty1_txt,targtype2,targtype2_txt,targsubtype2,targsubtype2_txt,corp2,target2,natlty2,natlty2_txt,targtype3,targtype3_txt,targsubtype3,targsubtype3_txt,corp3,target3,natlty3,natlty3_txt,gname,gsubname,gname2,gsubname2,gname3,gsubname3,motive,guncertain1,guncertain2,guncertain3,individual,nperps,nperpcap,claimed,claimmode,claimmode_txt,claim2,claimmode2,claimmode2_txt,claim3,claimmode3,claimmode3_txt,compclaim,weaptype1,weaptype1_txt,weapsubtype1,weapsubtype1_txt,weaptype2,weaptype2_txt,weapsubtype2,weapsubtype2_txt,weaptype3,weaptype3_txt,weapsubtype3,weapsubtype3_txt,weaptype4,weaptype4_txt,weapsubtype4,weapsubtype4_txt,weapdetail,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,property,propextent,propextent_txt,propvalue,propcomment,ishostkid,nhostkid,nhostkidus,nhours,ndays,divert,kidhijcountry,ransom,ransomamt,ransomamtus,ransompaid,ransompaidus,ransomnote,hostkidoutcome,hostkidoutcome_txt,nreleased,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
201178,201912310028,2019,12,31,"December 31, 2019",0,,95,Iraq,10,Middle East & North Africa,Baghdad,Baghdad,33303567,44371771,1.0,0,The incident occurred along Palestine Street.,12/31/2019: An explosive device detonated outs...,1,1,1,0,,,0,1,0,3,Bombing/Explosion,,,,,14,Private Citizens & Property,77.0,Laborer (General)/Occupation Identified,Not Applicable,Residence of Tribal Leader,95.0,Iraq,,,,,,,,,,,,,,,,,Unknown,,,,,,,0.0,,,0,-99.0,0,0.0,,,,,,,,,,6,Explosives,16.0,Unknown Explosive Type,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,1,3.0,Minor (likely < $1 million),-99.0,Building damaged.,0.0,,,,,,,,,,,,,,,,,"""Iraq: ISHM 235: December 20, 2019 - January 2...",,,START Primary Collection,-9,-9,0,-9,
201179,201912310030,2019,12,31,,0,,195,Sudan,11,Sub-Saharan Africa,West Darfur,El Geneina,13440886,22441728,1.0,0,,12/31/2019: Assailants attacked the police hea...,1,1,1,0,,,0,1,0,9,Unknown,,,,,3,Police,22.0,"Police Building (headquarters, station, school)",Sudanese Police,West Darfur Police Headquarters,195.0,Sudan,,,,,,,,,,,,,,,,,Unknown,,,,,,,0.0,,,0,-99.0,0,0.0,,,,,,,,,,13,Unknown,,,,,,,,,,,,,,,,2.0,0.0,0.0,1.0,0.0,0.0,1,4.0,Unknown,-99.0,Police vehicle and weapons stolen.,0.0,,,,,,,,,,,,,,,,,"""World: Protection in Danger Monthly News Brie...",,,START Primary Collection,-9,-9,0,-9,
201180,201912310031,2019,12,31,"December 31, 2019",0,,195,Sudan,11,Sub-Saharan Africa,West Darfur,El Geneina,13440886,22441728,1.0,0,The incident occurred in El Jebel neighborhood.,12/31/2019: Assailants attacked the West Darfu...,1,1,1,0,,,0,1,0,2,Armed Assault,,,,,2,Government (General),21.0,Government Building/Facility/Office,Government of West Darfur,West Darfur Legislative Council Building,195.0,Sudan,3.0,Police,25.0,Police Security Forces/Officers,Sudanese Police,Officers,195.0,Sudan,,,,,,,,,Unknown,,,,,,,0.0,,,0,-99.0,0,0.0,,,,,,,,,,5,Firearms,5.0,Unknown Gun Type,,,,,,,,,,,,,,2.0,0.0,0.0,0.0,0.0,0.0,1,4.0,Unknown,-99.0,Items stolen from government building.,0.0,,,,,,,,,,,,,,,,,"""World: Protection in Danger Monthly News Brie...",,,START Primary Collection,-9,-9,0,-9,
201181,201912310032,2019,12,31,,0,,92,India,6,South Asia,Jammu and Kashmir,Bagiot Dora,3381279,7409773,1.0,0,,12/31/2019: A landmine detonated targeting a c...,1,1,1,0,,,0,1,0,3,Bombing/Explosion,,,,,14,Private Citizens & Property,67.0,Unnamed Civilian/Unspecified,Not Applicable,Civilian,92.0,India,,,,,,,,,,,,,,,,,Unknown,,,,,,,0.0,,,0,-99.0,0,0.0,,,,,,,,,,6,Explosives,8.0,Landmine,,,,,,,,,,,,,,0.0,0.0,0.0,1.0,0.0,0.0,0,,,,,0.0,,,,,,,,,,,,,,,,,"""Civilian injured in landmine blast in Indian-...",,,START Primary Collection,-9,-9,0,-9,
201182,201912310033,2019,12,31,,0,,44,China,4,East Asia,Hong Kong,Hong Kong,22340073,114138494,1.0,0,The incident occurred in Lai Chi Kok neighborh...,12/31/2019: Assailants threw petrol bombs at g...,1,1,1,0,,,0,1,0,7,Facility/Infrastructure Attack,,,,,2,Government (General),21.0,Government Building/Facility/Office,Government of Lai Chi Kok,Offices,89.0,Hong Kong,,,,,,,,,,,,,,,,,Unknown,,,,,,,0.0,,,0,2.0,0,0.0,,,,,,,,,,8,Incendiary,19.0,Molotov Cocktail/Petrol Bomb,,,,,,,,,,,,,Petrol bombs were used in the attack.,0.0,0.0,0.0,0.0,0.0,0.0,1,3.0,Minor (likely < $1 million),-99.0,Shutters and a floor were damaged.,0.0,,,,,,,,,,,,,,,,,"""Hong Kong restaurant firebombed by black-clad...","""Hong Kong restaurant firebombed by black-clad...",,START Primary Collection,-9,-9,1,1,


In [167]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201183 entries, 0 to 201182
Columns: 135 entries, eventid to related
dtypes: float64(45), int64(24), object(66)
memory usage: 207.2+ MB


In [168]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
eventid,201183.0,200000000000.0,1340000000.0,197000000000.0,199000000000.0,201000000000.0,202000000000.0,202000000000.0
iyear,201183.0,2000.0,13.4,1970.0,1990.0,2010.0,2020.0,2020.0
imonth,201183.0,6.45,3.39,0.0,4.0,6.0,9.0,12.0
iday,201183.0,15.5,8.81,0.0,8.0,15.0,23.0,31.0
extended,201183.0,0.0499,0.218,0.0,0.0,0.0,0.0,1.0
country,201183.0,131.0,112.0,4.0,75.0,101.0,160.0,1000.0
region,201183.0,7.25,2.91,1.0,6.0,8.0,10.0,12.0
specificity,201182.0,1.47,0.989,1.0,1.0,1.0,1.0,5.0
vicinity,201183.0,0.0684,0.282,-9.0,0.0,0.0,0.0,1.0
crit1,201183.0,0.988,0.107,0.0,1.0,1.0,1.0,1.0


Count the number of ocurrences per country

In [169]:
df['country_txt'].value_counts()

Iraq                                26755
Afghanistan                         16313
Pakistan                            15208
India                               13477
Colombia                             8742
Philippines                          7976
Peru                                 6109
Yemen                                5526
United Kingdom                       5424
El Salvador                          5320
Nigeria                              5070
Somalia                              5037
Turkey                               4464
Thailand                             4162
Spain                                3255
Sri Lanka                            3040
United States                        3004
Algeria                              2749
Syria                                2737
France                               2726
West Bank and Gaza Strip             2608
Egypt                                2597
Lebanon                              2496
Libya                             

Counting the 5 countries:

1. Niger:


In [170]:
df.loc[df['country_txt'] == 'Niger', 'iyear'].value_counts(sort=True)

2019    49
2015    41
2016    25
2018    19
1992    16
2017    13
2008     9
1994     8
2007     7
1991     6
2014     5
2013     4
2009     4
2010     3
1997     3
2011     2
1999     2
1995     2
2012     1
1989     1
1996     1
1985     1
Name: iyear, dtype: int64

Data shows that recent years have been busy.

2. Chad:



In [171]:
df.loc[df['country_txt'] == 'Chad', 'iyear'].value_counts(sort=True)

2015    27
2019    14
2018    12
2006    11
2008     8
2007     7
2017     6
2016     5
2002     4
1992     4
1995     3
2009     3
1991     2
1997     1
1998     1
1999     1
1984     1
1996     1
1994     1
2010     1
2014     1
1986     1
1985     1
1974     1
Name: iyear, dtype: int64

Not so much as Niger, yet the top10 years are mostly in the past decade.

3. Mali


In [172]:
df.loc[df['country_txt'] == 'Mali', 'iyear'].value_counts(sort=True)

2018    164
2017    142
2019    137
2015    121
2016    100
2014     69
2013     58
2012     19
1994     13
1991     12
2008      9
2009      6
2007      5
2011      4
1992      3
1997      3
1990      1
2005      1
1995      1
2010      1
Name: iyear, dtype: int64

As predicted, Mali has experienced many attacks in the past ten years.

4. Burkina Faso:

In [173]:
df.loc[df['country_txt'] == 'Burkina Faso', 'iyear'].value_counts(sort=True)

2019    160
2018     72
2017     32
2016     10
2015      6
1991      2
1984      1
2013      1
Name: iyear, dtype: int64

Now that is quite a surprise. Notice how the number of attacks has dramatically increased in 2019 when compared to 2018, which had been a busy year already.

5. Mauritania

In [174]:
df.loc[df['country_txt'] == 'Mauritania', 'iyear'].value_counts(sort=True)

2009    5
1977    4
2011    3
2008    2
1996    1
2004    1
2005    1
2010    1
2018    1
Name: iyear, dtype: int64

All quiet in Mauritania.

In [175]:
df.loc[df['country_txt'] == 'Mali', 'iyear'].max()

2019

In [176]:
df.loc[df['country_txt'] == 'Mali', 'gname'].value_counts()

Unknown                                                                  328
Jamaat Nusrat al-Islam wal Muslimin (JNIM)                               115
Muslim extremists                                                         91
Ansar al-Dine (Mali)                                                      54
Al-Qaida in the Islamic Maghreb (AQIM)                                    53
Movement for Oneness and Jihad in West Africa (MUJAO)                     42
Tuareg extremists                                                         31
Azawad National Liberation Movement (MNLA)                                27
Islamic State in the Greater Sahara (ISGS)                                27
Macina Liberation Front (FLM)                                             14
Coordination of Azawad Movements (CMA)                                    13
Al-Mua'qi'oon Biddam Brigade (Those who Sign with Blood)                   9
Fulani extremists                                                          9

In [177]:
df[(df['country_txt'] == 'Mali') & (df['iyear'] > 2009)]

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,region_txt,provstate,city,latitude,longitude,specificity,vicinity,location,summary,crit1,crit2,crit3,doubtterr,alternative,alternative_txt,multiple,success,suicide,attacktype1,attacktype1_txt,attacktype2,attacktype2_txt,attacktype3,attacktype3_txt,targtype1,targtype1_txt,targsubtype1,targsubtype1_txt,corp1,target1,natlty1,natlty1_txt,targtype2,targtype2_txt,targsubtype2,targsubtype2_txt,corp2,target2,natlty2,natlty2_txt,targtype3,targtype3_txt,targsubtype3,targsubtype3_txt,corp3,target3,natlty3,natlty3_txt,gname,gsubname,gname2,gsubname2,gname3,gsubname3,motive,guncertain1,guncertain2,guncertain3,individual,nperps,nperpcap,claimed,claimmode,claimmode_txt,claim2,claimmode2,claimmode2_txt,claim3,claimmode3,claimmode3_txt,compclaim,weaptype1,weaptype1_txt,weapsubtype1,weapsubtype1_txt,weaptype2,weaptype2_txt,weapsubtype2,weapsubtype2_txt,weaptype3,weaptype3_txt,weapsubtype3,weapsubtype3_txt,weaptype4,weaptype4_txt,weapsubtype4,weapsubtype4_txt,weapdetail,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,property,propextent,propextent_txt,propvalue,propcomment,ishostkid,nhostkid,nhostkidus,nhours,ndays,divert,kidhijcountry,ransom,ransomamt,ransomamtus,ransompaid,ransompaidus,ransomnote,hostkidoutcome,hostkidoutcome_txt,nreleased,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
97676,201008100009,2010,8,10,,1,16/08/2010,123,Mali,11,Sub-Saharan Africa,Kidal,Kidal,18442701.0,1411332.0,1.0,0,The attack occurred in an unknown area of nort...,"08/10/2010: On Tuesday, in an unknown area of ...",1,1,1,0,,,0,1,0,6,Hostage Taking (Kidnapping),2.0,Armed Assault,,,4,Military,34.0,"Military Personnel (soldiers, troops, officers...",Malian National Guard,A Malian National Guard soldier was targeted i...,123.0,Mali,2.0,Government (General),18.0,"Government Personnel (excluding police, military)",Malian Customs,A Malian Customs guide was targeted in the att...,123.0,Mali,,,,,,,,,Al-Qaida in the Islamic Maghreb (AQIM),,,,,,The specific motive for the attack is unknown.,1.0,,,0,-99.0,0,0.0,,,,,,,,,,13,Unknown,,,,,,,,,,,,,,,Unknown weapons were used in the attack.,1.0,0.0,0.0,0.0,0.0,0.0,0,,,,,1.0,2.0,0.0,,6.0,,,0.0,,,,,,6.0,Combination,1.0,The guide was killed on 08/12/2010 while the s...,"Xinhua News Agency, ""Mali Says Two Nationals K...","Agence France Presse, ""Al-Qaeda Executes Mali ...","Agence France Presse, ""Al-Qaida Frees Malian S...",ISVG,1,1,0,1,
99755,201101050006,2011,1,5,,0,,123,Mali,11,Sub-Saharan Africa,Bamako,Bamako,12648483.0,-799414.0,1.0,0,The attack occurred at the French embassy in B...,"01/05/2011: On Wednesday evening, in Bamako ci...",1,1,1,0,,,0,1,0,7,Facility/Infrastructure Attack,,,,,7,Government (Diplomatic),46.0,Embassy/Consulate,French Government,The French embassy was targeted in the attack.,69.0,France,,,,,,,,,,,,,,,,,Al-Qaida in the Islamic Maghreb (AQIM),,,,,,The specific motive for the attack was to inti...,1.0,,,0,1.0,0,0.0,,,,,,,,,,5,Firearms,3.0,Handgun,8.0,Incendiary,19.0,Molotov Cocktail/Petrol Bomb,,,,,,,,,A Molotov cocktail and an automatic pistol wer...,0.0,0.0,0.0,2.0,0.0,0.0,-9,,,,It is unknown if the attack caused any propert...,0.0,,,,,,,,,,,,,,,,It is unknown if civilians were the target of ...,"Jane's Intelligence, ""IED Attack Targets Frenc...","Xinhua News Agency, ""Explosion Hits French Emb...","Agence France Presse, ""Mali: Bomber of French ...",ISVG,1,1,1,1,
102189,201106280011,2011,6,28,,0,,123,Mali,11,Sub-Saharan Africa,Unknown,Unknown,,,5.0,0,The attack occurred on the Mali-Mauritanian bo...,"06/28/2011: On Tuesday, in an unspecified loca...",1,1,1,0,,,0,1,0,3,Bombing/Explosion,,,,,20,Unknown,,,,The target was unknown.,123.0,Mali,,,,,,,,,,,,,,,,,Unknown,,,,,,The specific motive for the attack is unknown.,0.0,,,0,-99.0,0,0.0,,,,,,,,,,6,Explosives,17.0,Other Explosive Type,,,,,,,,,,,,,An improvised explosive device was used in the...,3.0,0.0,0.0,0.0,0.0,0.0,0,,,,,0.0,,,,,,,,,,,,,,,,,"Yahoo News, ""Three Killed in Mali Mine Explosi...","Jane’s Intelligence, “IED Attack Kills Three C...",,ISVG,-9,-9,0,-9,
104213,201111240026,2011,11,24,,0,09/12/2014,123,Mali,11,Sub-Saharan Africa,Mopti,Hombori,15286179.0,-1702154.0,1.0,0,The incident occurred at a hotel.,11/24/2011: Suspected members of Al-Qa ida in ...,1,1,1,0,,,0,1,0,6,Hostage Taking (Kidnapping),,,,,14,Private Citizens & Property,68.0,Named Civilian,Civilians,"Phillipe Verdon and Serge Lazarevic, two forei...",69.0,France,,,,,,,,,,,,,,,,,Al-Qaida in the Islamic Maghreb (AQIM),,,,,,AQIM reportedly killed Verdon in retaliation f...,0.0,,,0,-99.0,0,0.0,,,,,,,,,,5,Firearms,5.0,Unknown Gun Type,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,0.0,0.0,0,,,,,1.0,2.0,0.0,,1111.0,,,-9.0,,,,,,6.0,Combination,1.0,,"Henry Samuel, ""France's last remaining hostage...","""Mali gunmen abduct two French nationals from ...","""French Mali hostage Philippe Verdon confirmed...",START Primary Collection,1,1,1,1,
104235,201111250020,2011,11,25,,1,,123,Mali,11,Sub-Saharan Africa,Timbuktu,Timbuktu,1677532.0,-3008265.0,1.0,0,Amanar restaurant in Timbuktu,11/25/2011: Members of Al-Qa ida in the Lands ...,1,1,1,0,,,0,1,0,6,Hostage Taking (Kidnapping),,,,,18,Tourists,97.0,Tourist,Multinational tourists,"Tourists in Timbuktu, including German, Dutch,...",999.0,Multinational,,,,,,,,,,,,,,,,,Al-Qaida in the Islamic Maghreb (AQIM),,,,,,"Specific motive is unknown; however, several t...",0.0,,,0,-99.0,0,1.0,6.0,Video,,,,,,,0.0,5,Firearms,5.0,Unknown Gun Type,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,0.0,0.0,0,,,,,1.0,3.0,0.0,,2078.0,,,1.0,-99.0,0.0,4200000.0,0.0,3500000 Euros,2.0,Hostage(s) released by perpetrators,3.0,,"Sebastian Boe, ""Second Group of Foreign Nation...","""Dutch hostage Sjaak Rijke freed in Mali,"" BBC...","Spencer, Richard, ""New British 'front man' for...",START Primary Collection,1,1,1,1,
105198,201201170029,2012,1,17,,0,,123,Mali,11,Sub-Saharan Africa,Gao,Menaka,15914431.0,2397368.0,1.0,0,The incident occurred in the Menaka district.,"01/17/2012: Assailants attacked Menaka, Gao, M...",1,1,1,0,,,0,1,0,9,Unknown,,,,,14,Private Citizens & Property,75.0,Village/City/Town/Suburb,Menaka Town,Town,123.0,Mali,4.0,Military,34.0,"Military Personnel (soldiers, troops, officers...",Malian Army,Members,123.0,Mali,,,,,,,,,Azawad National Liberation Movement (MNLA),,,,,,The Azawad National Liberation Movement (MNLA)...,0.0,,,0,-99.0,0,1.0,7.0,"Posted to website, blog, etc.",,,,,,,,13,Unknown,,,,,,,,,,,,,,,,1.0,0.0,,,0.0,,-9,,,,,0.0,,,,,,,,,,,,,,,,,"""Mali Army Bombs Tuareg Rebel Positions, Arres...","""UPDATE 1-Mali,"" Reuters News, January 17, 2012.","""Tuareg - Mali - 2012,"" GlobalSecurity.org, Ju...",START Primary Collection,0,0,0,0,
105219,201201180030,2012,1,18,,0,,123,Mali,11,Sub-Saharan Africa,Kidal,Aguelhoc,19464848.0,856393.0,1.0,0,The incident occurred in the Tessalit district.,01/18/2012: Assailants opened fire on Malian A...,1,1,1,0,,,1,1,0,2,Armed Assault,,,,,4,Military,34.0,"Military Personnel (soldiers, troops, officers...",Malian Army,Soldiers,123.0,Mali,14.0,Private Citizens & Property,75.0,Village/City/Town/Suburb,Aguelhok Town,Town,123.0,Mali,,,,,,,,,Azawad National Liberation Movement (MNLA),,Al-Qaida in the Islamic Maghreb (AQIM),,Ansar al-Dine (Mali),,,0.0,0.0,0.0,0,-99.0,0,1.0,8.0,Personal claim,0.0,,,0.0,,,,5,Firearms,5.0,Unknown Gun Type,,,,,,,,,,,,,,36.0,0.0,35.0,,0.0,,-9,,,,,0.0,,,,,,,,,,,,,,,,,"""Mali military battles Taureg [sic] rebels in ...","""Mali Troops, Tuareg Rebels Battle in Second T...","""Al-Qa'idah said involved in northern Mali fig...",START Primary Collection,0,1,0,1,"201201180030, 201201180031"
105220,201201180031,2012,1,18,,0,,123,Mali,11,Sub-Saharan Africa,Kidal,Tessalit,20252065.0,981526.0,1.0,0,The incident occurred in the Tessalit district.,01/18/2012: Assailants opened fire on Malian A...,1,1,1,0,,,1,1,0,2,Armed Assault,,,,,4,Military,34.0,"Military Personnel (soldiers, troops, officers...",Malian Army,Soldiers,123.0,Mali,14.0,Private Citizens & Property,75.0,Village/City/Town/Suburb,Tessalit Town,Town,123.0,Mali,,,,,,,,,Azawad National Liberation Movement (MNLA),,,,,,,0.0,,,0,-99.0,0,1.0,8.0,Personal claim,,,,,,,,5,Firearms,5.0,Unknown Gun Type,,,,,,,,,,,,,,11.0,0.0,10.0,,0.0,,-9,,,,,0.0,,,,,,,,,,,,,,,,,"""Mali military battles Taureg [sic] rebels in ...","""Mali Troops Battle Tuareg as Rebels Launch Of...","""Tuareg - Mali - 2012,"" GlobalSecurity.org, Ju...",START Primary Collection,0,0,0,0,"201201180030, 201201180031"
105411,201201260038,2012,1,26,,0,,123,Mali,11,Sub-Saharan Africa,Gao,Anderamboukane,15421337.0,3021221.0,1.0,0,,01/26/2012: Assailants opened fire on Malian A...,1,1,1,0,,,1,1,0,2,Armed Assault,,,,,4,Military,34.0,"Military Personnel (soldiers, troops, officers...",Malian Army,Soldiers,123.0,Mali,14.0,Private Citizens & Property,75.0,Village/City/Town/Suburb,Anderamboukane Town,Town,123.0,Mali,,,,,,,,,Azawad National Liberation Movement (MNLA),,Al-Qaida in the Islamic Maghreb (AQIM),,,,,0.0,0.0,,0,-99.0,0,1.0,8.0,Personal claim,0.0,,,,,,,5,Firearms,5.0,Unknown Gun Type,,,,,,,,,,,,,,,0.0,,,0.0,,-9,,,,,0.0,,,,,,,,,,,,,,,,,"""Mali Blames Al Qa'ida as Rebels Launch New At...","""Tuareg rebels attack fifth town in Mali,"" Al ...","""Mali: Tuareg Rebels Fight With Malian Army in...",START Primary Collection,0,1,0,1,"201201260038, 201201260039"
105412,201201260039,2012,1,26,,0,,123,Mali,11,Sub-Saharan Africa,Timbuktu,Lere,1571704.0,-490009.0,1.0,0,,01/26/2012: Assailants opened fire on Malian A...,1,1,1,0,,,1,1,0,2,Armed Assault,,,,,4,Military,34.0,"Military Personnel (soldiers, troops, officers...",Malian Army,Soldiers,123.0,Mali,14.0,Private Citizens & Property,75.0,Village/City/Town/Suburb,Lere Town,Town,123.0,Mali,,,,,,,,,Azawad National Liberation Movement (MNLA),,Al-Qaida in the Islamic Maghreb (AQIM),,,,,0.0,0.0,,0,-99.0,0,1.0,8.0,Personal claim,0.0,,,,,,,5,Firearms,5.0,Unknown Gun Type,,,,,,,,,,,,,,,0.0,,,0.0,,-9,,,,,0.0,,,,,,,,,,,,,,,,,"""Mali Blames Al Qa'ida as Rebels Launch New At...","""Tuareg rebels attack fifth town in Mali,"" Al ...","""Tuareg - Mali - 2012,"" GlobalSecurity.org, Ju...",START Primary Collection,0,1,0,1,"201201260038, 201201260039"


In [178]:
df.loc[df['country_txt'] == 'Mali', 'gname'].max()

'Youth Movement for the Total Liberation of Azawad'

Let's use `groupby` to improve data selection

In [179]:
group_countries = df.groupby('country_txt', sort=False)
group_countries

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001D7D69EC040>

In [180]:
group_countries['iyear'].value_counts()

country_txt         iyear
Dominican Republic  1987     15
                    1997     12
                    1989     11
                    1992     11
                    1991      8
                             ..
South Sudan         2017     54
                    2014     40
                    2013     13
                    2019     13
                    2012      5
Name: iyear, Length: 3970, dtype: int64

In [181]:
year_results = df[df['country_txt'] == 'Mali'].groupby(['iyear'])

In [182]:
year_results['gname'].value_counts()

iyear  gname                                                                
1990   Islamic Legion                                                            1
1991   Tuareg extremists                                                         8
       Unknown                                                                   3
       Tuareg Guerrillas                                                         1
1992   Tuareg Guerrillas                                                         2
       Black Malian Group                                                        1
1994   Tuareg extremists                                                         8
       Islamic Arab Front of Azawad (FIAA)                                       2
       Association of Students and Pupils in Mali (AEEM)                         1
       Black African Vigilantes                                                  1
       Ganda Koi                                                                 1
1995   Tua

In [183]:
df[
    (df['country_txt'] == 'Mali') &
    (df['iyear'] == 2019)
].groupby(['gname','success'])['iyear'].count()

gname                                                    success
Al-Qaida in the Islamic Maghreb (AQIM)                   1           1
Ansar al-Islam (Burkina Faso)                            1           2
Dan Na Ambassagou                                        1           2
Dogon extremists                                         1           5
Dozo militia                                             1           5
Fulani extremists                                        1           3
Islamic State in the Greater Sahara (ISGS)               1           8
Jamaat Nusrat al-Islam wal Muslimin (JNIM)               0           1
                                                         1          24
Muslim extremists                                        0           1
                                                         1          15
Self-Defense Group of Imghad Tuaregs and Allies (GATIA)  1           1
Unknown                                                  0           4
            

### Changing Columns

In [374]:
df1 = df.copy()

Since there are just too many columns we don't need, it makes more sense to select the ones we want to work on.

In [375]:
df1 = df1[[
    'iyear', 'imonth','iday', 'country_txt', 
    'region_txt', 'provstate', 'city','latitude',
    'longitude', 'location', 'summary', 'success', 'attacktype1_txt',
    'targtype1_txt','gname','weaptype1_txt', 'nkill'
     ]]
df1.head()

Unnamed: 0,iyear,imonth,iday,country_txt,region_txt,provstate,city,latitude,longitude,location,summary,success,attacktype1_txt,targtype1_txt,gname,weaptype1_txt,nkill
0,1970,7,2,Dominican Republic,Central America & Caribbean,National,Santo Domingo,18456792,-69951164,,,1,Assassination,Private Citizens & Property,MANO-D,Unknown,1.0
1,1970,0,0,Mexico,North America,Federal,Mexico city,19371887,-99086624,,,1,Hostage Taking (Kidnapping),Government (Diplomatic),23rd of September Communist League,Unknown,0.0
2,1970,1,0,Philippines,Southeast Asia,Tarlac,Unknown,15478598,120599741,,,1,Assassination,Journalists & Media,Unknown,Unknown,1.0
3,1970,1,0,Greece,Western Europe,Attica,Athens,3799749,23762728,,,1,Bombing/Explosion,Government (Diplomatic),Unknown,Explosives,
4,1970,1,0,Japan,East Asia,Fukouka,Fukouka,33580412,130396361,,,1,Facility/Infrastructure Attack,Government (Diplomatic),Unknown,Incendiary,


In [376]:
df1['imonth'].describe()

count    201183.00
mean          6.45
std           3.39
min           0.00
25%           4.00
50%           6.00
75%           9.00
max          12.00
Name: imonth, dtype: float64

In [377]:
df1['imonth'] = df1['imonth'].replace(0,df['imonth'].median())

In [378]:
df1['imonth'].value_counts()

5     18898
7     18038
8     17293
10    17167
6     17090
3     16906
4     16836
1     16654
11    16257
9     15681
2     15425
12    14938
Name: imonth, dtype: int64

In [379]:
df1['imonth'].describe()

count    201183.00
mean          6.45
std           3.39
min           1.00
25%           4.00
50%           6.00
75%           9.00
max          12.00
Name: imonth, dtype: float64

In [380]:
df1['iday'].describe()

count    201183.00
mean         15.52
std           8.81
min           0.00
25%           8.00
50%          15.00
75%          23.00
max          31.00
Name: iday, dtype: float64

In [381]:
df1['iday'] = df1['iday'].replace(0,df['iday'].median())

In [382]:
df1['iday'].describe()

count    201183.00
mean         15.59
std           8.75
min           1.00
25%           8.00
50%          15.00
75%          23.00
max          31.00
Name: iday, dtype: float64

In [383]:
df1['iyear'].describe()

count    201183.00
mean       2004.16
std          13.43
min        1970.00
25%        1992.00
50%        2011.00
75%        2015.00
max        2019.00
Name: iyear, dtype: float64

In [384]:
df1['date'] = pd.to_datetime(
    df1.iyear.astype(str) + 
    '-' + df1.imonth.astype(str) +
    '-' + df1.iday.astype(str), 
    format='%Y-%m-%d'
    )

In [385]:
df1.tail()

Unnamed: 0,iyear,imonth,iday,country_txt,region_txt,provstate,city,latitude,longitude,location,summary,success,attacktype1_txt,targtype1_txt,gname,weaptype1_txt,nkill,date
201178,2019,12,31,Iraq,Middle East & North Africa,Baghdad,Baghdad,33303567,44371771,The incident occurred along Palestine Street.,12/31/2019: An explosive device detonated outs...,1,Bombing/Explosion,Private Citizens & Property,Unknown,Explosives,0.0,2019-12-31
201179,2019,12,31,Sudan,Sub-Saharan Africa,West Darfur,El Geneina,13440886,22441728,,12/31/2019: Assailants attacked the police hea...,1,Unknown,Police,Unknown,Unknown,2.0,2019-12-31
201180,2019,12,31,Sudan,Sub-Saharan Africa,West Darfur,El Geneina,13440886,22441728,The incident occurred in El Jebel neighborhood.,12/31/2019: Assailants attacked the West Darfu...,1,Armed Assault,Government (General),Unknown,Firearms,2.0,2019-12-31
201181,2019,12,31,India,South Asia,Jammu and Kashmir,Bagiot Dora,3381279,7409773,,12/31/2019: A landmine detonated targeting a c...,1,Bombing/Explosion,Private Citizens & Property,Unknown,Explosives,0.0,2019-12-31
201182,2019,12,31,China,East Asia,Hong Kong,Hong Kong,22340073,114138494,The incident occurred in Lai Chi Kok neighborh...,12/31/2019: Assailants threw petrol bombs at g...,1,Facility/Infrastructure Attack,Government (General),Unknown,Incendiary,0.0,2019-12-31


In [386]:
df1.columns.tolist()

['iyear',
 'imonth',
 'iday',
 'country_txt',
 'region_txt',
 'provstate',
 'city',
 'latitude',
 'longitude',
 'location',
 'summary',
 'success',
 'attacktype1_txt',
 'targtype1_txt',
 'gname',
 'weaptype1_txt',
 'nkill',
 'date']

In [387]:
df1 = df1[[
    'iyear', 'imonth', 'iday', 'date', 'country_txt', 'region_txt',
    'provstate', 'city', 'latitude', 'longitude', 'location',
    'summary', 'success', 'attacktype1_txt', 'targtype1_txt',
    'gname', 'weaptype1_txt', 'nkill',
    ]]

In [388]:
df1.head(5)

Unnamed: 0,iyear,imonth,iday,date,country_txt,region_txt,provstate,city,latitude,longitude,location,summary,success,attacktype1_txt,targtype1_txt,gname,weaptype1_txt,nkill
0,1970,7,2,1970-07-02,Dominican Republic,Central America & Caribbean,National,Santo Domingo,18456792,-69951164,,,1,Assassination,Private Citizens & Property,MANO-D,Unknown,1.0
1,1970,6,15,1970-06-15,Mexico,North America,Federal,Mexico city,19371887,-99086624,,,1,Hostage Taking (Kidnapping),Government (Diplomatic),23rd of September Communist League,Unknown,0.0
2,1970,1,15,1970-01-15,Philippines,Southeast Asia,Tarlac,Unknown,15478598,120599741,,,1,Assassination,Journalists & Media,Unknown,Unknown,1.0
3,1970,1,15,1970-01-15,Greece,Western Europe,Attica,Athens,3799749,23762728,,,1,Bombing/Explosion,Government (Diplomatic),Unknown,Explosives,
4,1970,1,15,1970-01-15,Japan,East Asia,Fukouka,Fukouka,33580412,130396361,,,1,Facility/Infrastructure Attack,Government (Diplomatic),Unknown,Incendiary,


In [389]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201183 entries, 0 to 201182
Data columns (total 18 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   iyear            201183 non-null  int64         
 1   imonth           201183 non-null  int64         
 2   iday             201183 non-null  int64         
 3   date             201183 non-null  datetime64[ns]
 4   country_txt      201183 non-null  object        
 5   region_txt       201183 non-null  object        
 6   provstate        201183 non-null  object        
 7   city             200757 non-null  object        
 8   latitude         196556 non-null  object        
 9   longitude        196555 non-null  object        
 10  location         63769 non-null   object        
 11  summary          135062 non-null  object        
 12  success          201183 non-null  int64         
 13  attacktype1_txt  201183 non-null  object        
 14  targtype1_txt    201

In [390]:
df1.memory_usage(deep=True)

Index                   128
iyear               1609464
imonth              1609464
iday                1609464
date                1609464
country_txt        13021220
region_txt         14951179
provstate          13290267
city               13179419
latitude           13065779
longitude          13121720
location           11559578
summary            50908079
success             1609464
attacktype1_txt    14813620
targtype1_txt      14679323
gname              15078345
weaptype1_txt      13284638
nkill               1609464
dtype: int64

In [391]:
cols_int = [
        'iyear', 
        'imonth',
        'iday',
        'success',
        ]
df1[cols_int] = df1[cols_int].astype('int16')

In [392]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201183 entries, 0 to 201182
Data columns (total 18 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   iyear            201183 non-null  int16         
 1   imonth           201183 non-null  int16         
 2   iday             201183 non-null  int16         
 3   date             201183 non-null  datetime64[ns]
 4   country_txt      201183 non-null  object        
 5   region_txt       201183 non-null  object        
 6   provstate        201183 non-null  object        
 7   city             200757 non-null  object        
 8   latitude         196556 non-null  object        
 9   longitude        196555 non-null  object        
 10  location         63769 non-null   object        
 11  summary          135062 non-null  object        
 12  success          201183 non-null  int16         
 13  attacktype1_txt  201183 non-null  object        
 14  targtype1_txt    201

In [393]:
df1.select_dtypes(include=['object']).nunique()

country_txt           205
region_txt             12
provstate            2607
city                42357
latitude            59533
longitude           59358
location            49150
summary            131656
attacktype1_txt         9
targtype1_txt          22
gname                3671
weaptype1_txt          12
dtype: int64

In [394]:
cols_cat = [
    'country_txt',
    'region_txt',
    'attacktype1_txt',
    'targtype1_txt',
    'gname',
    'weaptype1_txt',
]

df1[cols_cat] = df1[cols_cat].astype('category')

In [395]:
df1['latitude'] = df1['latitude'].str.replace(',','.')
df1['longitude'] = df1['longitude'].str.replace(',','.')

In [421]:
df1[['latitude', 'longitude']] = df1[['latitude', 'longitude']].astype('float64')

In [422]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201183 entries, 0 to 201182
Data columns (total 18 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   iyear            201183 non-null  int16         
 1   imonth           201183 non-null  int16         
 2   iday             201183 non-null  int16         
 3   date             201183 non-null  datetime64[ns]
 4   country_txt      201183 non-null  category      
 5   region_txt       201183 non-null  category      
 6   provstate        201183 non-null  object        
 7   city             200757 non-null  object        
 8   latitude         196556 non-null  float64       
 9   longitude        196555 non-null  float64       
 10  location         63769 non-null   object        
 11  summary          135062 non-null  object        
 12  success          201183 non-null  int16         
 13  attacktype1_txt  201183 non-null  category      
 14  targtype1_txt    201

In [423]:
df1.head()

Unnamed: 0,iyear,imonth,iday,date,country_txt,region_txt,provstate,city,latitude,longitude,location,summary,success,attacktype1_txt,targtype1_txt,gname,weaptype1_txt,nkill
0,1970,7,2,1970-07-02,Dominican Republic,Central America & Caribbean,National,Santo Domingo,18.456792,-69.951164,,,1,Assassination,Private Citizens & Property,MANO-D,Unknown,1.0
1,1970,6,15,1970-06-15,Mexico,North America,Federal,Mexico city,19.371887,-99.086624,,,1,Hostage Taking (Kidnapping),Government (Diplomatic),23rd of September Communist League,Unknown,0.0
2,1970,1,15,1970-01-15,Philippines,Southeast Asia,Tarlac,Unknown,15.478598,120.599741,,,1,Assassination,Journalists & Media,Unknown,Unknown,1.0
3,1970,1,15,1970-01-15,Greece,Western Europe,Attica,Athens,37.99749,23.762728,,,1,Bombing/Explosion,Government (Diplomatic),Unknown,Explosives,
4,1970,1,15,1970-01-15,Japan,East Asia,Fukouka,Fukouka,33.580412,130.396361,,,1,Facility/Infrastructure Attack,Government (Diplomatic),Unknown,Incendiary,


In [424]:
df1.shape

(201183, 18)

In [425]:
df_afr = df1.loc[df1['country_txt'].isin([
        'Niger', 'Mali', 'Nigeria', 'Chad',
        'Libya', 'Burkina Faso', 'Mauritania',
        'Benin', 'Cameroon', 'Togo', 'Guinea', 'Senegal',
        'Algeria', 'Guinea-Bissau', 'Ghana',
        ])]
                

In [426]:
df_afr = df_afr[(df_afr['iyear'] > 2010)]

In [427]:
df_afr.sample(random_state=80)

Unnamed: 0,iyear,imonth,iday,date,country_txt,region_txt,provstate,city,latitude,longitude,location,summary,success,attacktype1_txt,targtype1_txt,gname,weaptype1_txt,nkill
155658,2015,11,13,2015-11-13,Algeria,Middle East & North Africa,Tebessa,Umm Kmakam,34.480301,7.522332,,11/13/2015: An explosive device detonated in U...,1,Bombing/Explosion,Private Citizens & Property,Unknown,Explosives,0.0


In [446]:
cols = df_afr.columns.tolist()

In [450]:
df_afr.columns = [
                 'year', 'month', 'day', 'date', 'country', 
                 'region', 'province', 'city', 'latitude',
                 'longitude', 'location', 'summary', 'success',
                 'attack_type', 'target_type','group_name',
                 'weapon_type','casualties']

In [451]:
df_afr.head()

Unnamed: 0,year,month,day,date,country,region,province,city,latitude,longitude,location,summary,success,attack_type,target_type,group_name,weapon_type,casualties
99713,2011,1,1,2011-01-01,Nigeria,Sub-Saharan Africa,Borno,Maiduguri,11.840929,13.141459,"At the Victory Christ Church in Maiduguri, Bor...","01/01/2011: On Saturday night, in Maiduguri, B...",1,Facility/Infrastructure Attack,Religious Figures/Institutions,Boko Haram,Incendiary,0.0
99726,2011,1,3,2011-01-03,Nigeria,Sub-Saharan Africa,Borno,Maiduguri,11.840929,13.141459,The attack occurred in the Gwange area of Maid...,"01/03/2011: On Monday morning around 0945, in ...",1,Armed Assault,Police,Boko Haram,Firearms,1.0
99727,2011,1,3,2011-01-03,Nigeria,Sub-Saharan Africa,Delta,Ughelli,5.5,5.983333,At the the Independent National Electoral Comm...,"01/03/2011: On Monday night around 0100, in Ug...",1,Bombing/Explosion,Government (General),Delta Democratic Militia,Explosives,0.0
99755,2011,1,5,2011-01-05,Mali,Sub-Saharan Africa,Bamako,Bamako,12.648483,-7.99414,The attack occurred at the French embassy in B...,"01/05/2011: On Wednesday evening, in Bamako ci...",1,Facility/Infrastructure Attack,Government (Diplomatic),Al-Qaida in the Islamic Maghreb (AQIM),Firearms,0.0
99777,2011,1,7,2011-01-07,Nigeria,Sub-Saharan Africa,Bayelsa,Opokuma,5.076111,6.263889,At a house in the governmental district of Opo...,"01/07/2011: On Friday night, in the government...",1,Armed Assault,Private Citizens & Property,Unknown,Firearms,2.0


In [452]:
df_afr.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9259 entries, 99713 to 201176
Data columns (total 18 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   year         9259 non-null   int16         
 1   month        9259 non-null   int16         
 2   day          9259 non-null   int16         
 3   date         9259 non-null   datetime64[ns]
 4   country      9259 non-null   category      
 5   region       9259 non-null   category      
 6   province     9259 non-null   object        
 7   city         9259 non-null   object        
 8   latitude     9259 non-null   float64       
 9   longitude    9259 non-null   float64       
 10  location     9259 non-null   object        
 11  summary      9259 non-null   object        
 12  success      9259 non-null   int16         
 13  attack_type  9259 non-null   category      
 14  target_type  9259 non-null   category      
 15  group_name   9259 non-null   category      
 16  

In [453]:
df_afr.isna().sum()

year           0
month          0
day            0
date           0
country        0
region         0
province       0
city           0
latitude       0
longitude      0
location       0
summary        0
success        0
attack_type    0
target_type    0
group_name     0
weapon_type    0
casualties     0
dtype: int64

In [454]:
df_nan = df_afr[df_afr.isnull().any(axis=1)]

In [455]:
df_nan.loc[df_nan['latitude'].isnull()]

Unnamed: 0,year,month,day,date,country,region,province,city,latitude,longitude,location,summary,success,attack_type,target_type,group_name,weapon_type,casualties


In [456]:
df_afr.isnull().sum()

year           0
month          0
day            0
date           0
country        0
region         0
province       0
city           0
latitude       0
longitude      0
location       0
summary        0
success        0
attack_type    0
target_type    0
group_name     0
weapon_type    0
casualties     0
dtype: int64

In [457]:
df_afr['location'].fillna('Unknown',inplace=True)

In [458]:
df_afr['city'].fillna('Unknown',inplace=True)

In [460]:
df_afr['casualties'].fillna('Unknown', inplace=True)

In [436]:
df_afr.isnull().sum()

iyear               0
imonth              0
iday                0
date                0
country_txt         0
region_txt          0
provstate           0
city                0
latitude           92
longitude          92
location            0
summary             0
success             0
attacktype1_txt     0
targtype1_txt       0
gname               0
weaptype1_txt       0
nkill               0
dtype: int64

In [461]:
df_afr.dropna(inplace=True)

In [438]:
df_afr.isnull().sum()

iyear              0
imonth             0
iday               0
date               0
country_txt        0
region_txt         0
provstate          0
city               0
latitude           0
longitude          0
location           0
summary            0
success            0
attacktype1_txt    0
targtype1_txt      0
gname              0
weaptype1_txt      0
nkill              0
dtype: int64

In [462]:
df_afr.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9259 entries, 99713 to 201176
Data columns (total 18 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   year         9259 non-null   int16         
 1   month        9259 non-null   int16         
 2   day          9259 non-null   int16         
 3   date         9259 non-null   datetime64[ns]
 4   country      9259 non-null   category      
 5   region       9259 non-null   category      
 6   province     9259 non-null   object        
 7   city         9259 non-null   object        
 8   latitude     9259 non-null   float64       
 9   longitude    9259 non-null   float64       
 10  location     9259 non-null   object        
 11  summary      9259 non-null   object        
 12  success      9259 non-null   int16         
 13  attack_type  9259 non-null   category      
 14  target_type  9259 non-null   category      
 15  group_name   9259 non-null   category      
 16  

In [463]:
df_afr.sample(random_state=55)

Unnamed: 0,year,month,day,date,country,region,province,city,latitude,longitude,location,summary,success,attack_type,target_type,group_name,weapon_type,casualties
135864,2014,7,30,2014-07-30,Nigeria,Sub-Saharan Africa,Yobe,Buni Yadi,11.24707,12.01246,Unknown,"07/30/2014: Assailants stormed Buni Yadi town,...",1,Hostage Taking (Kidnapping),Private Citizens & Property,Boko Haram,Firearms,2.0


In [464]:
df_afr.to_csv('sahelterrorism.csv')

In [441]:
from folium import plugins

In [442]:
lat_sahel = 19.0643
long_sahel = 13.5437
sahel_map = folium.Map(location=[lat_sahel, long_sahel], zoom_start=5, tiles='Stamen Terrain')

In [443]:
incidents = plugins.MarkerCluster().add_to(sahel_map)


In [None]:
for lat, lng, in zip(df_afr['latitude'], df_afr['longitude']):
    folium.features.CircleMarker(
        [lat, lng],
        radius=2, # define how big you want the circle markers to be
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=0.8
    ).add_to(incidents)
folium.Marker([14.579932593201292, -0.004067063632717075], popup='Liptako-Gourma').add_to(sahel_map)
sahel_map.add_child(folium.ClickForMarker(popup='Waypoint'))
# show map
sahel_map

In [None]:
df_sahel = df_new.loc[['Niger', 'Mali', 'Chad', 'Burkina Faso','Mauritania']]

In [None]:
df_sahel.head()

Unnamed: 0_level_0,iyear,imonth,latitude,longitude,gname
country_txt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Niger,1985,5,15.898,5.803,People's Liberation Front of Niger
Niger,1989,9,16.864946,11.953771,Hezbollah
Niger,1991,2,17.905421,6.04542,Tuareg extremists
Niger,1991,2,13.516667,2.116667,Unknown
Niger,1991,6,14.916667,0.85,Tuareg Guerrillas


In [None]:
%matplotlib inline 

import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
print ('Matplotlib version: ', mpl.__version__)

Matplotlib version:  3.4.3


In [None]:
print(plt.style.available)
mpl.style.use(['dark_background'])

['Solarize_Light2', '_classic_test_patch', 'bmh', 'classic', 'dark_background', 'fast', 'fivethirtyeight', 'ggplot', 'grayscale', 'seaborn', 'seaborn-bright', 'seaborn-colorblind', 'seaborn-dark', 'seaborn-dark-palette', 'seaborn-darkgrid', 'seaborn-deep', 'seaborn-muted', 'seaborn-notebook', 'seaborn-paper', 'seaborn-pastel', 'seaborn-poster', 'seaborn-talk', 'seaborn-ticks', 'seaborn-white', 'seaborn-whitegrid', 'tableau-colorblind10']


In [None]:
years = list(map(str, range(2009, 2020)))
years

['2009',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017',
 '2018',
 '2019']

In [None]:
df_sahel = df_sahel.reset_index(['country_txt'])

In [None]:
df_sahel.head()

Unnamed: 0,country_txt,iyear
0,Niger,1985
1,Niger,1989
2,Niger,1991
3,Niger,1991
4,Niger,1991


In [None]:
df_sahel.value_counts(['country_txt'])

country_txt 
Mali            869
Burkina Faso    284
Niger           222
Chad            117
Mauritania       19
dtype: int64

In [None]:
df_sahel.set_index(['country_txt'], append=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,iyear
Unnamed: 0_level_1,country_txt,Unnamed: 2_level_1
0,Niger,1985
1,Niger,1989
2,Niger,1991
3,Niger,1991
4,Niger,1991
...,...,...
1506,Mauritania,2010
1507,Mauritania,2011
1508,Mauritania,2011
1509,Mauritania,2011


In [None]:
(df_sahel['country_txt'] == 'Chad').sum()

117

In [None]:
df_sahel_c = df_sahel.value_counts(df_sahel['country_txt'].values, sort=False)

In [None]:
df_sahel_y = df_sahel.value_counts(df_sahel['iyear'].values, sort=False)

In [None]:
df_sahel_y

1974      1
1977      4
1984      2
1985      2
1986      1
1989      1
1990      1
1991     22
1992     23
1994     22
1995      6
1996      3
1997      7
1998      1
1999      3
2002      4
2004      1
2005      2
2006     11
2007     19
2008     28
2009     18
2010      6
2011      9
2012     20
2013     63
2014     75
2015    195
2016    140
2017    193
2018    268
2019    360
dtype: int64

In [None]:
df_sahel_a = pd.Data_sahel_y[(df_sahel_y['iyear'] > 2010)]
df_cases = pd.Datadf_afr[(df_afr['iyear'] > 2010)]

KeyError: 'iyear'

In [None]:
df_sahel['total'] = df_sahel['country_txt'] == 'Niger'

Unnamed: 0,country_txt,iyear,total
0,Niger,1985,1985
1,Niger,1989,1989
2,Niger,1991,1991
3,Niger,1991,1991
4,Niger,1991,1991
...,...,...,...
1506,Mauritania,2010,2010
1507,Mauritania,2011,2011
1508,Mauritania,2011,2011
1509,Mauritania,2011,2011
