In [28]:
import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns


In [29]:
path = '../datasets/datasets_processed/datasets_for_analysis'
df = pd.read_csv(f'{path}/df_1970.csv', index_col=0)
df.sort_values('iyear')
df.info()


<class 'pandas.core.frame.DataFrame'>
Index: 7252 entries, 0 to 7251
Data columns (total 34 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   eventid           7252 non-null   int64  
 1   iyear             7252 non-null   int64  
 2   imonth            7252 non-null   int64  
 3   iday              7252 non-null   int64  
 4   extended          7252 non-null   int64  
 5   country_txt       7252 non-null   object 
 6   region_txt        7252 non-null   object 
 7   provstate         7226 non-null   object 
 8   city              7252 non-null   object 
 9   latitude          7073 non-null   float64
 10  longitude         7073 non-null   float64
 11  crit1             7252 non-null   int64  
 12  crit2             7252 non-null   int64  
 13  crit3             7252 non-null   int64  
 14  doubtterr         7252 non-null   float64
 15  multiple          7252 non-null   float64
 16  success           7252 non-null   int64  
 17  

In [30]:
df

Unnamed: 0,eventid,iyear,imonth,iday,extended,country_txt,region_txt,provstate,city,latitude,...,gname,guncertain1,individual,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,ishostkid
0,197000000001,1970,7,2,0,Dominican Republic,Central America & Caribbean,,Santo Domingo,18.456792,...,MANO-D,0.0,0,1.0,,,0.0,,,0.0
1,197000000002,1970,0,0,0,Mexico,North America,Federal,Mexico city,19.371887,...,23rd of September Communist League,0.0,0,0.0,,,0.0,,,1.0
2,197001000001,1970,1,0,0,Philippines,Southeast Asia,Tarlac,Unknown,15.478598,...,Unknown,0.0,0,1.0,,,0.0,,,0.0
3,197001000002,1970,1,0,0,Greece,Western Europe,Attica,Athens,37.997490,...,Unknown,0.0,0,,,,,,,0.0
4,197001000003,1970,1,0,0,Japan,East Asia,Fukouka,Fukouka,33.580412,...,Unknown,0.0,0,,,,,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7247,197812290006,1978,12,29,0,Colombia,South America,Antioquia,Urrao,6.316090,...,Revolutionary Armed Forces of Colombia (FARC),0.0,0,1.0,,1.0,,,,0.0
7248,197812290007,1978,12,29,0,El Salvador,Central America & Caribbean,La Paz,Zacatecoluca,13.500000,...,Armed Forces of National Resistance (FARN),0.0,0,0.0,,,0.0,,,1.0
7249,197812290008,1978,12,29,0,El Salvador,Central America & Caribbean,San Vicente,San Vicente,13.641210,...,Armed Forces of National Resistance (FARN),0.0,0,0.0,,,0.0,,,1.0
7250,197812300001,1978,12,30,0,Namibia,Sub-Saharan Africa,Erongo,Swakopmund,-22.684698,...,South-West Africa People's Organization (SWAPO),0.0,0,0.0,,,60.0,,,0.0


In [31]:
fig = px.scatter_mapbox(df,
                        lat="latitude",
                        lon="longitude",
                        hover_name="city",
                        hover_data=["country_txt", "attacktype1_txt", "gname", "nkill"],
                        color="attacktype1_txt",
                        zoom=1,
                        height=600,
                        title="1970 Events - Global map")

# Configurar el estilo del mapa
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
fig.update_layout(legend_title_text='Attack Type')

# Mostrar el mapa
fig.show()

### How has the number of terrorist activities changed over the years? Are there certain regions where this trend is different from the global averages?

In [32]:
country_stats = df.groupby(['country_txt', 'iyear']).agg(
    num_attacks=('eventid', 'count'),
    latitude=('latitude', 'mean'),
    longitude=('longitude', 'mean')
).reset_index()

country_stats['iyear'] = country_stats['iyear'].astype(int)
country_stats = country_stats.sort_values('iyear')

fig = px.scatter_mapbox(country_stats,
                        lat="latitude",
                        lon="longitude",
                        size="num_attacks",
                        color="num_attacks",
                        hover_name="country_txt",
                        hover_data={"num_attacks": True, "iyear": True, "latitude": False, "longitude": False},
                        zoom=1,
                        height=800,
                        title="Number of attacks per country (70's)",
                        color_continuous_scale=px.colors.sequential.Plasma,
                        size_max=40,
                        animation_frame='iyear',
                        animation_group='country_txt')

fig.update_layout(mapbox_style="carto-positron")
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
fig.update_layout(coloraxis_colorbar=dict(title="N. attemps"))
fig.update_traces(marker=dict(opacity=0.7))

fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 800
fig.layout.updatemenus[0].buttons[0].args[1]["transition"]["duration"] = 600
fig.layout.sliders[0].currentvalue.prefix = "Year: "

fig.show()

In [33]:
import os

base_path = "../datasets/datasets_processed/datasets_for_analysis"

dfs = []

for root, dirs, files in os.walk(base_path):
    for file in files:
        if file.endswith('.csv'):
            file_path = os.path.join(root, file)
            df = pd.read_csv(file_path)
            dfs.append(df)

df_total = pd.concat(dfs, ignore_index=True)

In [None]:
country_stats = df_total.groupby(['country_txt', 'iyear']).agg(
    num_attacks=('eventid', 'count'),
    latitude=('latitude', 'mean'),
    longitude=('longitude', 'mean')
).reset_index()

country_stats['iyear'] = country_stats['iyear'].astype(int)
country_stats = country_stats.sort_values('iyear')

fig = px.scatter_mapbox(country_stats,
                        lat="latitude",
                        lon="longitude",
                        size="num_attacks",
                        color="num_attacks",
                        hover_name="country_txt",
                        hover_data={"num_attacks": True, "iyear": True, "latitude": False, "longitude": False},
                        zoom=1,
                        height=1000,
                        title="Number of attacks per country over the time",
                        color_continuous_scale=px.colors.sequential.Plasma,
                        size_max=40,
                        animation_frame='iyear',
                        animation_group='country_txt')

fig.update_layout(mapbox_style="carto-positron")
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
fig.update_layout(coloraxis_colorbar=dict(title="N. attemps"))
fig.update_traces(marker=dict(opacity=0.7))

fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 600
fig.layout.updatemenus[0].buttons[0].args[1]["transition"]["duration"] = 400
fig.layout.sliders[0].currentvalue.prefix = "Year: "

fig.show()

### Is the number of incidents and the number of casualties correlated? Can you spot any irregularities or outliers?

In [36]:
df = pd.read_csv('../datasets/datasets_processed/datasets_for_analysis/df_1970.csv', index_col=0)
df.head()

Unnamed: 0,eventid,iyear,imonth,iday,extended,country_txt,region_txt,provstate,city,latitude,...,gname,guncertain1,individual,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,ishostkid
0,197000000001,1970,7,2,0,Dominican Republic,Central America & Caribbean,,Santo Domingo,18.456792,...,MANO-D,0.0,0,1.0,,,0.0,,,0.0
1,197000000002,1970,0,0,0,Mexico,North America,Federal,Mexico city,19.371887,...,23rd of September Communist League,0.0,0,0.0,,,0.0,,,1.0
2,197001000001,1970,1,0,0,Philippines,Southeast Asia,Tarlac,Unknown,15.478598,...,Unknown,0.0,0,1.0,,,0.0,,,0.0
3,197001000002,1970,1,0,0,Greece,Western Europe,Attica,Athens,37.99749,...,Unknown,0.0,0,,,,,,,0.0
4,197001000003,1970,1,0,0,Japan,East Asia,Fukouka,Fukouka,33.580412,...,Unknown,0.0,0,,,,,,,0.0


In [39]:
df_nonan = df.dropna(subset=['nkill', 'nwound'])
df_nonan

Unnamed: 0,eventid,iyear,imonth,iday,extended,country_txt,region_txt,provstate,city,latitude,...,gname,guncertain1,individual,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,ishostkid
0,197000000001,1970,7,2,0,Dominican Republic,Central America & Caribbean,,Santo Domingo,18.456792,...,MANO-D,0.0,0,1.0,,,0.0,,,0.0
1,197000000002,1970,0,0,0,Mexico,North America,Federal,Mexico city,19.371887,...,23rd of September Communist League,0.0,0,0.0,,,0.0,,,1.0
2,197001000001,1970,1,0,0,Philippines,Southeast Asia,Tarlac,Unknown,15.478598,...,Unknown,0.0,0,1.0,,,0.0,,,0.0
5,197001010002,1970,1,1,0,United States,North America,Illinois,Cairo,37.005105,...,Black Nationalists,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,197001020001,1970,1,2,0,Uruguay,South America,Montevideo,Montevideo,-34.891151,...,Tupamaros (Uruguay),0.0,0,0.0,,,0.0,,,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7246,197812290004,1978,12,29,0,United States,North America,New York,New York City,40.697132,...,Omega-7,0.0,0,0.0,,,0.0,,,0.0
7248,197812290007,1978,12,29,0,El Salvador,Central America & Caribbean,La Paz,Zacatecoluca,13.500000,...,Armed Forces of National Resistance (FARN),0.0,0,0.0,,,0.0,,,1.0
7249,197812290008,1978,12,29,0,El Salvador,Central America & Caribbean,San Vicente,San Vicente,13.641210,...,Armed Forces of National Resistance (FARN),0.0,0,0.0,,,0.0,,,1.0
7250,197812300001,1978,12,30,0,Namibia,Sub-Saharan Africa,Erongo,Swakopmund,-22.684698,...,South-West Africa People's Organization (SWAPO),0.0,0,0.0,,,60.0,,,0.0


In [55]:
df_casualties_country = df_nonan.groupby(['country_txt', 'iyear']).agg(
        num_deaths=('nkill', 'count'),
        num_injuries=('nwound', 'count'),
        latitude=('latitude', 'mean'),
        longitude=('longitude', 'mean')).reset_index()

In [59]:
df_casualties_country.dropna(inplace=True)
df_casualties_country['iyear'] = df_casualties_country.iyear.astype(int)
df_casualties_country['total_victims'] = df_casualties_country['num_deaths'] + df_casualties_country['num_injuries']

In [60]:
df_casualties_country

Unnamed: 0,country_txt,iyear,num_deaths,num_injuries,latitude,longitude,total_victims
0,Afghanistan,1973,1,1,34.516895,69.147011,2
1,Algeria,1972,1,1,36.763763,3.055413,2
2,Algeria,1978,1,1,36.763763,3.055413,2
3,Angola,1978,2,2,-12.765538,15.732861,4
4,Argentina,1970,17,17,-33.186723,-59.598608,34
...,...,...,...,...,...,...,...
366,Zaire,1973,1,1,-4.389166,15.337620,2
367,Zaire,1977,1,1,-8.353530,22.649662,2
369,Zambia,1973,1,1,-15.415393,28.284354,2
370,Zambia,1975,1,1,-15.415393,28.284354,2


In [None]:
fig = px.scatter_mapbox(country_stats,
                        lat="latitude",
                        lon="longitude",
                        size="total_victims",
                        color="num_deaths",
                        hover_name="country_txt",
                        hover_data={"num_deaths": True, "num_wounds": True "iyear": True, "latitude": False, "longitude": False},
                        zoom=1,
                        height=1000,
                        title="Number of victims per country (70's)",
                        color_continuous_scale=px.colors.sequential.amp,
                        size_max=40,
                        animation_frame='iyear',
                        animation_group='country_txt')

fig.update_layout(mapbox_style="carto-positron")
fig.update_layout(margin={"r":0,"t":40,"l":0,"b":0})
fig.update_layout(coloraxis_colorbar=dict(title="N. attemps"))
fig.update_traces(marker=dict(opacity=0.7))

fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = 600
fig.layout.updatemenus[0].buttons[0].args[1]["transition"]["duration"] = 400
fig.layout.sliders[0].currentvalue.prefix = "Year: "

fig.show()

ValueError: Value of 'size' is not the name of a column in 'data_frame'. Expected one of ['country_txt', 'iyear', 'num_attacks', 'latitude', 'longitude'] but received: total_victims