# Data Preprocessing - Part 4 (Maps)

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
def load_data(file_path, sep=',', encoding='utf-8', **read_csv_kwargs):
    """Load data from a CSV file into a pandas DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the delimited text file; passed straight to
        ``pd.read_csv``.
    sep : str, default ','
        Field delimiter.
    encoding : str, default 'utf-8'
        Text encoding of the file.
    **read_csv_kwargs
        Any additional keyword arguments are forwarded unchanged to
        ``pd.read_csv`` (for example ``decimal=','`` for files written with
        a decimal comma, or ``dtype`` / ``parse_dates``).

    Returns
    -------
    pandas.DataFrame or None
        The parsed frame, or ``None`` if reading failed for any reason.
        Failures are reported on stdout instead of being raised, so callers
        must check for ``None`` before using the result.
    """
    try:
        data = pd.read_csv(file_path, sep=sep, encoding=encoding, **read_csv_kwargs)
        print("Data loaded successfully.")
        return data
    except Exception as e:
        # Deliberately best-effort: report the problem and hand back None so
        # the notebook keeps running.
        print(f"An error occurred while loading the data: {e}")
        return None

def summarize_data(data):
    """Print and return the ``describe()`` statistics of a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame or None
        Frame to summarise. ``None`` is accepted so the result of a failed
        load can be passed in directly.

    Returns
    -------
    pandas.DataFrame or None
        The output of ``data.describe()``, or ``None`` when ``data`` is
        ``None`` (a notice is printed in that case).
    """
    # Guard clause: nothing to describe when no frame was supplied.
    if data is None:
        print("No data to summarize.")
        return None

    stats = data.describe()
    print("Data summary:")
    print(stats)
    return stats

In [6]:
# Read the semicolon-delimited APAC file (path is relative to the notebook's
# working directory); load_data returns None if the read fails.
data = load_data("../data/apac_data.csv", sep=';')
# Print and keep the describe() statistics; summary is None when data is None.
summary = summarize_data(data)

Data loaded successfully.
Data summary:
              EVENTS     FATALITIES  POPULATION_EXPOSURE             ID
count  197730.000000  197730.000000         9.976600e+04  197730.000000
mean        3.459632       2.036479         9.523680e+04    1538.280291
std         7.029385       9.795868         2.026675e+05     993.437542
min         1.000000       0.000000         1.000000e+00       1.000000
25%         1.000000       0.000000         1.054400e+04     593.000000
50%         1.000000       0.000000         3.482700e+04    1314.000000
75%         3.000000       0.000000         9.525400e+04    2283.000000
max       190.000000     618.000000         5.030356e+06    4157.000000


In [7]:
# Preview the first rows to inspect the column names and raw values.
# NOTE(review): CENTROID_LATITUDE / CENTROID_LONGITUDE do not appear in the
# describe() summary printed earlier, which suggests they were not parsed as
# numeric columns — confirm with data.dtypes before using them for maps.
data.head()

Unnamed: 0,WEEK,REGION,COUNTRY,ADMIN1,EVENT_TYPE,SUB_EVENT_TYPE,EVENTS,FATALITIES,POPULATION_EXPOSURE,DISORDER_TYPE,ID,CENTROID_LATITUDE,CENTROID_LONGITUDE
0,31-dicembre-2016,Caucasus and Central Asia,Afghanistan,Badakhshan,Battles,Armed clash,6,15,,Political violence,1,36966,733417
1,07-gennaio-2017,Caucasus and Central Asia,Afghanistan,Badakhshan,Battles,Armed clash,5,28,,Political violence,1,36966,733417
2,21-gennaio-2017,Caucasus and Central Asia,Afghanistan,Badakhshan,Battles,Armed clash,1,4,,Political violence,1,36966,733417
3,04-febbraio-2017,Caucasus and Central Asia,Afghanistan,Badakhshan,Battles,Armed clash,1,4,,Political violence,1,36966,733417
4,11-febbraio-2017,Caucasus and Central Asia,Afghanistan,Badakhshan,Battles,Armed clash,4,41,,Political violence,1,36966,733417


In [None]:
# Keep only the rows whose COUNTRY is Afghanistan.
afg_data = data.loc[data['COUNTRY'].eq('Afghanistan')]

# Total EVENTS for every (ADMIN1, EVENT_TYPE) pair, with the group keys
# turned back into ordinary columns.
grouped_data = (
    afg_data
    .groupby(['ADMIN1', 'EVENT_TYPE'])['EVENTS']
    .sum()
    .reset_index()
)

# Optional check (disabled): how many event types were recorded per ADMIN1.
#grouped_data['ADMIN1'].value_counts()

ADMIN1
Badakhshan    6
Badghis       6
Wardak        6
Urozgan       6
Takhar        6
Samangan      6
Parwan        6
Panjshir      6
Paktika       6
Paktia        6
Nuristan      6
Nimruz        6
Nangarhar     6
Logar         6
Laghman       6
Kunduz        6
Kunar         6
Khost         6
Kapisa        6
Baghlan       6
Balkh         6
Bamyan        6
Daykundi      6
Farah         6
Faryab        6
Ghazni        6
Ghor          6
Helmand       6
Herat         6
Jowzjan       6
Kabul         6
Kandahar      6
Zabul         6
Sar-e Pol     5
Name: count, dtype: int64

In [11]:
# Preview the summed EVENTS per (ADMIN1, EVENT_TYPE) pair.
grouped_data.head()

Unnamed: 0,ADMIN1,EVENT_TYPE,EVENTS
0,Badakhshan,Battles,842
1,Badakhshan,Explosions/Remote violence,233
2,Badakhshan,Protests,49
3,Badakhshan,Riots,15
4,Badakhshan,Strategic developments,74


In [12]:
# For each ADMIN1, find the EVENT_TYPE with the maximum EVENTS.
# groupby('ADMIN1')['EVENTS'].idxmax() gives, per ADMIN1, the index label of
# the row holding the largest EVENTS value; .loc then selects those full rows
# from grouped_data, so max_events keeps grouped_data's original index labels.
# NOTE(review): pandas documents idxmax as returning the first matching label
# when several rows tie for the maximum — confirm that is acceptable here.
max_events = grouped_data.loc[grouped_data.groupby('ADMIN1')['EVENTS'].idxmax()]

max_events.head()

Unnamed: 0,ADMIN1,EVENT_TYPE,EVENTS
0,Badakhshan,Battles,842
6,Badghis,Battles,1206
12,Baghlan,Battles,1333
18,Balkh,Battles,1647
24,Bamyan,Battles,50


In [13]:
# Show the top event type for every ADMIN1. Ending the cell with the bare
# expression uses the notebook's rich DataFrame display rather than the
# plain-text dump produced by print().
max_events

         ADMIN1 EVENT_TYPE  EVENTS
0    Badakhshan    Battles     842
6       Badghis    Battles    1206
12      Baghlan    Battles    1333
18        Balkh    Battles    1647
24       Bamyan    Battles      50
30     Daykundi    Battles     194
36        Farah    Battles    1190
42       Faryab    Battles    1975
48       Ghazni    Battles    3193
54         Ghor    Battles     457
60      Helmand    Battles    4119
66        Herat    Battles    1600
72      Jowzjan    Battles     702
78        Kabul    Battles     938
84     Kandahar    Battles    2829
90       Kapisa    Battles     822
96        Khost    Battles     726
102       Kunar    Battles    1140
108      Kunduz    Battles    1632
114     Laghman    Battles     930
120       Logar    Battles    1310
126   Nangarhar    Battles    2830
132      Nimruz    Battles     413
138    Nuristan    Battles     251
144      Paktia    Battles    1431
150     Paktika    Battles     880
156    Panjshir    Battles     272
162      Parwan    B