# Political Violence Targeting Women & Demonstrations Featuring Women

#### All violence targeting women, as well as demonstrations featuring women, are included in the data file below. The data in this file cover all events in which women were specifically targeted by political violence, not all events involving women in any way; the file also covers all demonstration events in which women were specifically featured, not all demonstrations involving women. 

#### This dataset was extracted from the ACLED Access Portal, and we perform exploratory data analysis (EDA) on it using Python.


## Data Collection & Data Exploration

We import pandas and necessary libraries for dataframe manipulation and analysis.

In [None]:
# Libraries are imported
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import MarkerCluster
from datetime import datetime, date


In [None]:
# Load the ACLED export into a DataFrame
df_gpv = pd.read_csv("gender_Sep27-1.csv")

# Columns that are not needed for this analysis.
# Columns kept: event_date, event_type, sub_event_type, inter1, interaction,
# iso, region, country, location, latitude, longitude, notes, fatalities.
unused_columns = [
    'event_id_cnty', 'year', 'time_precision', 'disorder_type',
    'actor1', 'assoc_actor_1', 'actor2', 'assoc_actor_2', 'inter2',
    'civilian_targeting',
    'admin1', 'admin2', 'admin3',
    'geo_precision', 'source', 'source_scale',
    'tags', 'timestamp',
]
df_gpv.drop(columns=unused_columns, inplace=True)

# Parse the ISO-formatted event_date strings into datetime.date objects
event_dates = df_gpv["event_date"].apply(date.fromisoformat)
df_gpv["date"] = event_dates

display(df_gpv.shape)

#### Exploring attributes of `datetime.date`

In [None]:
# Date of the row labelled 0
df_gpv.loc[0, 'date']

In [None]:
# Year component of the date in the row labelled 0
df_gpv.loc[0, 'date'].year

In [None]:
# Month component of the date in the row labelled 0
df_gpv.loc[0, 'date'].month

In [None]:
# Day component of the date in the row labelled 0
df_gpv.loc[0, 'date'].day

#### Exploring the dataset

In [None]:
# Show the first two rows

display(df_gpv.iloc[:2])

In [None]:
# Show the last two rows

display(df_gpv.iloc[-2:])

In [None]:
# Show the column names

display(df_gpv.columns)

In [None]:
# Structural overview of the DataFrame (printed summary from DataFrame.info)

df_gpv.info()

In [None]:
# Summary statistics for the numerical columns

df_gpv.describe()

In [None]:
# Inspect a single column as an example
df_gpv.loc[:, 'inter1']

#### Checking unique values

In [None]:
# Distinct values of the 'region' column
df_gpv.region.unique()

In [None]:
# Distinct values of the 'event_type' column
df_gpv.event_type.unique()

In [None]:
# Distinct values of the 'inter1' column
df_gpv.inter1.unique()


In [None]:
# Distinct values of the 'interaction' column
df_gpv.interaction.unique()

In [None]:
# Distinct values of the 'sub_event_type' column
df_gpv.sub_event_type.unique()

#### Handling missing data

In [None]:
# Count the missing values in each column

df_gpv.isna().sum()

## Pre-processing data

In [None]:
# Notes of the first event recorded with exactly 750 fatalities
df_gpv.loc[df_gpv['fatalities'] == 750, 'notes'].iloc[0]

In [None]:
# Region names present before they are grouped
df_gpv.region.unique()

In [None]:
# Groups the fine-grained region names of the 'region' column into broader
# areas: each key is the broad group, each value lists the original names.
# The names must match the data exactly, otherwise those rows are left
# without a group when the mapping is applied.
region_map = {
    'africa': ['Southern Africa', 'Northern Africa', 'Middle Africa', 'Western Africa', 'Eastern Africa'],
    'middle east': ['Middle East'],
    'asia': ['Caucasus and Central Asia', 'Southeast Asia', 'South Asia', 'East Asia'],
    'america and caribbean': ['South America', 'North America', 'Central America', 'Caribbean'],
    'europe': ['Europe'],
    'oceania': ['Oceania'],
}


In [None]:
# Invert region_map so every original region name points to its broad group
flattened_map = {}
for group_name, member_regions in region_map.items():
    for member in member_regions:
        flattened_map[member] = group_name


In [None]:
# Replace each original region name with its broad group; names that are
# not keys of flattened_map become NaN.
broad_region = df_gpv['region'].map(flattened_map)
df_gpv['region'] = broad_region

display(df_gpv)

## Plotting data

#### Region vs Fatalities

In [None]:
# Region groups available for plotting
df_gpv.region.unique()

In [None]:
# Total fatalities for each region, as a Series and then as a plain dict
fatalities_by_region = df_gpv.groupby('region')['fatalities']
group_region_fatalities = fatalities_by_region.agg('sum')

fatalities_per_region = group_region_fatalities.to_dict()

fatalities_per_region

In [None]:
# Bar chart of the total fatalities for each region
region_names = list(fatalities_per_region.keys())
fatality_totals = list(fatalities_per_region.values())

plt.bar(region_names, fatality_totals)
plt.title('Region vs Fatalities')
plt.xlabel('Region')
plt.ylabel('Fatalities')
plt.xticks(rotation=45)

plt.show()

# TODO: improve the styling of this chart

#### Region vs Sub-event type

In [None]:
# Number of events per region, split into one bar per sub-event type
plt.figure(figsize=(10, 10))
sns.countplot(data=df_gpv, x='region', hue='sub_event_type')
plt.title('Distribution of region grouped by subtypes')
plt.xlabel('region')
# countplot puts the number of rows on the y axis, so label it as a count
plt.ylabel('count')
# The legend lists the hue categories, i.e. the sub-event types
plt.legend(title='Sub-event type')
plt.show()


In [None]:
# Count how often each sub-event type occurs within each region
sub_events_by_region = df_gpv.groupby('region')['sub_event_type']
grouped_counts = sub_events_by_region.value_counts()

display(grouped_counts)

In [None]:
# Check which type the grouped result has
type(grouped_counts)

In [None]:
# Convert the grouped counts to a plain dict keyed by (region, sub_event_type) tuples
counts_dict = grouped_counts.to_dict()

display(counts_dict)


In [None]:
# Count of 'Mob violence' events in the 'africa' group
counts_dict['africa', 'Mob violence']

In [None]:
# Region groups that can be used as lookup keys
df_gpv.region.unique()

In [None]:
# NOTE(review): Series.any() reduces a column to a truthiness check rather
# than picking a region or event name, and neither variable is read again in
# the visible cells (the functions below take their own `region`/`event`
# arguments) — confirm what these were meant to hold.
region = df_gpv['region'].any()
event = df_gpv['sub_event_type'].any()

In [None]:
def get_count_by_region_event(counts_dict: dict, region: str, event: str) -> str:
    """Describe how many events of one type were recorded in one region.

    Args:
        counts_dict: Mapping of ``(region, event)`` tuples to event counts.
        region: Region to look up.
        event: Event type to look up.

    Returns:
        A sentence stating the count for the requested pair. A pair that is
        missing from ``counts_dict`` is reported with a count of 0.
    """
    # Look up the requested pair directly instead of iterating, so the
    # arguments are never shadowed by the dictionary's own keys.
    count = counts_dict.get((region, event), 0)
    return f"The number of {event} in {region} is {count}"

In [None]:
# Example lookup: 'Attack' events in the 'africa' group
get_count_by_region_event(counts_dict, region='africa', event='Attack')

In [None]:
# Bar chart of every (region, event) count with one pair highlighted

def give_plot_by_region_event(counts_dict: dict, region: str, event: str) -> object:
    """Plot all (region, event) counts as bars, highlighting one pair.

    Args:
        counts_dict: Mapping of ``(region, event)`` tuples to event counts.
        region: Region of the pair to highlight.
        event: Event type of the pair to highlight.

    Returns:
        The object returned by ``plt.bar`` for the drawn bars.
    """
    # Readable "region - event" tick labels instead of the raw tuple repr
    x_labels = [f"{key[0]} - {key[1]}" for key in counts_dict]

    # The requested pair is drawn in cyan, every other bar in dark slate gray
    colors = [
        'c' if key == (region, event) else 'darkslategray'
        for key in counts_dict
    ]

    plt.figure(figsize=(12, 6))

    # Plotting the data
    the_plot_bar_re = plt.bar(x_labels, list(counts_dict.values()), color=colors)

    # Adding labels and custom title
    plt.xlabel('Event per region')
    plt.ylabel('Count')
    plt.title('Counting events per region')

    # Vertical tick labels keep the long pair names from overlapping
    plt.xticks(rotation=90)

    return the_plot_bar_re


In [None]:
# Highlight 'Peaceful protest' events in the 'asia' group
give_plot_by_region_event(counts_dict, region='asia', event='Peaceful protest')

#### Functions that produce plots

In [None]:
def plot_fatalities_per_region(fatalities_per_region: dict, region: str) -> object:
    """Draw the fatalities of every region as bars, marking one region.

    Args:
        fatalities_per_region: Mapping of region name to fatalities.
        region: Region whose bar is drawn in dark slate gray; all other
            bars are drawn in magenta.

    Returns:
        The object returned by ``plt.bar`` for the drawn bars.
    """
    region_names = fatalities_per_region.keys()
    colors = ['darkslategray' if name == region else 'm' for name in region_names]

    # Draw the bars on the current figure
    the_plot_bar_fpr = plt.bar(region_names, fatalities_per_region.values(), color=colors)

    # Axis labels, title and slanted tick labels
    plt.xlabel('Region')
    plt.ylabel('Fatalities')
    plt.title('Fatalities per region')
    plt.xticks(rotation=45)

    return the_plot_bar_fpr


# Mark the 'middle east' bar
plot_fatalities_per_region(fatalities_per_region, region='middle east')




In [None]:
# Mark the 'europe' bar
plot_fatalities_per_region(fatalities_per_region, region='europe')

In [None]:
def give_bar_plot_unique(value_region: str) -> object:
    """Plot the fatalities of a single region as one bar.

    Reads the module-level ``fatalities_per_region`` dictionary.

    Args:
        value_region: Key of ``fatalities_per_region`` to plot.

    Returns:
        The matplotlib figure that contains the bar.

    Raises:
        KeyError: If ``value_region`` is not a key of
            ``fatalities_per_region``.
    """
    # Only the requested region's value is plotted; passing every region's
    # value against a single x label would stack all bars on top of each other.
    region_fatalities = fatalities_per_region[value_region]

    # Create a new figure and axis objects
    fig, ax = plt.subplots(figsize=(2, 12))

    # Create the bar plot
    ax.bar([value_region], [region_fatalities])

    # Set the axis labels and title
    ax.set_xlabel('Region')
    ax.set_ylabel('Fatalities')
    ax.set_title('Fatalities per region')

    # Rotate the x-axis labels for better readability
    plt.xticks(rotation=45)

    # Adjust the spacing to accommodate the rotated labels
    fig.subplots_adjust(bottom=0.2)

    # Return the figure object
    return fig

# Plot the bar for 'oceania' and keep the returned figure
fig = give_bar_plot_unique('oceania')

# Maybe the type of plot should be changed

In [None]:
def plot_count_region_event(region: str, event: str) -> object:
    """Plot the count of one event type in one region as a single bar.

    Reads the module-level ``counts_dict`` dictionary, which maps
    ``(region, event)`` tuples to counts.

    Args:
        region: Region to look up.
        event: Event type to look up.

    Returns:
        The matplotlib figure that contains the bar.
    """
    # A pair that is missing from counts_dict is plotted as a zero-height bar
    count = counts_dict.get((region, event), 0)

    fig, ax = plt.subplots(figsize=(2, 12))

    # Create the bar plot
    ax.bar([region], [count])

    ax.set_xlabel('Region')
    ax.set_ylabel('Count')
    # Name the event in the title so the single bar can be interpreted
    ax.set_title(f'{event} per region')

    # Rotate the x-axis labels for better readability
    plt.xticks(rotation=45)

    # Adjust the spacing to accommodate the rotated labels
    fig.subplots_adjust(bottom=0.2)

    # Return the figure object
    return fig

#fig = plot_count_region_event('oceania', )


    

#def get_count_by_region_event(counts_dict: dict, region: str, event: str) -> int:
    #for (region,event), counts in counts_dict.items():
        #return f"The number of {event} in {region} is {counts_dict[(region, event)]}"
    



### Challenge

##### 1. Crear una clase `DataProcessor` para nuestro problema específico, que guarde el DataFrame y cualquier variable que necesite nuestro script como atributos.
##### 2. Convertir las funciones de plot que ya funcionan, a métodos utilizando los atributos del objeto en lugar de pasar las cosas como parámetro.

In [None]:
class DataProcessor:
    """Hold the events DataFrame and the derived data used by the bar plots.

    Attributes:
        df_gpv: Events DataFrame.
        fatalities_per_region: Mapping of region name to fatalities.
        counts_dict: Mapping of ``(region, event)`` tuples to counts.
        region: Region name stored with the processor.
        event: Event name stored with the processor.
        color: Colour(s) handed to ``plt.bar`` as its ``color`` argument.
    """

    def __init__(self, df_gpv: pd.DataFrame, fatalities_per_region: dict, counts_dict: dict, region: str, event: str, color: list):
        """Store the DataFrame and plot inputs as instance attributes."""
        self.df_gpv = df_gpv
        self.fatalities_per_region = fatalities_per_region
        self.counts_dict = counts_dict
        self.region = region
        self.event = event
        self.color = color

    def plot_fatalities_per_region(self) -> object:
        """Draw one bar per region from ``self.fatalities_per_region``.

        Returns:
            The object returned by ``plt.bar`` for the drawn bars.
        """
        # Read everything from the instance so the method does not depend on
        # notebook-level variables being defined.
        the_plot_bar_fpr = plt.bar(
            list(self.fatalities_per_region.keys()),
            list(self.fatalities_per_region.values()),
            color=self.color,
        )

        return the_plot_bar_fpr

    def give_plot_by_region_event(self) -> object:
        """Draw one bar per (region, event) pair from ``self.counts_dict``.

        Returns:
            The object returned by ``plt.bar`` for the drawn bars.
        """
        # Tuple keys are converted to strings so they can serve as tick labels
        the_plot_bar_re = plt.bar(
            [str(key) for key in self.counts_dict],
            list(self.counts_dict.values()),
            color=self.color,
        )

        return the_plot_bar_re


In [None]:
# Mark the 'europe' bar using the standalone function
plot_fatalities_per_region(fatalities_per_region, region='europe')

In [None]:
# Highlight 'Abduction/forced disappearance' events in the 'africa' group
give_plot_by_region_event(
    counts_dict,
    region='africa',
    event='Abduction/forced disappearance',
)

In [None]:
# Sub-event types that need a colour
df_gpv.sub_event_type.unique()

In [None]:
# Palette of HTML hex colours used to colour-code the sub-event types
palette = [
    '#6A5ACD', '#6B8E23', '#BA55D3', '#B8860B', '#20B2AA', '#4682B4',
    '#8B4513', '#B22222', '#6495ED', '#BDB76B', '#CD853F', '#48D1CC',
    '#2F4F4F', '#2E8B57', '#5F9EA0', '#BC8F8F', '#CD5C5C', '#DDA0DD',
]

# Key the colours by the sub-event types actually present in the data, so
# the mapping below finds a colour for every row. The palette is reused from
# the start if there are more sub-event types than colours.
sub_event_types = sorted(df_gpv['sub_event_type'].dropna().unique())
category_colors = {
    name: palette[position % len(palette)]
    for position, name in enumerate(sub_event_types)
}
df_gpv['Color'] = df_gpv['sub_event_type'].map(category_colors)

In [None]:
# Keep only the columns needed to place the events on a map
geo_columns = ['country', 'sub_event_type', 'latitude', 'longitude']
geo_data = df_gpv[geo_columns]

df_geo = pd.DataFrame(geo_data)

In [None]:
# Display the geographic subset
df_geo

In [None]:
import geopandas as gpd

# Build point geometries from the coordinates (x = longitude, y = latitude)
# and attach them to the data with CRS code 4326
event_points = gpd.points_from_xy(df_geo["longitude"], df_geo["latitude"])
gdf = gpd.GeoDataFrame(df_geo, geometry=event_points, crs=4326)
gdf

In [None]:
# Export the GeoDataFrame as a GeoJSON file
gdf.to_file(filename='polvioliolence.geojson', driver="GeoJSON")

In [None]:
from keplergl import KeplerGl

# Show the GeoDataFrame in a Kepler.gl widget under the dataset name "asd"
m = KeplerGl(height=600)
m.add_data(data=gdf, name="asd")
m

In [None]:
# Initialize the map centered on a rough global midpoint
m = folium.Map(location=[0, 0], zoom_start=2)

# Add a marker cluster to group nearby markers
marker_cluster = MarkerCluster().add_to(m)

# Add one marker per event. df_geo only holds 'country', 'sub_event_type',
# 'latitude' and 'longitude', so the popup is built from those columns.
# Rows without coordinates are skipped because a marker needs a location.
for _, row in df_geo.dropna(subset=['latitude', 'longitude']).iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=f"Country: {row['country']}<br>Category: {row['sub_event_type']}",
        # Icon's `color` only accepts a fixed list of colour names, so the
        # hex code goes into `icon_color`; sub-event types without an entry
        # in category_colors fall back to white.
        icon=folium.Icon(
            color='blue',
            icon_color=category_colors.get(row['sub_event_type'], 'white'),
        ),
    ).add_to(marker_cluster)

# Display the map as the cell output
m

In [None]:
#df_gpv[df_gpv['fatalities']== 750].iloc[0]['notes']