# Political Violence Targeting Women & Demonstrations Featuring Women

#### All violence targeting women, as well as demonstrations featuring women, are included in the data file below. The data in this file cover all events in which women were specifically targeted by political violence, not all events involving women in any way; the file also covers all demonstration events in which women were specifically featured, not all demonstrations involving women. 

#### This dataset was extracted from the ACLED Access Portal, and we perform exploratory data analysis (EDA) on it using Python.


## Data Collection & Data Exploration

We import pandas and necessary libraries for dataframe manipulation and analysis.

In [1]:
# Libraries are imported
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium.plugins import MarkerCluster
from datetime import datetime, date


In [3]:
from pol_violence.data_loaded import load_dataframe

df_gpv = load_dataframe()

#### Exploring attributes of `datetime.date`

In [4]:
df_gpv['date'][0]

datetime.date(2024, 9, 27)

In [5]:
df_gpv['date'][0].year

2024

In [None]:
df_gpv['date'][0].month

In [None]:
df_gpv['date'][0].day

#### Exploring the dataset

In [None]:
# First rows are checked

display(df_gpv.head(2))

In [None]:
# Last two rows are checked

display(df_gpv.tail(2))

In [None]:
# Column names are checked

display(df_gpv.keys())

In [None]:
# Structural Overview of the DataFrame

df_gpv.info()

In [None]:
# Basic statistical description (numerical columns)

df_gpv.describe()

In [None]:
# Displaying random column
df_gpv['inter1']

#### Checking unique values

In [None]:
df_gpv['region'].unique()

In [None]:
df_gpv['event_type'].unique()

In [None]:
df_gpv['inter1'].unique()


In [None]:
df_gpv['interaction'].unique()

In [None]:
df_gpv['sub_event_type'].unique()

#### Handling missing data

In [None]:
# Missing values are identified

df_gpv.isnull().sum()

## Pre-processing data

In [None]:
df_gpv[df_gpv['fatalities']== 750].iloc[0]['notes']

In [None]:
df_gpv['region'].unique()

In [None]:
# Maps each coarse grouping (dict key) to the fine-grained labels of the
# 'region' column that it absorbs. Labels must match the data exactly,
# because unmatched labels become NaN when the column is mapped.
region_map = {
    'africa': ['Southern Africa', 'Northern Africa', 'Middle Africa', 'Western Africa', 'Eastern Africa'],
    'middle east': ['Middle East'],
    'asia': ['Caucasus and Central Asia', 'Southeast Asia', 'South Asia', 'East Asia'],
    'america and caribbean': ['South America', 'North America', 'Central America', 'Caribbean'],
    'europe': ['Europe'],
    'oceania': ['Oceania'],
}


In [None]:
# Invert region_map: every fine-grained region label becomes a key that
# points at the coarse group containing it.
flattened_map = {}
for group_name, members in region_map.items():
    for member in members:
        flattened_map[member] = group_name


In [None]:
#new_column = {'region': ['america', 'middle east', 'asia', 'america and caribbean', 'europe', 'oceania']}

df_gpv['region'] = df_gpv['region'].map(flattened_map)

display(df_gpv)

## Plotting data

#### Region vs Fatalities

In [None]:
df_gpv['region'].unique()

In [None]:
group_region_fatalities = df_gpv.groupby('region')['fatalities'].sum()

fatalities_per_region = group_region_fatalities.to_dict()

fatalities_per_region

In [None]:
plt.bar(fatalities_per_region.keys(), fatalities_per_region.values())
plt.xlabel('Region')
plt.ylabel('Fatalities')
plt.title('Region vs Fatalities')
plt.xticks(rotation=45)

plt.show()

# TODO: improve the styling of this chart

#### Region vs Sub-event type

In [None]:
# Number of events per region, with one bar per sub-event type
plt.figure(figsize=(10, 10))
sns.countplot(data=df_gpv, x='region', hue='sub_event_type')
plt.title('Distribution of region grouped by subtypes')
plt.xlabel('region')
# countplot draws the number of rows in each bar, so the y-axis is a count
plt.ylabel('count')
# The legend lists the hue levels, i.e. the sub-event types
plt.legend(title='Sub-event type')
plt.show()


In [None]:
grouped_counts = df_gpv.groupby('region')['sub_event_type'].value_counts()

display(grouped_counts)

In [None]:
# Convert the grouped region / sub-event counts to a dict for direct lookup
counts_dict = grouped_counts.to_dict()

display(counts_dict)

In [None]:
counts_dict[('africa','Mob violence')]

In [None]:
# NOTE(review): Series.any() reduces a whole column to a single truthiness
# result; it does not select a region or sub-event name. Neither variable is
# read by the later cells shown here (the helper functions take their own
# `region` / `event` arguments), so this looks like leftover experimentation
# -- confirm before relying on it.
region = df_gpv['region'].any()
event = df_gpv['sub_event_type'].any()

In [None]:
def get_count_by_region_event(counts_dict: dict, region: str, event: str) -> str:
    """Return a sentence stating how many ``event`` records exist in ``region``.

    Args:
        counts_dict: Mapping keyed by ``(region, sub_event_type)`` tuples
            whose values are event counts.
        region: Region to look up.
        event: Sub-event type to look up.

    Returns:
        A human-readable message containing the count. The count is 0 when
        the ``(region, event)`` pair is not present in ``counts_dict``.
    """
    # Look the requested pair up directly. Iterating over the dict and
    # reusing the parameter names as loop variables would overwrite the
    # arguments and report whichever entry happens to come first.
    count = counts_dict.get((region, event), 0)
    return f"The number of {event} in {region} is {count}"

In [None]:
get_count_by_region_event(counts_dict, 'africa', 'Attack')

#### Functions that give plots

In [None]:

def plot_fatalities_per_region(fatalities_per_region: dict)-> object:
    """Draw and show a bar chart of fatalities keyed by region.

    Args:
        fatalities_per_region: Mapping whose keys label the bars and whose
            values give the bar heights.

    Returns:
        The object returned by ``plt.bar`` for the drawn bars.
    """
    regions = fatalities_per_region.keys()
    totals = fatalities_per_region.values()
    fig = plt.bar(regions, totals)

    # Titles and axis decoration
    plt.title('Region vs Fatalities')
    plt.xlabel('Region')
    plt.ylabel('Fatalities')
    plt.xticks(rotation=45)

    plt.show()
    return fig


In [None]:
plot_fatalities_per_region(fatalities_per_region)

In [None]:

def plot_stacked_bar(grouped_counts: pd.Series) -> object:
    """Plot a stacked bar chart of counts with one bar per outer index level.

    Args:
        grouped_counts: Counts indexed by two levels (the notebook passes
            the result of ``groupby('region')['sub_event_type'].value_counts()``).
            The inner level is unstacked into columns, and missing
            combinations are filled with 0.

    Returns:
        The Axes object returned by ``DataFrame.plot``.
    """
    stacked_df = grouped_counts.unstack(fill_value=0)

    # DataFrame.plot creates its own figure (sized through `figsize`), so no
    # separate plt.figure() call is made; one would only leave an empty
    # extra figure behind.
    ax = stacked_df.plot(kind='bar', stacked=True, colormap='viridis', figsize=(12, 6))

    ax.set_title('Stacked Bar Chart of Sub-event Counts by Region')
    ax.set_xlabel('Region')
    ax.set_ylabel('Count')
    ax.tick_params(axis='x', labelrotation=45)
    # Lay out the figure that actually owns these axes
    ax.figure.tight_layout()

    return ax

In [None]:
plot_stacked_bar(grouped_counts)

In [None]:

def plot_region_event_ondemand(counts_dict: dict, region: str, event: str) -> object:
    """Build a single-bar chart with the count for one region/event pair.

    Args:
        counts_dict: Mapping keyed by ``(region, event)`` tuples.
        region: Region part of the key to plot.
        event: Event part of the key to plot.

    Returns:
        The matplotlib figure holding the bar. The current figure is closed
        before returning.
    """
    # A pair missing from the dict is plotted with a count of 0
    count = counts_dict.get((region, event), 0)
    label = f"{region} - {event}"

    fig, ax = plt.subplots(figsize=(6, 4))
    ax.bar([label], [count], color=['mediumslateblue'])

    ax.set_title('Counting Events per Region', fontsize=14)
    ax.set_xlabel('Event per Region', fontsize=12)
    ax.set_ylabel('Count', fontsize=12)
    ax.tick_params(axis='x', rotation=0)

    plt.close()
    return fig


In [None]:
plot_region_event_ondemand(counts_dict, 'asia', 'Peaceful protest')

In [None]:
def plot_all_region_event_highlight(counts_dict: dict, region: str, event: str) -> object:
    """Build a bar chart of every region/event count, highlighting one pair.

    Args:
        counts_dict: Mapping keyed by ``(region, event)`` tuples whose
            values are the bar heights.
        region: Region part of the pair to highlight.
        event: Event part of the pair to highlight.

    Returns:
        The matplotlib figure holding the chart. The current figure is
        closed before returning.
    """
    selected = (region, event)
    pairs = list(counts_dict.keys())

    tick_labels = [f"{pair[0]} - {pair[1]}" for pair in pairs]
    heights = list(counts_dict.values())
    # The selected pair is drawn in cyan, every other bar in dark slate gray
    bar_colors = ['c' if pair == selected else 'darkslategray' for pair in pairs]

    fig, ax = plt.subplots(figsize=(12, 6))

    # Draw the bars and write each bar's value just above it
    for rect in ax.bar(tick_labels, heights, color=bar_colors):
        top = rect.get_height()
        ax.text(
            rect.get_x() + rect.get_width()/2.,
            top,
            f'{top:,}',
            ha='center',
            va='bottom',
        )

    ax.set_xlabel('Events per Region', fontsize=12)
    ax.set_ylabel('Count', fontsize=12)
    ax.set_title(
        f'Event Counts by Region (Highlighting {region} - {event})',
        fontsize=14,
    )

    # Vertical tick labels for readability
    plt.xticks(rotation=90, ha='right')

    # Horizontal dashed grid lines only
    ax.grid(True, linestyle='--', alpha=0.7, axis='y')

    plt.tight_layout()
    plt.close()
    return fig


In [None]:
plot_all_region_event_highlight(counts_dict, 'asia', 'Attack')

In [None]:
from pol_violence.plots import plot_fatalities_per_region

In [None]:
plot_fatalities_per_region(fatalities_per_region)


### Challenge

##### 1. Crear una clase `DataProcessor` para nuestro problema específico, que guarde el DataFrame y cualquier variable que necesite nuestro script como atributos.
##### 2. Convertir las funciones de plot que ya funcionan, a métodos utilizando los atributos del objeto en lugar de pasar las cosas como parámetro.

In [None]:
plot_fatalities_per_region(fatalities_per_region)

In [None]:
df_gpv['sub_event_type'].unique()

In [None]:
# Lookup table from category key to a hex colour, used to add a 'Color'
# column derived from 'sub_event_type'.
# NOTE(review): the keys are single letters; values of 'sub_event_type' seen
# elsewhere in this notebook are names such as 'Attack' or 'Mob violence'.
# If no letter keys occur in the column, .map() yields NaN for every row --
# confirm the intended keys.
category_colors = {
    'A':'#6A5ACD', 
    'B':'#6B8E23', 
    'C': '#BA55D3', 
    'D':'#B8860B', 
    'E':'#20B2AA', 
    'F':'#4682B4',
    'G':'#8B4513', 
    'H':'#B22222', 
    'I':'#6495ED', 
    'J':'#BDB76B', 
    'K':'#CD853F', 
    # NOTE(review): 'J' is repeated here; in a dict literal the later entry
    # wins, so '#BDB76B' above is discarded. Presumably a different key was
    # intended -- confirm.
    'J': '#48D1CC',
    'L': '#2F4F4F',
    'M':'#2E8B57',
    'N':'#5F9EA0', 
    'O': '#BC8F8F',
    'P':'#CD5C5C', 
    'Q':'#DDA0DD'
}
# Values without a matching key in category_colors become NaN in 'Color'
df_gpv['Color'] = df_gpv['sub_event_type'].map(category_colors)

In [None]:
geo_data = df_gpv[[
    'country',
    'sub_event_type',
    'latitude',
    'longitude'
]]
   
df_geo = pd.DataFrame(geo_data)

In [None]:
df_geo

In [None]:
import geopandas as gpd
gdf = gpd.GeoDataFrame(df_geo, geometry= gpd.points_from_xy(df_geo["longitude"], df_geo["latitude"]), crs=4326)
gdf

In [None]:
gdf.to_file('polvioliolence.geojson', driver="GeoJSON")

In [None]:
from keplergl import KeplerGl
m = KeplerGl(height = 600)
m.add_data(gdf, "asd")
m

In [None]:
# Initialize the map centered on a rough global midpoint
m = folium.Map(location=[0, 0], zoom_start=2)

# Add a marker cluster to group nearby markers
marker_cluster = MarkerCluster().add_to(m)

# Fallback colour for sub-event types that have no entry in category_colors
default_color = '#3388ff'

# Add one marker per event. df_geo only carries 'country', 'sub_event_type',
# 'latitude' and 'longitude', so those are the columns read here.
for _, row in df_geo.iterrows():
    marker_color = category_colors.get(row['sub_event_type'], default_color)
    # CircleMarker accepts arbitrary hex colours, whereas folium.Icon only
    # supports a fixed list of named colours.
    folium.CircleMarker(
        location=[row['latitude'], row['longitude']],
        radius=6,
        popup=f"Country: {row['country']}<br>Sub-event type: {row['sub_event_type']}",
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=0.8,
    ).add_to(marker_cluster)

In [None]:
explore_notes = df_gpv[df_gpv['fatalities']== 750].iloc[0]['notes']