# Assignment 4

In [None]:
from IPython.display import display, HTML

# Override the widths of the notebook, menubar and toolbar containers so that
# wide outputs (maps, tables) have more horizontal room.
display(HTML(data="""
<style>
    div#notebook-container    { width: 95%; }
    div#menubar-container     { width: 65%; }
    div#maintoolbar-container { width: 99%; }
</style>
"""))

# Libraries

import chardet
import folium
import folium as fm
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from folium import Marker, GeoJson, Choropleth
from folium.plugins import MarkerCluster, HeatMap, StripePattern
from geopandas import GeoSeries
from mpl_toolkits.axes_grid1 import make_axes_locatable
from shapely.geometry import Point, LineString

# Load the labor dataset (Stata file) into a DataFrame.
# NOTE(review): this is an absolute, user-specific path; it only resolves on the
# machine of the author who wrote this cell.
sex_work = pd.read_stata(r'C:\Users\rafae\Documents\GitHub\QLAB_Summer_Python\_data\8_trab_sex_20.dta' )

# Build two new columns from "cod_ubigeo":
#   - ".str" treats each element of the column as a string, so string
#     operations (here, slicing) can be applied element-wise;
#   - "dpt_code" keeps the first two characters, "prov_code" the first four.
# Slicing through ".str" already returns a new Series, so the result is an
# independent column and no explicit ".copy()" is required.
sex_work[ 'dpt_code' ] = sex_work.cod_ubigeo.str[ :2 ]
sex_work[ 'prov_code' ] = sex_work.cod_ubigeo.str[ :4 ]

# Keep only the rows whose "sex" column equals "Mujer".
# Boolean filtering returns a new frame; reset_index(drop=True) discards the
# old index so the filtered frame gets a clean 0..n-1 index.
women_work = sex_work[ sex_work.sex == 'Mujer' ].reset_index( drop = True )

# Convert month names (parsed with the '%B' format) into month numbers (1-12).
# ".dt.month" reads the number directly instead of formatting it as text and
# parsing it back; astype(int) keeps the same integer dtype as before.
women_work[ 'month' ] = pd.to_datetime( women_work.month , format = '%B' ) \
                                    .dt.month \
                                    .astype( int )

# Sort by department and month (rebinding instead of mutating in place keeps
# the cell safe to re-run).
women_work = women_work.sort_values( [ 'dpt_code', 'month' ] )

# Total "empl" of women per department and month.
# groupby() sorts by its keys by default, so the result is already ordered by
# dpt_code and month and needs no additional sort_values().
dpt_women_work = women_work.groupby( [ 'dpt_code', 'month' ], as_index = False )[ [ 'empl' ] ] \
                            .sum() \
                            .rename( columns = { 'empl' : 'women_empl' } )

# Average of the monthly totals, one row per department.
df2 = dpt_women_work.groupby( [ 'dpt_code' ], as_index = False )[ [ 'women_empl' ] ].mean()

# Read the department-limits shapefile into a GeoDataFrame.
departments_shapefile = r'C:\Users\rafae\Documents\GitHub\QLAB_Summer_Python\_data\INEI_LIMITE_DEPARTAMENTAL\INEI_LIMITE_DEPARTAMENTAL.shp'
dpt_shp = gpd.read_file( departments_shapefile )

# Attach the per-department averages to the geometries: rows are matched where
# the shapefile's 'CCDD' equals df2's 'dpt_code' (inner join, the default).
df3 = dpt_shp.merge(
    df2,
    how = 'inner',
    left_on = 'CCDD',
    right_on = 'dpt_code',
)

## First map (Rafael)

In [None]:
# First choropleth: average women employment per department on a light basemap.
# The map object m1 is built but not displayed in this cell, which keeps the
# saved notebook small enough to upload to GitHub.

center = [-10.1900, -75.0152]
m1 = fm.Map(location=center, tiles="cartodbpositron", zoom_start=6, control_scale=True)

# Class breaks for the colour scale; the last edge is the observed maximum.
# NOTE(review): assumes the maximum is above 100000, otherwise the edges are
# not increasing -- confirm against the data.
scale1 = [0, 10000, 40000, 60000, 100000, df3['women_empl'].max()]

# NOTE(review): 'women_empl' is a mean of monthly sums of 'empl', so the legend
# text "Rate" may not describe it accurately -- confirm the intended label.
choropleth = Choropleth(
    geo_data=df3,                          # GeoDataFrame providing the geometries
    name='choropleth',
    data=df3,                              # frame providing the values
    columns=['dpt_code', 'women_empl'],    # [key column, value column]
    key_on='feature.properties.dpt_code',  # GeoJSON property matched to the key column
    fill_color='YlOrRd',
    fill_opacity=1,
    line_opacity=0.2,
    legend_name='Women Employment Rate',
    nan_fill_color="white",
    bins=scale1,                           # `bins` replaces the deprecated `threshold_scale`
    highlight=True,                        # highlight the region on hover
).add_to(m1)

## Second map (Rafael)

In [None]:
# Class breaks for the colour scale; the last edge is the observed maximum.
# NOTE(review): assumes the maximum is above 100000, otherwise the edges are
# not increasing -- confirm against the data.
scale = [0, 20000, 40000, 60000, 100000, df3['women_empl'].max()]

# Centre of the map (latitude, longitude).
center = [-10.1900, -75.0152]

# Create the folium map on an OpenStreetMap basemap.
# (A bare `m` expression in the middle of a cell displays nothing, so it was
# dropped; put `m` on the last line of a cell to render the map.)
m = fm.Map(location=center, tiles="OpenStreetMap", zoom_start=6, control_scale=True)

# Add a choropleth layer to the map.
choropleth = Choropleth(
    geo_data=df3,                          # GeoDataFrame providing the geometries
    name='Map 1',
    data=df3,                              # frame providing the values
    columns=['dpt_code', 'women_empl'],    # the first column is the key shared by both inputs
    key_on='feature.properties.dpt_code',  # GeoJSON property matched to the key column
    fill_color='YlOrRd',                   # colour scale
    fill_opacity=1,
    line_opacity=0.2,
    legend_name='Women Employment Rate',
    nan_fill_color="white",
    bins=scale,                            # `bins` replaces the deprecated `threshold_scale`
).add_to(m)

def _dashed_outline(feature):
    """Return the style applied to every feature: thin dashed black outline, no fill."""
    return dict(
        color='black',
        weight=0.25,
        dashArray='5, 5',  # dashed ("--") line style
        fillOpacity=0,
    )


# Overlay the department outlines on top of the choropleth in map m.
geojson_layer = GeoJson(df3, style_function=_dashed_outline).add_to(m)

## Maps (Ana Lucía)

In [None]:
# Department boundaries. The same INEI_LIMITE_DEPARTAMENTAL shapefile is already
# loaded as dpt_shp, so it is reused here instead of being read again from an
# absolute path that only exists on one author's machine.
shapefile_answer = dpt_shp.copy()

print(shapefile_answer.columns) # explore the available columns

# Total 'empl' per department, month and sex, then pivot so that each value of
# 'sex' becomes its own column (one row per department and month).
df4 = sex_work.groupby( ['dpt_code', 'month', 'sex'], as_index = False )[['empl']].sum() \
        .pivot( index = [ 'dpt_code', 'month' ] , columns = 'sex',values='empl') \
        .reset_index()

# NOTE(review): this is Mujer per 100 Hombre (a ratio), not a share of the
# total -- confirm that this is the intended definition of "proportion".
df4[ 'prop_wom' ] = ( df4.Mujer * 100 / df4.Hombre )

# Attach the monthly values to the department geometries.
df5 = dpt_shp.merge(df4, left_on = 'CCDD', right_on = 'dpt_code')

# Small multiples: one map of 'prop_wom' per month on a 4 x 3 grid.
fig, axis = plt.subplots( nrows = 4, ncols= 3, figsize = ( 15, 15 ) )

# Distinct months, computed once instead of on every iteration.
months = df5.month.unique()
panels = axis.ravel()  # row-major order: left to right, top to bottom

# zip() stops at the shorter input, so fewer than 12 months no longer raises
# an IndexError and more than 12 are truncated to the 12 available panels.
for ax, month in zip( panels, months ):

    df6 = df5[ df5.month == month ]

    df6.plot( column='prop_wom',
              cmap='Reds',
              linestyle='--',
              edgecolor='black',
              legend = True,
              ax = ax
            )

    ax.set_title( month )

# Hide any panel that received no month.
for unused_ax in panels[ len( months ): ]:
    unused_ax.set_axis_off()

from textwrap import wrap

# Orange-red sequential colour map.
cmap = plt.cm.OrRd

# User-defined class breaks for 'prop_wom'.
class_bins = dict( bins = [ 20, 30, 40, 50,  100 ] )

# Legend placed outside the axes, anchored at its upper-left corner.
legend_options = dict(
    loc='upper left',
    bbox_to_anchor=(1.01, 1),
    fontsize='x-large',
    title= "Women Proportion",
    title_fontsize = 'x-large',
    frameon= False,
)

fig, ax = plt.subplots(figsize=(20, 20))

# df6 is whatever frame the preceding month loop assigned last.
# NOTE(review): per the geopandas docs, figsize is ignored when ax is supplied.
df6.plot(
    ax = ax,
    column='prop_wom',
    cmap= cmap,
    figsize=(20, 20),
    linestyle='--',
    edgecolor='black',
    legend = True,
    scheme = "User_Defined",
    classification_kwds = class_bins,
    legend_kwds = legend_options,
)

In [None]:
# Department boundaries. The same INEI_LIMITE_DEPARTAMENTAL shapefile is already
# loaded as dpt_shp, so it is reused here instead of being read again from an
# absolute path that only exists on one author's machine.
shapefile_answer = dpt_shp.copy()

# Total 'empl' per department, month and sex, pivoted so each sex is a column.
df4 = sex_work.groupby(['dpt_code', 'month', 'sex'], as_index=False)[['empl']].sum() \
        .pivot(index=['dpt_code', 'month'], columns='sex', values='empl') \
        .reset_index()
# NOTE(review): Mujer per 100 Hombre (a ratio) -- confirm this is the intended
# definition of "proportion".
df4['prop_wom'] = (df4.Mujer * 100 / df4.Hombre)

# Merge with the shapefile
df5 = shapefile_answer.merge(df4, left_on='CCDD', right_on='dpt_code')

# Folium map centred on a middle point, zoom 6. `fm`, `Choropleth` and `GeoJson`
# are the names imported at the top of the notebook.
map2 = fm.Map(location=[-9.19, -75.0152], zoom_start=6)

# Matplotlib grid: one panel per month
fig, axis = plt.subplots(nrows=4, ncols=3, figsize=(15, 15))

# Distinct months, computed once; zip() below stops at the shorter input, so
# fewer than 12 months no longer raises an IndexError.
months = df5.month.unique()
panels = axis.ravel()  # row-major order: left to right, top to bottom

for position, (ax, month) in enumerate(zip(panels, months)):
    # Rows of the current month
    df6 = df5[df5.month == month]

    # Only the first month is visible initially; the others can be switched on
    # from the layer control instead of all twelve being stacked at once.
    first_layer = position == 0

    # Interactive choropleth layer for this month. Values are linked to the
    # polygons through 'dpt_code' (a GeoJSON property of every feature);
    # geometry is not a feature property and cannot serve as the key.
    Choropleth(
        geo_data=df6,
        data=df6,
        columns=['dpt_code', 'prop_wom'],
        key_on='feature.properties.dpt_code',
        fill_color='OrRd',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=f'Women Proportion - {month}',
        name=f'Women Proportion - {month}',
        show=first_layer,
    ).add_to(map2)

    # Polygon boundaries only: a transparent fill keeps the overlay from
    # tinting the choropleth colours underneath.
    GeoJson(
        df6,
        name=f'geojson_{month}',
        style_function=lambda feature: {'fillOpacity': 0},
        show=first_layer,
    ).add_to(map2)

    # Static map of the same month on the current matplotlib panel
    df6.plot(
        column='prop_wom',
        cmap='Reds',
        linestyle='--',
        edgecolor='black',
        legend=True,
        ax=ax
    )

    # Set the title
    ax.set_title(month)

# Hide any panel that received no month.
for unused_ax in panels[len(months):]:
    unused_ax.set_axis_off()

# Add a layer control so the monthly layers can be toggled
fm.LayerControl().add_to(map2)

# Save the map
map2.save("map2.html")

# Show the map (only rendered when this is the last expression of the cell)
map2

# Folium map centred at the desired location. `fm`, `Choropleth` and `GeoJson`
# are the names imported at the top of the notebook.
map3 = fm.Map(location=[-9.19, -75.0152], zoom_start=6)

# Matplotlib figure for the static version of the map
fig, ax = plt.subplots(figsize=(20, 20))

# df6 is a filtered slice of df5 left over from the month loop; take an explicit
# copy before assigning so pandas does not warn about writing to a slice.
df6 = df6.copy()

# Blank out the value for LIMA so it is rendered as "missing" on both maps.
df6.loc[(df6.NOMBDEP == 'LIMA'), 'prop_wom'] = np.nan

# Reversed orange-red palette for the matplotlib plot
cmap = plt.cm.OrRd_r

# Interactive choropleth.
#  - Values are linked to polygons through 'dpt_code'; geometry is not a feature
#    property, so it cannot be used in key_on.
#  - fill_color takes a ColorBrewer palette name (a string), not a matplotlib
#    colormap object; 'OrRd_r' is the reversed palette matching `cmap`.
#  - nan_fill_color is folium's option for missing values (missing_kwds belongs
#    to GeoDataFrame.plot).
# NOTE(review): folium requires every non-missing value to fall inside `bins`;
# confirm that prop_wom stays within 20-100.
Choropleth(
    geo_data=df6,
    data=df6,
    columns=['dpt_code', 'prop_wom'],
    key_on='feature.properties.dpt_code',
    fill_color='OrRd_r',
    nan_fill_color="#DADADB",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Women Proportion',
    bins=[20, 30, 40, 50, 100],
).add_to(map3)

# Polygon boundaries (optional); transparent fill so the overlay does not tint
# the choropleth colours underneath.
GeoJson(
    df6,
    name='geojson',
    style_function=lambda feature: {'fillOpacity': 0},
).add_to(map3)

# Dedicated axes to the right of the map for the colour bar
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)

# Static map. The colour bar is drawn by the plot itself on `cax`, with the same
# 20-100 normalisation as the colours, so legend and map always agree and no
# second, hand-built colour bar is needed. Missing values are drawn in grey.
df6.plot(
    column='prop_wom',
    cmap=cmap,
    linestyle='--',
    edgecolor='black',
    legend=True,
    vmin=20,
    vmax=100,
    cax=cax,
    legend_kwds={'label': "Women Proportion"},
    missing_kwds=dict(color="#DADADB"),
    ax=ax
)

# Configure title
ax.set_title("Women Proportion")

# Save the map as an HTML file
map3.save("map3.html")

# Show the map
# map3

## Maps (Ilenia)

In [None]:
# NOTE(review): ctr_shp is not defined in the visible part of this notebook;
# confirm it is created before this cell runs.

# Reproject once to EPSG:4326 and use that frame for BOTH the GeoJSON and the
# map centre, so the centre is in the same coordinate system as the data.
ctr_wgs84 = ctr_shp.to_crs(epsg=4326)

# Convert GeoDataFrame to GeoJSON
geojson_data = ctr_wgs84.to_json()

# Centre the map at the average centroid, given as [y, x]
ctr_centroids = ctr_wgs84.geometry.centroid
center_coords = [ctr_centroids.y.mean(), ctr_centroids.x.mean()]
m = fm.Map(location=center_coords, zoom_start=5)

# Add GeoJSON data to the map
GeoJson(geojson_data).add_to(m)

# Display the map
#m

In [None]:
import folium
import geopandas as gpd

# Reproject once to EPSG:4326 and use that frame for BOTH the GeoJSON and the
# map centre, so the centre is in the same coordinate system as the data.
dpt_wgs84 = dpt_shp.to_crs(epsg=4326)

# Convert GeoDataFrame to GeoJSON
geojson_data = dpt_wgs84.to_json()

# Centre the map at the average centroid, given as [y, x]
dpt_centroids = dpt_wgs84.geometry.centroid
center_coords = [dpt_centroids.y.mean(), dpt_centroids.x.mean()]
m = folium.Map(location=center_coords, zoom_start=5)

# Add GeoJSON data to the map
folium.GeoJson(geojson_data).add_to(m)

# Display the map
#m