In [2]:

# Libraries

import pandas as pd
import numpy as np

import chardet
import folium as fm


from folium import Marker, GeoJson, Choropleth
from folium.plugins import MarkerCluster, HeatMap, StripePattern

import geopandas as gpd
from geopandas import GeoSeries
from shapely.geometry import Point, LineString

# Get data of labor
sex_work = pd.read_stata(r'/Users/jamesmedinavanini/Documents/GitHub/QLAB_Summer_Python/_data/8_trab_sex_20.dta' )
sex_work[ 'dpt_code' ] = sex_work.cod_ubigeo.str[ :2 ].copy()
sex_work[ 'prov_code' ] = sex_work.cod_ubigeo.str[ :4 ].copy()

"""
The code creates a new column filled with elements that are extracted from the first two 
characters in the column "cod_ubigeo"
    ".str" treats each element in the column as a string, so I can  use string operations
    ".copy()" is used to ensure that the new column ins created as a separate copy,
    not just a reference of the original data.
"""

# Sex work
women_work = sex_work[ sex_work.sex == 'Mujer' ].copy().reset_index( drop = True )

"""
The code filters the rows and creates a new data frame where the value in "sex" column is 
equal to "Mujer".

".reset_index" discards the old index column. This is done to have a clean index for the 
new data frame.
"""

# get data from lima
women_work[ 'month' ] = pd.to_datetime( women_work.month , format = '%B' ) \
                                    .dt.strftime( '%m' ) \
                                    .astype( int )

# Sort by department and month
women_work.sort_values([ 'dpt_code', 'month'], inplace = True )

# Get the total number of women workers by dpt
dpt_women_work = women_work.groupby( [ 'dpt_code', 'month'], as_index = False )[['empl']] \
                            .sum() \
                            .rename( columns = {'empl' :'women_empl'})

# Sort by dpt code and month
dpt_women_work.sort_values([ 'dpt_code', 'month'], inplace = True )

df2 = dpt_women_work.groupby( ['dpt_code'],as_index = False )[['women_empl']].mean()


dpt_shp = gpd.read_file( r'/Users/jamesmedinavanini/Documents/GitHub/QLAB_Summer_Python/_data/INEI_LIMITE_DEPARTAMENTAL/INEI_LIMITE_DEPARTAMENTAL.shp' )

df3 = dpt_shp.merge( df2, left_on = 'CCDD', right_on = 'dpt_code'  )

In [None]:
# We set the code as a commentary to save space so we can upload the file to GitHub

center = [-10.1900, -75.0152]
m1 = fm.Map(location=center, tiles="cartodbpositron", zoom_start=6, control_scale=True)

scale1 = [0, 10000, 40000, 60000, 100000, df3['women_empl'].max()]

choropleth = Choropleth(
    geo_data=df3,
    name='choropleth',
    data=df3,
    columns=['dpt_code', 'women_empl'],
    key_on='feature.properties.dpt_code',
    fill_color='YlOrRd',
    fill_opacity=1,
    line_opacity=0.2,
    legend_name='Women Employment Rate',
    nan_fill_color="white",
    threshold_scale = scale1,
    highlight=True,  # If you want to highlight the region on hover
).add_to(m1)
m1

In [30]:
df4 = sex_work.groupby( ['dpt_code', 'month', 'sex'], as_index = False )[['empl']].sum() \
        .pivot( index = [ 'dpt_code', 'month' ] , columns = 'sex',values='empl') \
        .reset_index()

df4[ 'prop_wom' ] = ( df4.Mujer * 100 / df4.Hombre )

df5 = dpt_shp.merge( df4, left_on = 'CCDD', right_on = 'dpt_code'  )

idx = 0
for i in range( 4 ):
    for j in range ( 3 ):
        
        
        month = df5.month.unique()[ idx ]
        
        df6 = df5[ df5.month == month ]
        
        idx = idx + 1
        
df6.loc[ (df6.NOMBDEP == 'LIMA'), 'prop_wom' ] = np.nan

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)


In [None]:
# Women Proportion

# Import necessary libraries
import folium as fm
from folium import Choropleth

# Set the center of the map
center = [-10.1900, -75.0152]

# Create a Folium map with specified parameters
m1 = fm.Map(location=center, tiles="cartodbpositron", zoom_start=6, control_scale=True)

# Define a scale for the choropleth map based on the minimum, mean, and maximum values of the 'prop_wom' column in the DataFrame 'df6'
scale1 = [0, df6['prop_wom'].min(), df6['prop_wom'].mean(), df6['prop_wom'].max()]

# Create a choropleth layer using the specified GeoJSON data ('df6') and DataFrame ('df6')
choropleth = Choropleth(
    geo_data=df6,
    name='choropleth',
    data=df6,
    columns=['dpt_code', 'prop_wom'],
    key_on='feature.properties.dpt_code',
    fill_color='YlOrRd',
    fill_opacity=1,
    line_opacity=0.1,
    legend_name='Women Proportion',
    nan_fill_color="white",
    threshold_scale=scale1,
    highlight=True,  # If you want to highlight the region on hover
).add_to(m1)

# Display the map
m1
