#### Calculating the percent share of agriculture in total employment per province for December 2022

###### Data Source: PSA Labor Force Survey (LFS), December 2022

In [None]:
import pandas as pd
# import numpy as np

##### Reading CSV file as DF

In [None]:
# Path to the survey file and the only columns this analysis needs
fp = r"LFS PUF December 2022.CSV"
wanted_cols = ['PUFREG', 'PUFC09_WORK', 'PUFC11A_PROVMUN', 'PUFC15_PKB']

# Fields made up solely of 1 to 6 spaces are read as missing values
blank_markers = [' ' * width for width in range(1, 7)]

df = pd.read_csv(fp, usecols=wanted_cols, na_values=blank_markers)

# Preview a few random rows
df.sample(5)

##### Check column data types & DF shape

In [None]:
# Show the dtype pandas inferred for each loaded column
df.dtypes

In [None]:
# Show the (rows, columns) size of the loaded data
df.shape

##### Save to a new DF only rows where work indicator = 1

In [None]:
# Boolean mask: True for rows whose work indicator equals 1
has_work = df['PUFC09_WORK'] == 1

# Keep only the rows flagged by the mask
work_df = df.loc[has_work]
work_df.sample(5)

##### Rename columns and drop work indicator column

In [None]:
# Map raw survey column names to readable ones; the work indicator is
# left out on purpose so that it gets dropped below
col_names = dict(
    PUFREG='REGION',
    PUFC11A_PROVMUN='PROV_MUN',
    PUFC15_PKB='INDUSTRY',
)

# Select only the mapped columns (in mapping order), then rename them
kept_raw_cols = list(col_names)
work_df = work_df[kept_raw_cols].rename(columns=col_names)
work_df.sample(5)

##### Parse province out of prov-muni column

In [None]:
# Cast prov-muni codes to int so the decimal part disappears
prov_mun_codes = work_df['PROV_MUN'].astype(int)
work_df['PROV_MUN'] = prov_mun_codes

# Render the codes as text, left-padded with zeroes to 4 characters
padded_codes = prov_mun_codes.astype(str).str.zfill(4)

# The first 2 characters of the padded code are the province code
work_df['PROVINCE'] = padded_codes.str[:2]

work_df.sample(5)

##### Group DF by province code

In [None]:
# One group per distinct province code
by_prov = work_df.groupby(by='PROVINCE')

# Number of province groups found
by_prov.ngroups

##### Calculate percent share of agriculture in total employment per province

In [None]:
# Summarise employment per province with vectorised groupby aggregations.
# Every column is computed once for all provinces, so numeric columns keep
# numeric dtypes and no DataFrame is rebuilt on each loop iteration.
industry_by_prov = by_prov['INDUSTRY']

agshare_by_prov = pd.DataFrame({
    # Region code per province: median of the group's region codes,
    # truncated to int
    'REGION': by_prov['REGION'].median().astype(int),

    # Total employed = rows with a non-missing industry code
    'TOTAL_EMPLOYED': industry_by_prov.count(),

    # Employed in agriculture = rows whose industry code is below 4
    # (missing codes compare False, so they are never counted)
    'AGRI_EMPLOYED': industry_by_prov.agg(lambda codes: (codes < 4).sum()),
})

# Percent share of agriculture in total employment, rounded to 2 decimals
agri_ratio = agshare_by_prov['AGRI_EMPLOYED'] / agshare_by_prov['TOTAL_EMPLOYED']
agshare_by_prov['PERCENT_AGRI'] = (agri_ratio * 100).round(2)

# The group keys (province codes) form the index; name it explicitly so the
# exported CSV header is stable
agshare_by_prov = agshare_by_prov.rename_axis('PROVINCE')
agshare_by_prov

In [None]:
# Inspect the dtype of each column in the per-province summary
agshare_by_prov.dtypes

In [None]:
# Order provinces by agriculture share (ascending)
percent_sorted = agshare_by_prov[['PERCENT_AGRI']].sort_values(by='PERCENT_AGRI')

# Horizontal bar chart, one bar per province; tall figure to fit all rows
percent_sorted.plot.barh(linewidth=2, width=0.5, figsize=(7, 21))

In [None]:
# Write the per-province summary (index included) to a CSV file
out_fp = r"Agri Share by Province - LFS Dec 2022.csv"
agshare_by_prov.to_csv(out_fp)