In [44]:
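##### Clean India district-level agricultural labor data
# Converts worker counts from thousands to individuals and reshapes to wide format (one column per year)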

import os
import pandas as pd

In [45]:
##### Load data

# Base directory: the parent of the current working directory
cd = os.path.dirname(os.getcwd())

# Input and output locations, all relative to the base directory
labor_csv = f"{cd}/Data/Raw/Sub_National/India/ICRISAT-District Level Data.csv"
codes_csv = f"{cd}/Data/Correspondence_tables/IND_districts.csv"
save_path = f"{cd}/Data/Clean/Labor/IND_districts_labor_census.csv"

# Raw ICRISAT district-level table
labor = pd.read_csv(labor_csv)

# District correspondence table
IND_codes = pd.read_csv(codes_csv)

In [46]:
##### Clean

# Convert the source column (reported in thousands) to a count of workers.
# The value is always recomputed from the raw column, so this line is safe to re-run.
# NOTE(review): if the source encodes missing values with a sentinel (e.g. -1),
# the sentinel is scaled here too -- confirm against the data dictionary.
labor['ag_workers'] = labor['AGRICULTURAL WORKERS POPULATION (1000 Number)'] * 1000

# Attach the district IDs from the correspondence table (inner join: rows whose
# district/state codes have no match are dropped).
# The result is bound to a new name instead of overwriting `labor`: re-running this
# cell on an already-merged frame would suffix the overlapping columns (_x/_y) and
# the pivot below would then fail with a KeyError on 'GID_2'.
labor_with_ids = labor.merge(
    IND_codes,
    left_on=['Dist Code', 'State Code'],
    right_on=['DIST_CODE', 'STATE_CODE'],
    how='inner',
)

# Reshape to wide: one row per GID_2, one column per Year.
# pivot raises ValueError if any (GID_2, Year) pair occurs more than once.
labor_wide = labor_with_ids.pivot(
    index='GID_2',
    columns='Year',
    values='ag_workers',
).reset_index()

# Label the units of the year columns
labor_wide['Units'] = 'Ag labor - jobs'

# Bring the state and district codes back in (they are not carried through the pivot)
labor_wide = labor_wide.merge(IND_codes, on='GID_2', how='inner')

# Store the codes as nullable integers
labor_wide['STATE_CODE'] = labor_wide['STATE_CODE'].astype('Int64')
labor_wide['DIST_CODE'] = labor_wide['DIST_CODE'].astype('Int64')

# Keep only the identifier, unit and year columns, in output order
columns_to_keep = ['GID_2', 'STATE_CODE', 'DIST_CODE', 'Units', 2001, 2011]
labor_wide = labor_wide[columns_to_keep]

In [47]:
##### Save cleaned data

# Create the output folder first: to_csv does not create missing directories,
# so without this the save fails when the folder does not exist yet.
os.makedirs(os.path.dirname(save_path), exist_ok=True)
labor_wide.to_csv(save_path, index=False)