In [1]:
import pandas as pd

In [2]:
# Load the PLFS master extract into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path, so the notebook
# only runs where this exact folder exists.
plfs_master_csv = r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\PLFS_Master_2017_2024_crisp.csv"
df = pd.read_csv(plfs_master_csv)

In [3]:
# Preview the first five rows of the dataset
df.head(5)

Unnamed: 0,State/UT Code,Age,Marital Status,General Edu Level,Technical Edu Level,Current Weekly Status,Sub-sample wise Multiplier,Year
0,1,55,2.0,1.0,1.0,93,307015,2017-18
1,1,26,1.0,12.0,1.0,31,307015,2017-18
2,1,24,1.0,10.0,1.0,91,307015,2017-18
3,1,33,2.0,7.0,1.0,93,307015,2017-18
4,1,30,2.0,10.0,1.0,11,418657,2017-18


In [5]:
# List every column name in the dataset
list(df.columns)

['State/UT Code',
 'Age',
 'Marital Status',
 'General Edu Level',
 'Technical Edu Level',
 'Current Weekly Status',
 'Sub-sample wise Multiplier',
 'Year']

### Female Labour Force Participation Rate (FLFPR) across all states, females aged 15–59 (2017-18 to 2023-24)

In [7]:
# Coerce the columns used in the calculations to numeric dtypes.
# With errors="coerce", any value that cannot be parsed becomes NaN.
numeric_cols = ["Age", "Current Weekly Status", "Sub-sample wise Multiplier"]
for column_name in numeric_cols:
    df[column_name] = pd.to_numeric(df[column_name], errors="coerce")

print(df.dtypes)


State/UT Code                   int64
Age                             int64
Marital Status                float64
General Edu Level             float64
Technical Edu Level           float64
Current Weekly Status           int64
Sub-sample wise Multiplier      int64
Year                           object
dtype: object


In [11]:
# Numeric survey year: the first four characters of the "Year" label
# (e.g. "2017-18" -> 2017), used as the grouping key below.
year_prefix = df["Year"].str.slice(0, 4)
df["Year_num"] = year_prefix.astype(int)
print(df.dtypes)

State/UT Code                   int64
Age                             int64
Marital Status                float64
General Edu Level             float64
Technical Edu Level           float64
Current Weekly Status           int64
Sub-sample wise Multiplier      int64
Year                           object
Year_num                        int32
dtype: object


In [12]:
# Labour-force indicator derived from the Current Weekly Status (CWS) code

def in_labour_force(status):
    """Return 1 if a CWS code counts as in the labour force, otherwise 0.

    Codes 11 through 72 (employed) and codes 81 and 82 (unemployed) map
    to 1. Every other value, including NaN, maps to 0.
    """
    is_working = 11 <= status <= 72
    is_seeking = status in (81, 82)
    return 1 if (is_working or is_seeking) else 0

# Apply the indicator to every record's Current Weekly Status code.
# Resulting coding for each woman:
#    1 → in labour force
#    0 → not in labour force
df["In_LF"] = df["Current Weekly Status"].map(in_labour_force)

In [13]:
# Female labour force participation rate by year and state:
# weighted sum of women in labour force ÷ total weighted women × 100.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
#
# NOTE(review): no age or sex filter is applied in this notebook, so the
# "15_59" label assumes the input file is already restricted to females
# aged 15–59 — TODO confirm.
flfp_state = (
    df[["Year_num", "State/UT Code", "In_LF", "Sub-sample wise Multiplier"]]
    .assign(lf_weight=lambda d: d["In_LF"] * d["Sub-sample wise Multiplier"])
    .groupby(["Year_num", "State/UT Code"])[["lf_weight", "Sub-sample wise Multiplier"]]
    .sum()
    .pipe(lambda t: t["lf_weight"] / t["Sub-sample wise Multiplier"] * 100)
    .reset_index(name="FLFP_rate_15_59")
)


In [14]:
# Show the state-level FLFP table as the cell's output instead of print()
# (pandas abbreviates long frames on display).
flfp_state

     Year_num  State/UT Code  FLFP_rate_15_59
0        2017              1        22.905784
1        2017              2        22.235569
2        2017              3        19.232934
3        2017              4        22.803601
4        2017              5        15.627378
..        ...            ...              ...
234      2023             32        32.383840
235      2023             33        29.012279
236      2023             34        29.399776
237      2023             35        46.397373
238      2023             36        32.098682

[239 rows x 3 columns]


In [17]:
# First five rows of the state-level FLFP table, shown as the cell's output
flfp_state.head()

   Year_num  State/UT Code  FLFP_rate_15_59
0      2017              1        22.905784
1      2017              2        22.235569
2      2017              3        19.232934
3      2017              4        22.803601
4      2017              5        15.627378


In [18]:
# Distinct survey years present in the state-level table
flfp_state["Year_num"].unique()

[2017 2018 2019 2020 2021 2022 2023]


In [19]:
# Summary statistics of the state-level FLFP rates (sanity check on the range)
flfp_state["FLFP_rate_15_59"].describe()

count    239.000000
mean      25.149163
std        8.480930
min        6.964806
25%       19.002338
50%       25.165068
75%       30.185953
max       53.532831
Name: FLFP_rate_15_59, dtype: float64


In [20]:
# National female labour force participation rate by year:
# weighted sum of women in labour force ÷ total weighted women × 100.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
flfp_india = (
    df[["Year_num", "In_LF", "Sub-sample wise Multiplier"]]
    .assign(lf_weight=lambda d: d["In_LF"] * d["Sub-sample wise Multiplier"])
    .groupby(["Year_num"])[["lf_weight", "Sub-sample wise Multiplier"]]
    .sum()
    .pipe(lambda t: t["lf_weight"] / t["Sub-sample wise Multiplier"] * 100)
    .reset_index(name="FLFP_rate_india_15_59")
)

In [23]:
# Show up to the first eight rows of the national FLFP series
flfp_india.iloc[:8]

Unnamed: 0,Year_num,FLFP_rate_india_15_59
0,2017,20.274804
1,2018,21.215894
2,2019,22.486564
3,2020,22.37266
4,2021,22.04928
5,2022,24.569418
6,2023,26.826537


In [25]:
# State-wise and national FLFP tables are ready: write each to CSV without the row index
for flfp_table, flfp_csv_path in (
    (flfp_state, r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\FLFP_State_2017_2024.csv"),
    (flfp_india, r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\FLFP_India_2017_2024.csv"),
):
    flfp_table.to_csv(flfp_csv_path, index=False)


### Female Worker Population Ratio (WPR) across all states

In [26]:
# Employment flag: women who worked or had a job during the reference week

def is_employed(status):
    """Return 1 when the CWS code lies in 11–72 (employed), otherwise 0.

    Values outside that range, including NaN, yield 0.
    """
    return int(11 <= status <= 72)

# 1 → employed, 0 → not employed, derived from each record's CWS code
df["Employed"] = df["Current Weekly Status"].map(is_employed)


In [27]:
# Female worker population ratio by year and state:
# weighted employed women ÷ total weighted women × 100.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
fwpr_state = (
    df[["Year_num", "State/UT Code", "Employed", "Sub-sample wise Multiplier"]]
    .assign(employed_weight=lambda d: d["Employed"] * d["Sub-sample wise Multiplier"])
    .groupby(["Year_num", "State/UT Code"])[["employed_weight", "Sub-sample wise Multiplier"]]
    .sum()
    .pipe(lambda t: t["employed_weight"] / t["Sub-sample wise Multiplier"] * 100)
    .reset_index(name="FWPR_15_59")
)


In [28]:
# Summary statistics of the state-level worker population ratios
fwpr_state["FWPR_15_59"].describe()


count    239.000000
mean      21.439310
std        7.499991
min        3.313475
25%       16.047049
50%       21.531392
75%       26.596488
max       48.701543
Name: FWPR_15_59, dtype: float64


In [29]:
# National-level female WPR trend:
# weighted employed women ÷ total weighted women × 100, per survey year.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
fwpr_india = (
    df[["Year_num", "Employed", "Sub-sample wise Multiplier"]]
    .assign(employed_weight=lambda d: d["Employed"] * d["Sub-sample wise Multiplier"])
    .groupby("Year_num")[["employed_weight", "Sub-sample wise Multiplier"]]
    .sum()
    .pipe(lambda t: t["employed_weight"] / t["Sub-sample wise Multiplier"] * 100)
    .reset_index(name="FWPR_India_15_59")
)

fwpr_india


Unnamed: 0,Year_num,FWPR_India_15_59
0,2017,17.616928
1,2018,18.607801
2,2019,19.532291
3,2020,19.106728
4,2021,19.625956
5,2022,22.170141
6,2023,24.461788


In [30]:
# Write the state-level and national WPR tables to CSV (row index omitted)
fwpr_outputs = {
    r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\FWPR_State_2017_2024.csv": fwpr_state,
    r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\FWPR_India_2017_2024.csv": fwpr_india,
}
for fwpr_csv_path, fwpr_table in fwpr_outputs.items():
    fwpr_table.to_csv(fwpr_csv_path, index=False)

### Female Unemployment Rate across all states

In [31]:
def is_unemployed(status):
    """Return 1 when the CWS code is 81 or 82 (unemployed), otherwise 0.

    Any other value, including NaN, yields 0.
    """
    return int(status in (81, 82))

# 1 → unemployed, 0 → not unemployed, derived from each record's CWS code
df["Unemployed"] = df["Current Weekly Status"].map(is_unemployed)


In [32]:
# Female unemployment rate by year and state:
# weighted unemployed women ÷ weighted women in the labour force × 100.
# A group with zero labour-force weight yields NaN (0/0), not an error.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
fur_state = (
    df[["Year_num", "State/UT Code", "Unemployed", "In_LF", "Sub-sample wise Multiplier"]]
    .assign(
        unemployed_weight=lambda d: d["Unemployed"] * d["Sub-sample wise Multiplier"],
        lf_weight=lambda d: d["In_LF"] * d["Sub-sample wise Multiplier"],
    )
    .groupby(["Year_num", "State/UT Code"])[["unemployed_weight", "lf_weight"]]
    .sum()
    .pipe(lambda t: t["unemployed_weight"] / t["lf_weight"] * 100)
    .reset_index(name="FUR_15_59")
)


In [33]:
# Summary statistics of the state-level unemployment rates
fur_state["FUR_15_59"].describe()


count    239.000000
mean      14.545652
std        9.611629
min        0.131120
25%        8.570084
50%       12.388231
75%       17.661427
max       79.829830
Name: FUR_15_59, dtype: float64


In [34]:
# National female unemployment rate by year:
# weighted unemployed women ÷ weighted women in the labour force × 100.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
fur_india = (
    df[["Year_num", "Unemployed", "In_LF", "Sub-sample wise Multiplier"]]
    .assign(
        unemployed_weight=lambda d: d["Unemployed"] * d["Sub-sample wise Multiplier"],
        lf_weight=lambda d: d["In_LF"] * d["Sub-sample wise Multiplier"],
    )
    .groupby("Year_num")[["unemployed_weight", "lf_weight"]]
    .sum()
    .pipe(lambda t: t["unemployed_weight"] / t["lf_weight"] * 100)
    .reset_index(name="FUR_India_15_59")
)

fur_india


Unnamed: 0,Year_num,FUR_India_15_59
0,2017,13.109255
1,2018,12.293111
2,2019,13.13795
3,2020,14.59787
4,2021,10.990491
5,2022,9.765298
6,2023,8.81496


In [35]:
# Write the state-level and national unemployment-rate tables to CSV (row index omitted)
fur_state.to_csv(
    path_or_buf=r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\FUR_State_2017_2024.csv",
    index=False,
)
fur_india.to_csv(
    path_or_buf=r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\FUR_India_2017_2024.csv",
    index=False,
)


### Literate females as a share of the female labour force

In [36]:
# Literacy flag from the general education code: 1 for codes 2–13 (inclusive), else 0.
# Series.between replaces the row-wise lambda; values and dtype are unchanged.
# Missing (NaN) education codes fail the range test, so they are coded 0.
df["Literate"] = df["General Edu Level"].between(2, 13).astype("int64")

# Now:
# Literate = 1 → literate female
# Literate = 0 → illiterate female

In [37]:
# Subset of records that are both:
#   - literate (Edu Level 2–13), and
#   - in the labour force (employed or unemployed).
# NOTE(review): this frame is not referenced again in the visible cells.
is_literate = df["Literate"].eq(1)
is_in_labour_force = df["In_LF"].eq(1)
literate_lf = df[is_literate & is_in_labour_force]

In [38]:
# Share of literate women among women in the labour force, by year and state:
# weighted literate women in LF ÷ total weighted women in LF × 100.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
#
# NOTE(review): rows with a missing education code carry Literate = 0 and stay
# in the denominator, i.e. they count as not literate — confirm this is intended.
literate_lf_share = (
    df.loc[
        df["In_LF"] == 1,
        ["Year_num", "State/UT Code", "Literate", "Sub-sample wise Multiplier"],
    ]
    .assign(literate_weight=lambda d: d["Literate"] * d["Sub-sample wise Multiplier"])
    .groupby(["Year_num", "State/UT Code"])[["literate_weight", "Sub-sample wise Multiplier"]]
    .sum()
    .pipe(lambda t: t["literate_weight"] / t["Sub-sample wise Multiplier"] * 100)
    .reset_index(name="Literate_Female_Share_in_LF_15_59")
)


In [39]:
# National share of literate women among women in the labour force, by year:
# weighted literate women in LF ÷ total weighted women in LF × 100.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
literate_lf_india = (
    df.loc[
        df["In_LF"] == 1,
        ["Year_num", "Literate", "Sub-sample wise Multiplier"],
    ]
    .assign(literate_weight=lambda d: d["Literate"] * d["Sub-sample wise Multiplier"])
    .groupby("Year_num")[["literate_weight", "Sub-sample wise Multiplier"]]
    .sum()
    .pipe(lambda t: t["literate_weight"] / t["Sub-sample wise Multiplier"] * 100)
    .reset_index(name="Literate_Female_Share_in_LF_India_15_59")
)

literate_lf_india


Unnamed: 0,Year_num,Literate_Female_Share_in_LF_India_15_59
0,2017,80.89607
1,2018,80.497049
2,2019,82.163692
3,2020,81.855917
4,2021,83.725395
5,2022,84.050195
6,2023,85.095107


In [40]:
# Write the state-level and national literate-share-in-LF tables to CSV (row index omitted)
for literate_lf_table, literate_lf_csv_path in (
    (literate_lf_share, r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\Literate_Female_Share_LF_State_2017_2024.csv"),
    (literate_lf_india, r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\Literate_Female_Share_LF_India_2017_2024.csv"),
):
    literate_lf_table.to_csv(literate_lf_csv_path, index=False)


### Literate females who are not in the labour force

In [41]:
# The Literate flag (education level 2–13) and the In_LF flag (CWS employed or
# unemployed) are already built earlier in this notebook with exactly these
# rules. Re-deriving them here duplicated the logic and silently shadowed the
# first in_labour_force definition. This cell now only checks that both
# columns are present before the section uses them.
missing_flag_columns = {"Literate", "In_LF"} - set(df.columns)
assert not missing_flag_columns, (
    f"Run the earlier indicator cells first; missing columns: {sorted(missing_flag_columns)}"
)


In [42]:
# Records that are literate but outside the labour force.
# NOTE(review): this frame is not referenced again in the visible cells.
literate_notlf = df.loc[df["Literate"].eq(1) & df["In_LF"].eq(0)]


In [43]:
# Share of literate women who are outside the labour force, by year and state:
# weighted literate women not in LF ÷ total weighted literate women × 100.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
literate_notlf_share = (
    df.loc[
        df["Literate"] == 1,
        ["Year_num", "State/UT Code", "In_LF", "Sub-sample wise Multiplier"],
    ]
    .assign(not_lf_weight=lambda d: (1 - d["In_LF"]) * d["Sub-sample wise Multiplier"])
    .groupby(["Year_num", "State/UT Code"])[["not_lf_weight", "Sub-sample wise Multiplier"]]
    .sum()
    .pipe(lambda t: t["not_lf_weight"] / t["Sub-sample wise Multiplier"] * 100)
    .reset_index(name="Literate_Female_Share_Not_in_LF_15_59")
)


In [44]:
# Summary statistics of the state-level share of literate women outside the labour force
literate_notlf_share["Literate_Female_Share_Not_in_LF_15_59"].describe()


count    239.000000
mean      75.395881
std        8.630801
min       46.646604
25%       70.206596
50%       76.079746
75%       81.590232
max       93.273531
Name: Literate_Female_Share_Not_in_LF_15_59, dtype: float64


In [45]:
# National share of literate women who are outside the labour force, by year:
# weighted literate women not in LF ÷ total weighted literate women × 100.
#
# Grouped sums replace groupby(...).apply(lambda ...). The numbers are the
# same, but there is no Python call per group and no pandas >= 2.2
# deprecation warning about apply operating on the grouping columns.
literate_notlf_india = (
    df.loc[
        df["Literate"] == 1,
        ["Year_num", "In_LF", "Sub-sample wise Multiplier"],
    ]
    .assign(not_lf_weight=lambda d: (1 - d["In_LF"]) * d["Sub-sample wise Multiplier"])
    .groupby("Year_num")[["not_lf_weight", "Sub-sample wise Multiplier"]]
    .sum()
    .pipe(lambda t: t["not_lf_weight"] / t["Sub-sample wise Multiplier"] * 100)
    .reset_index(name="Literate_Female_Share_Not_in_LF_India_15_59")
)

literate_notlf_india


Unnamed: 0,Year_num,Literate_Female_Share_Not_in_LF_India_15_59
0,2017,80.353865
1,2018,79.60939
2,2019,78.093258
3,2020,78.387213
4,2021,78.50705
5,2022,76.098279
6,2023,73.871457


In [46]:
# Write the state-level and national literate-not-in-LF tables to CSV (row index omitted)
literate_notlf_outputs = {
    r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\Literate_Female_NotLF_State_2017_2024.csv": literate_notlf_share,
    r"C:\Users\ZAINAB KHAN\Desktop\PLFS Project\Results\Literate_Female_NotLF_India_2017_2024.csv": literate_notlf_india,
}
for literate_notlf_csv_path, literate_notlf_table in literate_notlf_outputs.items():
    literate_notlf_table.to_csv(literate_notlf_csv_path, index=False)
