In [None]:
'''
This notebook contains the code to merge all male data across the years based on sections.
There are datasets for 2012, 2012-1.5, 2013 and 2014
The different sections that will be merged are as follows:
**2012**
1. Cover
2. Roster
3. Section 1: Education (All men 18 and above)
4. Section 2: Agriculture
5. Section 3: Assets
6. Section 4: Consumption and Expenditure
7. Section 5: Credit
8. Section 6: Employment and Income
9. Section 7: Economic Events/Shocks
10. Section 8: Community Participation and Social Network Membership

**2013**
1. Cover
2. Roster
3. Section 1: Education: Males 19 years and older
4. Section 2: Agriculture
5. Section 3: Assets
6. Section 4: Consumption and Expenditure
7. Section 5: Credit
8. Section 6: Employment and Income
9. Section 7: Health
10. Section 8: Political Participation and Governance

**2014**
1. Cover
2. Roster
3. Section 1: Education: Males 19 years and older
4. Section 2: Agriculture
5. Section 3: Assets
6. Section 4: Consumption and Expenditure
7. Section 5: Credit
8. Section 6: Employment and Income
9. Section 7: Economic Events/Shocks
10. Section 8: Participation in Social Safety Net
11. Section 9: Siblings
12. Section 10: Transfers
13. Section 11: Health and Nutrition

'''

In [2]:
# The 2012_5 round ships this section as two workbooks (s7p1 and s7p2).
# Combine them into a single CSV so the round can be handled like the others.

import pandas as pd

# Read both parts of the section
df1 = pd.read_excel(r"C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\8. Section 2 Part 6 PAID FARM WORK DURING R&K\2012_5_s7p1.xlsx")
df2 = pd.read_excel(r"C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\8. Section 2 Part 6 PAID FARM WORK DURING R&K\2012_5_s7p2.xlsx")

# Stack the rows of part 2 below the rows of part 1. This is a row-wise concat,
# not a key-based join: a column that exists in only one part is NaN on the
# other part's rows.
# NOTE(review): if both parts describe the same respondents, a join on the
# household/person identifiers may be what is wanted - confirm.
merged_df = pd.concat([df1, df2], ignore_index=True)

# Remove every column whose name contains "Unnamed", then every remaining
# column whose name contains a space
merged_df.drop(
    columns=merged_df.columns[merged_df.columns.str.contains('Unnamed', case=True)],
    inplace=True,
)
merged_df.drop(
    columns=merged_df.columns[merged_df.columns.str.contains(' ', case=False)],
    inplace=True,
)

# Write the combined frame to the current working directory; index=True stores
# the row index as the first CSV column.
# NOTE(review): the loading cell reads this file name from the data folder -
# confirm the notebook runs from that folder or move the file there.
merged_df.to_csv('2012_5_s7p1&p2.csv', index=True)

In [8]:
# Keep the location of every round's input file in a named variable, then load
# each file into its own dataframe.

import pandas as pd

# Input locations: the 2012 round is an Excel workbook, the other rounds are CSV files

agri_2012 = r"C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\8. Section 2 Part 6 PAID FARM WORK DURING R&K\2012_s6p2_m.xlsx"
agri_2012_5 = r"C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\8. Section 2 Part 6 PAID FARM WORK DURING R&K\2012_5_s7p1&p2.csv"
agri_2013 = r"C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\8. Section 2 Part 6 PAID FARM WORK DURING R&K\2013_s2p7B_s2p7A.csv"
agri_2014 = r"C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\8. Section 2 Part 6 PAID FARM WORK DURING R&K\2014_s2p7_s2p8.csv"

# Load each round with the reader that matches its file format
df_2012 = pd.read_excel(agri_2012)
df_2012_5, df_2013, df_2014 = (
    pd.read_csv(csv_path) for csv_path in (agri_2012_5, agri_2013, agri_2014)
)


In [10]:
# Give the same question the same column name in every round so that the
# per-round frames line up when they are merged.

# 2012 round: questionnaire codes -> standard names (three sub-items per line)
df_2012.rename(columns={
    'pid': 'r_pid',

    # S6P2A* codes -> s2p7_* names
    'S6P2AQ1A': 's2p7_q2', 'S6P2AQ1B': 's2p7_q3', 'S6P2AQ1C': 's2p7_q4',
    'S6P2AQ2A': 's2p7_q5', 'S6P2AQ2B': 's2p7_q6', 'S6P2AQ2C': 's2p7_q7',
    'S6P2AQ5A': 's2p7_q8', 'S6P2AQ5B': 's2p7_q9', 'S6P2AQ5C': 's2p7_q10',
    'S6P2AQ3A': 's2p7_q11', 'S6P2AQ3B': 's2p7_q12', 'S6P2AQ3C': 's2p7_q13',
    'S6P2AQ4A': 's2p7_q14', 'S6P2AQ4B': 's2p7_q15', 'S6P2AQ4C': 's2p7_q16',
    'S6P2AQ6A': 's2p7_q17', 'S6P2AQ6B': 's2p7_q18', 'S6P2AQ6C': 's2p7_q19',
    'S6P2AQ8A': 's2p7_q20', 'S6P2AQ8B': 's2p7_q21', 'S6P2AQ8C': 's2p7_q22',
    'S6P2AQ7A': 's2p7_q29', 'S6P2AQ7B': 's2p7_q30', 'S6P2AQ7C': 's2p7_q31',

    # S6P2B* codes -> s2p8_* names
    'S6P2BQ1A': 's2p8_q2', 'S6P2BQ1B': 's2p8_q3', 'S6P2BQ1C': 's2p8_q4',
    'S6P2BQ2A': 's2p8_q5', 'S6P2BQ2B': 's2p8_q6', 'S6P2BQ2C': 's2p8_q7',
    'S6P2BQ5A': 's2p8_q8', 'S6P2BQ5B': 's2p8_q9', 'S6P2BQ5C': 's2p8_q10',
    'S6P2BQ3A': 's2p8_q11', 'S6P2BQ3B': 's2p8_q12', 'S6P2BQ3C': 's2p8_q13',
    'S6P2BQ4A': 's2p8_q14', 'S6P2BQ4B': 's2p8_q15', 'S6P2BQ4C': 's2p8_q16',
    'S6P2BQ6A': 's2p8_q17', 'S6P2BQ6B': 's2p8_q18', 'S6P2BQ6C': 's2p8_q19',
    'S6P2BQ8A': 's2p8_q20', 'S6P2BQ8B': 's2p8_q21', 'S6P2BQ8C': 's2p8_q22',
    'S6P2BQ7A': 's2p8_q29', 'S6P2BQ7B': 's2p8_q30', 'S6P2BQ7C': 's2p8_q31',

    # Farm location
    # NOTE(review): these K_* names are taken from S6P2A* codes, while every
    # other S6P2A* code maps to s2p7_* - confirm this is intended.
    'S6P2AQ9': 'K_Farm_Loc', 'S6P2AQ10': 'K_Farm_Loc_Dist',

    # Geographic identifiers (UC_ID keeps its name)
    'PROVINCE_ID': 'P_ID', 'DISTRICT_ID': 'D_ID', 'TEHSIL_ID': 'T_ID',
    'UC_ID': 'UC_ID', 'MAUZA_ID': 'M_ID',

    # S6P2C* codes -> Lvstk_* names
    'S6P2CQ1A': 'Lvstk_Care_HpD', 'S6P2CQ1B': 'Lvstk_Care_TD', 'S6P2CQ1C': 'Lvstk_Care_WpD',
    'S6P2CQ2A': 'Lvstk_Prod_HpD', 'S6P2CQ2B': 'Lvstk_Prod_TD', 'S6P2CQ2C': 'Lvstk_Prod_WpD',
    'S6P2CQ3': 'Lvstk_Farm_Loc', 'S6P2CQ4': 'Lvstk_Farm_Loc_Dist',
}, inplace=True)

# 2012_5 round: questionnaire codes -> standard names (three sub-items per line)
df_2012_5.rename(columns={
    'Round': 'round',
    'PID1': 'r_pid',

    # S7P1* codes -> s2p7_* names
    'S7P1Q1A': 's2p7_q2', 'S7P1Q1B': 's2p7_q3', 'S7P1Q1C': 's2p7_q4',
    'S7P1Q2A': 's2p7_q5', 'S7P1Q2B': 's2p7_q6', 'S7P1Q2C': 's2p7_q7',
    'S7P1Q3A': 's2p7_q8', 'S7P1Q3B': 's2p7_q9', 'S7P1Q3C': 's2p7_q10',
    'S7P1Q4A': 's2p7_q11', 'S7P1Q4B': 's2p7_q12', 'S7P1Q4C': 's2p7_q13',
    'S7P1Q5A': 's2p7_q14', 'S7P1Q5B': 's2p7_q15', 'S7P1Q5C': 's2p7_q16',
    'S7P1Q6A': 's2p7_q17', 'S7P1Q6B': 's2p7_q18', 'S7P1Q6C': 's2p7_q19',
    'S7P1Q7A': 's2p7_q20', 'S7P1Q7B': 's2p7_q21', 'S7P1Q7C': 's2p7_q22',
    'S7P1Q8A': 's2p7_q29', 'S7P1Q8B': 's2p7_q30', 'S7P1Q8C': 's2p7_q31',

    # Farm location
    'S7P1Q9': 'K_Farm_Loc', 'S7P1Q10': 'K_Farm_Loc_Dist',

    # Geographic identifiers (C_HH_NUM keeps its name)
    'C_PROVINCE': 'P_ID', 'C_DISTRICT': 'D_ID', 'C_TEHSIL': 'T_ID',
    'C_UC': 'UC_ID', 'C_MOUZA': 'M_ID', 'C_HH_NUM': 'C_HH_NUM',

    # S7P2* codes -> Lvstk_* names
    'S7P2Q1A': 'Lvstk_Care_HpD', 'S7P2Q1B': 'Lvstk_Care_TD', 'S7P2Q1C': 'Lvstk_Care_WpD',
    'S7P2Q2A': 'Lvstk_Prod_HpD', 'S7P2Q2B': 'Lvstk_Prod_TD', 'S7P2Q2C': 'Lvstk_Prod_WpD',
    'S7P2Q3': 'Lvstk_Farm_Loc', 'S7P2Q4': 'Lvstk_Farm_Loc_Dist',
}, inplace=True)

# 2013 round: each question has sub-items i/ii/iii, which become three
# consecutive standard question numbers (one source question per line)
df_2013.rename(columns={
    # s2p7a_* -> s2p7_*
    's2p7a_q1_i': 's2p7_q2', 's2p7a_q1_ii': 's2p7_q3', 's2p7a_q1_iii': 's2p7_q4',
    's2p7a_q2_i': 's2p7_q5', 's2p7a_q2_ii': 's2p7_q6', 's2p7a_q2_iii': 's2p7_q7',
    's2p7a_q3_i': 's2p7_q8', 's2p7a_q3_ii': 's2p7_q9', 's2p7a_q3_iii': 's2p7_q10',
    's2p7a_q4_i': 's2p7_q11', 's2p7a_q4_ii': 's2p7_q12', 's2p7a_q4_iii': 's2p7_q13',
    's2p7a_q5_i': 's2p7_q14', 's2p7a_q5_ii': 's2p7_q15', 's2p7a_q5_iii': 's2p7_q16',
    's2p7a_q6_i': 's2p7_q17', 's2p7a_q6_ii': 's2p7_q18', 's2p7a_q6_iii': 's2p7_q19',
    's2p7a_q7_i': 's2p7_q20', 's2p7a_q7_ii': 's2p7_q21', 's2p7a_q7_iii': 's2p7_q22',
    's2p7a_q8_i': 's2p7_q23', 's2p7a_q8_ii': 's2p7_q24', 's2p7a_q8_iii': 's2p7_q25',
    's2p7a_q9_i': 's2p7_q26', 's2p7a_q9_ii': 's2p7_q27', 's2p7a_q9_iii': 's2p7_q28',
    's2p7a_q10_i': 's2p7_q29', 's2p7a_q10_ii': 's2p7_q30', 's2p7a_q10_iii': 's2p7_q31',

    # s2p7b_* -> s2p8_*
    's2p7b_qa': 's2p8_qa',
    's2p7b_q1_i': 's2p8_q2', 's2p7b_q1_ii': 's2p8_q3', 's2p7b_q1_iii': 's2p8_q4',
    's2p7b_q2_i': 's2p8_q5', 's2p7b_q2_ii': 's2p8_q6', 's2p7b_q2_iii': 's2p8_q7',
    's2p7b_q3_i': 's2p8_q8', 's2p7b_q3_ii': 's2p8_q9', 's2p7b_q3_iii': 's2p8_q10',
    's2p7b_q4_i': 's2p8_q11', 's2p7b_q4_ii': 's2p8_q12', 's2p7b_q4_iii': 's2p8_q13',
    's2p7b_q5_i': 's2p8_q14', 's2p7b_q5_ii': 's2p8_q15', 's2p7b_q5_iii': 's2p8_q16',
    's2p7b_q6_i': 's2p8_q17', 's2p7b_q6_ii': 's2p8_q18', 's2p7b_q6_iii': 's2p8_q19',
    's2p7b_q7_i': 's2p8_q20', 's2p7b_q7_ii': 's2p8_q21', 's2p7b_q7_iii': 's2p8_q22',
    's2p7b_q8_i': 's2p8_q23', 's2p7b_q8_ii': 's2p8_q24', 's2p7b_q8_iii': 's2p8_q25',
    's2p7b_q9_i': 's2p8_q26', 's2p7b_q9_ii': 's2p8_q27', 's2p7b_q9_iii': 's2p8_q28',
    's2p7b_q10_i': 's2p8_q29', 's2p7b_q10_ii': 's2p8_q30', 's2p7b_q10_iii': 's2p8_q31',
}, inplace=True)


# The 2014 frame is the naming reference, so it is not renamed

In [17]:
# Positional column layouts, one list per survey round.
# All four lists describe the same sequence of positions: an entry is the
# standardised column name when the round has a column at that position and
# None when it has none. Runs of None are written as *([None] * count).

mapping_2012 = [
    'hid', 'round', None, 'r_pid', None,
    's2p7_q2', 's2p7_q3', 's2p7_q4',
    's2p7_q5', 's2p7_q6', 's2p7_q7',
    's2p7_q8', 's2p7_q9', 's2p7_q10',
    's2p7_q11', 's2p7_q12', 's2p7_q13',
    's2p7_q14', 's2p7_q15', 's2p7_q16',
    's2p7_q17', 's2p7_q18', 's2p7_q19',
    's2p7_q20', 's2p7_q21', 's2p7_q22',
    *([None] * 6),
    's2p7_q29', 's2p7_q30', 's2p7_q31',
    *([None] * 4),
    's2p8_q2', 's2p8_q3', 's2p8_q4',
    's2p8_q5', 's2p8_q6', 's2p8_q7',
    's2p8_q8', 's2p8_q9', 's2p8_q10',
    's2p8_q11', 's2p8_q12', 's2p8_q13',
    's2p8_q14', 's2p8_q15', 's2p8_q16',
    's2p8_q17', 's2p8_q18', 's2p8_q19',
    's2p8_q20', 's2p8_q21', 's2p8_q22',
    *([None] * 6),
    's2p8_q29', 's2p8_q30', 's2p8_q31',
    *([None] * 2),
    'K_Farm_Loc', 'K_Farm_Loc_Dist',
    'P_ID', 'D_ID', 'T_ID', 'UC_ID', 'M_ID',
    None,
    'Lvstk_Care_HpD', 'Lvstk_Care_TD', 'Lvstk_Care_WpD',
    'Lvstk_Prod_HpD', 'Lvstk_Prod_TD', 'Lvstk_Prod_WpD',
    'Lvstk_Farm_Loc', 'Lvstk_Farm_Loc_Dist',
]

mapping_2012_5 = [
    'hid', 'round', None, 'r_pid', None,
    's2p7_q2', 's2p7_q3', 's2p7_q4',
    's2p7_q5', 's2p7_q6', 's2p7_q7',
    's2p7_q8', 's2p7_q9', 's2p7_q10',
    's2p7_q11', 's2p7_q12', 's2p7_q13',
    's2p7_q14', 's2p7_q15', 's2p7_q16',
    's2p7_q17', 's2p7_q18', 's2p7_q19',
    's2p7_q20', 's2p7_q21', 's2p7_q22',
    *([None] * 6),
    's2p7_q29', 's2p7_q30', 's2p7_q31',
    *([None] * 36),
    'K_Farm_Loc', 'K_Farm_Loc_Dist',
    'P_ID', 'D_ID', 'T_ID', 'UC_ID', 'M_ID', 'C_HH_NUM',
    'Lvstk_Care_HpD', 'Lvstk_Care_TD', 'Lvstk_Care_WpD',
    'Lvstk_Prod_HpD', 'Lvstk_Prod_TD', 'Lvstk_Prod_WpD',
    'Lvstk_Farm_Loc', 'Lvstk_Farm_Loc_Dist',
]

mapping_2013 = [
    'hid', 'round', None, 'r_pid', None,
    's2p7_q2', 's2p7_q3', 's2p7_q4', 's2p7_q5', 's2p7_q6',
    's2p7_q7', 's2p7_q8', 's2p7_q9', 's2p7_q10', 's2p7_q11',
    's2p7_q12', 's2p7_q13', 's2p7_q14', 's2p7_q15', 's2p7_q16',
    's2p7_q17', 's2p7_q18', 's2p7_q19', 's2p7_q20', 's2p7_q21',
    's2p7_q22', 's2p7_q23', 's2p7_q24', 's2p7_q25', 's2p7_q26',
    's2p7_q27', 's2p7_q28', 's2p7_q29', 's2p7_q30', 's2p7_q31',
    None, None, 's2p8_qa', None,
    's2p8_q2', 's2p8_q3', 's2p8_q4', 's2p8_q5', 's2p8_q6',
    's2p8_q7', 's2p8_q8', 's2p8_q9', 's2p8_q10', 's2p8_q11',
    's2p8_q12', 's2p8_q13', 's2p8_q14', 's2p8_q15', 's2p8_q16',
    's2p8_q17', 's2p8_q18', 's2p8_q19', 's2p8_q20', 's2p8_q21',
    's2p8_q22', 's2p8_q23', 's2p8_q24', 's2p8_q25', 's2p8_q26',
    's2p8_q27', 's2p8_q28', 's2p8_q29', 's2p8_q30', 's2p8_q31',
    *([None] * 18),
]


mapping_2014 = [
    'hid', 'round', 's2p7_qa', 'r_pid',
    's2p7_q1',
    's2p7_q2', 's2p7_q3', 's2p7_q4', 's2p7_q5', 's2p7_q6',
    's2p7_q7', 's2p7_q8', 's2p7_q9', 's2p7_q10', 's2p7_q11',
    's2p7_q12', 's2p7_q13', 's2p7_q14', 's2p7_q15', 's2p7_q16',
    's2p7_q17', 's2p7_q18', 's2p7_q19', 's2p7_q20', 's2p7_q21',
    's2p7_q22', 's2p7_q23', 's2p7_q24', 's2p7_q25', 's2p7_q26',
    's2p7_q27', 's2p7_q28', 's2p7_q29', 's2p7_q30', 's2p7_q31',
    's2p7_q32', 's2p7_q33',
    's2p8_qa', 's2p8_q1',
    's2p8_q2', 's2p8_q3', 's2p8_q4', 's2p8_q5', 's2p8_q6',
    's2p8_q7', 's2p8_q8', 's2p8_q9', 's2p8_q10', 's2p8_q11',
    's2p8_q12', 's2p8_q13', 's2p8_q14', 's2p8_q15', 's2p8_q16',
    's2p8_q17', 's2p8_q18', 's2p8_q19', 's2p8_q20', 's2p8_q21',
    's2p8_q22', 's2p8_q23', 's2p8_q24', 's2p8_q25', 's2p8_q26',
    's2p8_q27', 's2p8_q28', 's2p8_q29', 's2p8_q30', 's2p8_q31',
    's2p8_q32', 's2p8_q33',
    *([None] * 16),
]



In [18]:
# Collect every column name used by any round, keeping the order in which the
# names are first seen (2012, then 2012_5, 2013, 2014). None placeholders are
# skipped and a name that appears in several rounds is kept once.
all_columns = list(dict.fromkeys(
    name
    for year_mapping in (mapping_2012, mapping_2012_5, mapping_2013, mapping_2014)
    for name in year_mapping
    if name
))



In [21]:
def _mapped_names(mapping):
    """Return the non-empty, whitespace-stripped names of ``mapping`` in order, without repeats."""
    stripped = (entry.strip() for entry in mapping if entry)
    return list(dict.fromkeys(name for name in stripped if name))


def standardize_and_merge(dfs, mappings, all_columns, verbose=True):
    """Stack several dataframes row-wise onto one shared set of columns.

    Each dataframe contributes exactly ``len(df)`` rows to the result. For
    every output column, a row holds the dataframe's own value when the column
    is named in that dataframe's mapping and exists in the dataframe, and NaN
    otherwise. Keeping every column the same length after every dataframe is
    what keeps values on the row they came from.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        Frames to stack, in output row order.
    mappings : sequence of sequences
        One list per frame (paired with ``dfs`` by position) naming the columns
        to take from that frame. Falsy entries such as ``None`` are ignored and
        names are stripped of surrounding whitespace.
    all_columns : sequence of str
        Output columns, in order. A name that occurs in a mapping but not here
        is appended after these.
    verbose : bool, default True
        Print the per-frame / per-column progress messages.

    Returns
    -------
    pandas.DataFrame
        One row per input row, with a fresh 0..n-1 index.

    Warns
    -----
    UserWarning
        When a selected column name occurs more than once in a frame; only the
        first occurrence is used so that the row count is preserved.
    """
    import warnings

    import numpy as np  # imported here: no cell of this notebook imports numpy

    frames_and_names = [
        (df, _mapped_names(mapping)) for df, mapping in zip(dfs, mappings)
    ]

    # Fix the full output column list up front so that every frame can fill
    # every column, including columns only a later frame provides.
    output_columns = list(dict.fromkeys(all_columns))
    known = set(output_columns)
    for _, names in frames_and_names:
        for name in names:
            if name not in known:
                known.add(name)
                output_columns.append(name)

    merged_data = {name: [] for name in output_columns}

    for df, names in frames_and_names:
        if verbose:
            print(f"Processing DataFrame with columns: {df.columns.tolist()}")
        n_rows = len(df)
        wanted = set(names)

        for name in output_columns:
            if name in wanted and name in df.columns:
                values = df[name]
                if isinstance(values, pd.DataFrame):
                    # Duplicate column labels: using all of them would add
                    # more than n_rows values and shift every later row.
                    warnings.warn(
                        f"Column {name} is duplicated in DataFrame. "
                        "Using the first occurrence only.",
                        stacklevel=2,
                    )
                    values = values.iloc[:, 0]
                if verbose:
                    print(f"Appending data for column {name}")
                merged_data[name].extend(values.tolist())
            else:
                if verbose and name in wanted:
                    print(f"Column {name} not found in DataFrame. Adding NaNs.")
                # Also pads columns this frame's mapping does not mention, so
                # the next frame's values start on the next frame's rows.
                merged_data[name].extend([np.nan] * n_rows)

    return pd.DataFrame(merged_data, columns=output_columns)


In [22]:
# Run the merge: each dataframe is paired with the mapping at the same list position
mappings = [mapping_2012, mapping_2012_5, mapping_2013, mapping_2014]
dfs = [df_2012, df_2012_5, df_2013, df_2014]

merged_df = standardize_and_merge(dfs=dfs, mappings=mappings, all_columns=all_columns)

Processing DataFrame with columns: ['Unnamed: 0', 'hid', 'round', 'r_pid', 's2p7_q2', 's2p7_q3', 's2p7_q4', 's2p7_q5', 's2p7_q6', 's2p7_q7', 's2p7_q11', 's2p7_q12', 's2p7_q13', 's2p7_q14', 's2p7_q15', 's2p7_q16', 's2p7_q8', 's2p7_q9', 's2p7_q10', 's2p7_q17', 's2p7_q18', 's2p7_q19', 's2p7_q29', 's2p7_q30', 's2p7_q31', 's2p7_q20', 's2p7_q21', 's2p7_q22', 'K_Farm_Loc', 'K_Farm_Loc_Dist', 'Lvstk_Farm_Loc_Dist', 's2p8_q3', 's2p8_q4', 's2p8_q5', 's2p8_q6', 's2p8_q7', 's2p8_q11', 's2p8_q12', 's2p8_q13', 's2p8_q14', 's2p8_q15', 's2p8_q16', 's2p8_q8', 's2p8_q9', 's2p8_q10', 's2p8_q17', 's2p8_q18', 's2p8_q19', 's2p8_q29', 's2p8_q30', 's2p8_q31', 's2p8_q20', 's2p8_q21', 's2p8_q22', 'S6P2BQ9', 'S6P2BQ10', 'Lvstk_Care_HpD', 'Lvstk_Care_TD', 'Lvstk_Care_WpD', 'Lvstk_Prod_HpD', 'Lvstk_Prod_TD', 'Lvstk_Prod_WpD', 'Lvstk_Farm_Loc', 'Lvstk_Farm_Loc_Dist', 'P_ID', 'D_ID', 'T_ID', 'UC_ID', 'M_ID']
Appending data for column hid
Appending data for column round
Appending data for column r_pid
Appending data 

In [23]:
# Replace the questionnaire-style names of the merged frame with descriptive
# ones; columns that are not listed here keep their current name.
# NOTE(review): the suffix spelling is not uniform ("HpD" vs "Hpd", "TWpD" vs
# "WpD") - confirm whether downstream users rely on these exact names.
rename_mapping = {
    # Identifiers
    'hid': 'HID', 'round': 'Survey_Round', 'r_pid': 'PID',

    # s2p7_* -> R_* names
    's2p7_qa': 'R_PFA', 's2p7_q1': 'R_Status_Emp',
    's2p7_q2': 'R_LandPrep_HpD', 's2p7_q3': 'R_LandPrep_TD', 's2p7_q4': 'R_LandPrep_WpD',
    's2p7_q5': 'R_Sowing_HpD', 's2p7_q6': 'R_Sowing_TD', 's2p7_q7': 'R_Sowing_TWpD',
    's2p7_q8': 'R_Irr_HpD', 's2p7_q9': 'R_Irr_TD', 's2p7_q10': 'R_Irr_WpD',
    's2p7_q11': 'R_FertiApp_Hpd', 's2p7_q12': 'R_FertiApp_TD', 's2p7_q13': 'R_FertiApp_WpD',
    's2p7_q14': 'R_PestiApp_Hpd', 's2p7_q15': 'R_PestiApp_TD', 's2p7_q16': 'R_PestiApp_WpD',
    's2p7_q17': 'R_Weeding_Hpd', 's2p7_q18': 'R_Weeding_TD', 's2p7_q19': 'R_Weeding_WpD',
    's2p7_q20': 'R_HPS_Hpd', 's2p7_q21': 'R_HPS_TD', 's2p7_q22': 'R_HPS_WpD',
    's2p7_q23': 'R_Thresh_Hpd', 's2p7_q24': 'R_Thresh_TD', 's2p7_q25': 'R_Thresh_WpD',
    's2p7_q26': 'R_TnS_Hpd', 's2p7_q27': 'R_TnS_TD', 's2p7_q28': 'R_TnS_WpD',
    's2p7_q29': 'R_Prune_Hpd', 's2p7_q30': 'R_Prune_TD', 's2p7_q31': 'R_Prune_WpD',
    's2p7_q32': 'R_Income_TD', 's2p7_q33': 'R_Wage_T',

    # s2p8_* -> K_* names
    's2p8_qa': 'K_PFA', 's2p8_q1': 'K_Status_Emp',
    's2p8_q2': 'K_LandPrep_HpD', 's2p8_q3': 'K_LandPrep_TD', 's2p8_q4': 'K_LandPrep_WpD',
    's2p8_q5': 'K_Sowing_HpD', 's2p8_q6': 'K_Sowing_TD', 's2p8_q7': 'K_Sowing_TWpD',
    's2p8_q8': 'K_Irr_HpD', 's2p8_q9': 'K_Irr_TD', 's2p8_q10': 'K_Irr_WpD',
    's2p8_q11': 'K_FertiApp_Hpd', 's2p8_q12': 'K_FertiApp_TD', 's2p8_q13': 'K_FertiApp_WpD',
    's2p8_q14': 'K_PestiApp_Hpd', 's2p8_q15': 'K_PestiApp_TD', 's2p8_q16': 'K_PestiApp_WpD',
    's2p8_q17': 'K_Weeding_Hpd', 's2p8_q18': 'K_Weeding_TD', 's2p8_q19': 'K_Weeding_WpD',
    's2p8_q20': 'K_HPS_Hpd', 's2p8_q21': 'K_HPS_TD', 's2p8_q22': 'K_HPS_WpD',
    's2p8_q23': 'K_Thresh_Hpd', 's2p8_q24': 'K_Thresh_TD', 's2p8_q25': 'K_Thresh_WpD',
    's2p8_q26': 'K_TnS_Hpd', 's2p8_q27': 'K_TnS_TD', 's2p8_q28': 'K_TnS_WpD',
    's2p8_q29': 'K_Prune_Hpd', 's2p8_q30': 'K_Prune_TD', 's2p8_q31': 'K_Prune_WpD',
    's2p8_q32': 'K_Income_TD', 's2p8_q33': 'K_Wage_T',
}

merged_df.rename(columns=rename_mapping, inplace=True)

# Write the merged section to a CSV file in the working directory, without the row index
merged_df.to_csv('merged_Section_2_part_6.csv', index=False)

