In [None]:
'''
This notebook contains the code to merge all male data across the years based on sections.
There are datasets for 2012, 2012-1.5, 2013 and 2014
The different sections that will be merged are as follows:
**2012**
1. Cover
2. Roster
3. Section 1: Education (All men 18 and above)
4. Section 2: Agriculture
5. Section 3: Assets
6. Section 4: Consumption and Expenditure
7. Section 5: Credit
8. Section 6: Employment and Income
9. Section 7: Economic Events/Shocks
10. Section 8: Community Participation and Social Network Membership

**2013**
1. Cover
2. Roster
3. Section 1: Education: Males 19 years and older
4. Section 2: Agriculture
5. Section 3: Assets
6. Section 4: Consumption and Expenditure
7. Section 5: Credit
8. Section 6: Employment and Income
9. Section 7: Health
10. Section 8: Political Participation and Governance

**2014**
1. Cover
2. Roster
3. Section 1: Education: Males 19 years and older
4. Section 2: Agriculture
5. Section 3: Assets
6. Section 4: Consumption and Expenditure
7. Section 5: Credit
8. Section 6: Employment and Income
9. Section 7: Economic Events/Shocks
10. Section 8: Participation in Social Safety Net
11. Section 9: Siblings
12. Section 10: Transfers
13. Section 11: Health and Nutrition

'''

In [111]:
# Input configuration shared by the merge cells that follow.

# Directory that holds the per-section CSV parts.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will only find its inputs on a machine with this layout.
data_folder = r"C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts"

# CSV files (relative to `data_folder`) to be read and stacked together.
file_names = [
    '1. merged_roster.csv',
]

In [None]:
    """
The following 3 cells do the same thing but at different levels. To summarize:

    * Concatenates (stacks) multiple CSV files and places the specified merge keys first in the column order.

    * Parameters:
     - data_folder (str): The folder path where the data files are located.
     - file_names (list of str): List of all the file names to be merged.
     - output_file (str): The name of the output file where merged data will be saved.
     - merge_keys (list of str): List of key columns that identify the level of the data; those present are moved to the front of the output.

    * Returns:
     - None
    """

In [112]:
#'HID', 'PID' Level
#
# Stacks every CSV listed in `file_names` (looked up inside `data_folder`) into
# one DataFrame, moves the 'HID'/'PID' key columns to the front, sorts the
# remaining columns, drops all-empty columns and rows, and writes the result to
# a CSV file in the current working directory.
#
# NOTE(review): `data_folder` and `file_names` are not defined in this cell;
# they must already exist in the kernel when it is run.

import pandas as pd
import os

# Read each existing file into its own DataFrame. The frames are collected and
# concatenated once afterwards: concatenating inside the loop re-copies the
# accumulated data on every iteration and starts from an empty placeholder
# frame, which recent pandas versions warn about.
frames = []
for file_name in file_names:
    file_path = os.path.join(data_folder, file_name)  # Full path of the current file
    print(f"Looking for file: {file_path}")  # Debug print
    if not os.path.exists(file_path):
        # Missing files are reported and skipped rather than treated as errors
        print(f"File not found: {file_path}")  # Debug print
        continue
    print(f"File found: {file_path}")  # Debug print
    # dtype=str reads every column as text so mixed-type columns are not coerced
    df = pd.read_csv(file_path, dtype=str)
    frames.append(df)

# Rows are stacked, not joined on the keys. pd.concat raises on an empty list,
# so fall back to an empty DataFrame when no file was found.
merged_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Proceed only if there is at least one row of data
if not merged_df.empty:
    # Key columns identifying this level; only those actually present are used
    merge_keys = ['HID', 'PID']
    available_merge_keys = [key for key in merge_keys if key in merged_df.columns]

    # Everything that is not a key column, sorted alphabetically with names
    # starting with a digit placed last. x[:1] (rather than x[0]) keeps the key
    # function safe for an empty column label.
    remaining_columns = [col for col in merged_df.columns if col not in available_merge_keys]
    sorted_columns = sorted(remaining_columns, key=lambda x: (x[:1].isdigit(), x))

    # Final layout: key columns first, then the sorted remainder
    column_order = available_merge_keys + sorted_columns

    # Reorder, then drop columns and rows that are entirely empty. Chaining
    # returns a new frame instead of mutating in place.
    merged_df = (
        merged_df[column_order]
        .dropna(axis=1, how='all')
        .dropna(axis=0, how='all')
    )

    # Save the processed data (relative path: written to the working directory)
    merged_df.to_csv('1. Roaster_Household-HouseholdMember_Level.csv', index=False)
    print("Merged data saved successfully!")
else:
    print("No data to merge and save.")

Looking for file: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\1. merged_roster.csv
File found: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\1. merged_roster.csv
Merged data saved successfully!


In [89]:
#'HID', 'PID', 'Plt_ID', 'Crop_ID', 'Season' Level
#
# Stacks every CSV listed in `file_names` (looked up inside `data_folder`) into
# one DataFrame, moves the key columns for this level to the front, sorts the
# remaining columns, drops all-empty columns and rows, and writes the result to
# a CSV file in the current working directory.
#
# NOTE(review): `data_folder` and `file_names` are not defined in this cell;
# they must already exist in the kernel when it is run. The recorded output of
# this cell lists files under "8. Economic events and Shocks", so `file_names`
# was presumably reassigned before this cell was executed - confirm.

import pandas as pd
import os

# Read each existing file into its own DataFrame. The frames are collected and
# concatenated once afterwards: concatenating inside the loop re-copies the
# accumulated data on every iteration and starts from an empty placeholder
# frame, which recent pandas versions warn about.
frames = []
for file_name in file_names:
    file_path = os.path.join(data_folder, file_name)  # Full path of the current file
    print(f"Looking for file: {file_path}")  # Debug print
    if not os.path.exists(file_path):
        # Missing files are reported and skipped rather than treated as errors
        print(f"File not found: {file_path}")  # Debug print
        continue
    print(f"File found: {file_path}")  # Debug print
    # dtype=str reads every column as text so mixed-type columns are not coerced
    df = pd.read_csv(file_path, dtype=str)
    frames.append(df)

# Rows are stacked, not joined on the keys. pd.concat raises on an empty list,
# so fall back to an empty DataFrame when no file was found.
merged_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Proceed only if there is at least one row of data
if not merged_df.empty:
    # Key columns identifying this level; only those actually present are used
    merge_keys = ['HID', 'PID', 'Plt_ID', 'Crop_ID', 'Season']
    available_merge_keys = [key for key in merge_keys if key in merged_df.columns]

    # Everything that is not a key column, sorted alphabetically with names
    # starting with a digit placed last. x[:1] (rather than x[0]) keeps the key
    # function safe for an empty column label.
    remaining_columns = [col for col in merged_df.columns if col not in available_merge_keys]
    sorted_columns = sorted(remaining_columns, key=lambda x: (x[:1].isdigit(), x))

    # Final layout: key columns first, then the sorted remainder
    column_order = available_merge_keys + sorted_columns

    # Reorder, then drop columns and rows that are entirely empty. Chaining
    # returns a new frame instead of mutating in place.
    merged_df = (
        merged_df[column_order]
        .dropna(axis=1, how='all')
        .dropna(axis=0, how='all')
    )

    # Save the processed data (relative path: written to the working directory)
    merged_df.to_csv('8.1. EconomicEventsAndShocks_Household-HouseholdMember-PlotID-CropID-Season_Level.csv', index=False)
    print("Merged data saved successfully!")
else:
    print("No data to merge and save.")

Looking for file: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\1. merged_EEnS_Negative Economic Event.csv
File found: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\1. merged_EEnS_Negative Economic Event.csv
Looking for file: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\2. merged_EEnS_Positive Economic Event.csv
File found: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\2. merged_EEnS_Positive Economic Event.csv
Looking for file: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\3. merged_EEnS_Floods - Overview.csv
File found: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\3. merged_EEnS_Floods - Overview.csv
Looking for file: C:\U

In [90]:
#'HID', 'Season', 'Shock_ID' Level
#
# Stacks every CSV listed in `file_names` (looked up inside `data_folder`) into
# one DataFrame, moves the key columns for this level to the front, sorts the
# remaining columns, drops all-empty columns and rows, and writes the result to
# a CSV file in the current working directory.
#
# NOTE(review): `data_folder` and `file_names` are not defined in this cell;
# they must already exist in the kernel when it is run. The recorded output of
# this cell lists files under "8. Economic events and Shocks", so `file_names`
# was presumably reassigned before this cell was executed - confirm.

import pandas as pd
import os

# Read each existing file into its own DataFrame. The frames are collected and
# concatenated once afterwards: concatenating inside the loop re-copies the
# accumulated data on every iteration and starts from an empty placeholder
# frame, which recent pandas versions warn about.
frames = []
for file_name in file_names:
    file_path = os.path.join(data_folder, file_name)  # Full path of the current file
    print(f"Looking for file: {file_path}")  # Debug print
    if not os.path.exists(file_path):
        # Missing files are reported and skipped rather than treated as errors
        print(f"File not found: {file_path}")  # Debug print
        continue
    print(f"File found: {file_path}")  # Debug print
    # dtype=str reads every column as text so mixed-type columns are not coerced
    df = pd.read_csv(file_path, dtype=str)
    frames.append(df)

# Rows are stacked, not joined on the keys. pd.concat raises on an empty list,
# so fall back to an empty DataFrame when no file was found.
merged_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Proceed only if there is at least one row of data
if not merged_df.empty:
    # Key columns identifying this level; only those actually present are used
    merge_keys = ['HID', 'Season', 'Shock_ID']
    available_merge_keys = [key for key in merge_keys if key in merged_df.columns]

    # Everything that is not a key column, sorted alphabetically with names
    # starting with a digit placed last. x[:1] (rather than x[0]) keeps the key
    # function safe for an empty column label.
    remaining_columns = [col for col in merged_df.columns if col not in available_merge_keys]
    sorted_columns = sorted(remaining_columns, key=lambda x: (x[:1].isdigit(), x))

    # Final layout: key columns first, then the sorted remainder
    column_order = available_merge_keys + sorted_columns

    # Reorder, then drop columns and rows that are entirely empty. Chaining
    # returns a new frame instead of mutating in place.
    merged_df = (
        merged_df[column_order]
        .dropna(axis=1, how='all')
        .dropna(axis=0, how='all')
    )

    # Save the processed data (relative path: written to the working directory)
    merged_df.to_csv('8.2. EconomicEventsAndShocks_Household-Season-ShockType_Level.csv', index=False)
    print("Merged data saved successfully!")
else:
    print("No data to merge and save.")

Looking for file: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\1. merged_EEnS_Negative Economic Event.csv
File found: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\1. merged_EEnS_Negative Economic Event.csv
Looking for file: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\2. merged_EEnS_Positive Economic Event.csv
File found: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\2. merged_EEnS_Positive Economic Event.csv
Looking for file: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\3. merged_EEnS_Floods - Overview.csv
File found: C:\Users\warra\Desktop\Freelance\data\data\MaleMerge\2. ALL MERGED CSV Parts\8. Economic events and Shocks\3. merged_EEnS_Floods - Overview.csv
Looking for file: C:\U