# Henry Salgado
# 08.10.2024

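This notebook explores missing data in the survey constructs. For each construct (a group of related questionnaire items), it reports the percentage of missing responses per item and the average across the construct's items.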

In [2]:
import pandas as pd
import numpy as np
# Codes that are treated as non-responses rather than real answers.
non_response_codes = [96, 97, 98, 99]

# Load the spreadsheet and convert every non-response code to NaN in a single
# step, so that pandas' missing-data tools (isna, mean, ...) see them as missing.
# NOTE: the replacement is applied to every column of the sheet, so a value of
# 96-99 in any column becomes NaN as well.
data = pd.read_excel("hsi_stem_instructional.xlsx").replace(
    non_response_codes, np.nan
)

In [3]:
import numpy as np


# Dictionary of constructs with their corresponding items
constructs = {
    # Support / environment constructs
    'Chair_Support': ['q185h', 'q185i', 'q185j', 'q185k', 'q185l'],
    'Department_feedback_recog': ['q215a', 'q215d', 'q215i', 'q215l'],
    'Work_life_bal': ['q200a', 'q200b', 'q200c', 'q200d'],
    'Inst_support': ['q90a', 'q90c', 'q90f', 'q90g'],
    # NOTE(review): 'q200a' is also listed under 'Work_life_bal' -- confirm
    # that the overlap is intended.
    'health_retirement': ['q95a', 'q95c', 'q200a'],

    'Relatedness': ['q205c', 'q210c', 'q212a'],
    # NOTE(review): 'q70h' is also listed under
    # 'Teaching_and_Service_Satisfaction' -- confirm that the overlap is intended.
    'Volition_Autonomy': ['q70c', 'q70h'],
    'Competence': ['q45a', 'q45c', 'q45e'],

    # Satisfaction constructs
    'Teaching_and_Service_Satisfaction': [
        'q70a', 'q70b', 'q70d', 'q70e',
        'q70f', 'q70h', 'q70j', 'q70k',
    ],
    'Global_Satisfaction': ['q250a', 'q250b'],
}


# Function to calculate the average missingness for each construct
def calculate_average_missingness(construct, items, df=None):
    """Return the average fraction of missing values across a construct's items.

    The missing fraction is computed per item (column) first, and the per-item
    fractions are then averaged, so every item carries equal weight.

    Parameters
    ----------
    construct : str
        Name of the construct. Not used in the computation; kept so existing
        callers that pass it keep working.
    items : list of str
        Column names that make up the construct.
    df : pandas.DataFrame, optional
        Frame to measure. When omitted, the notebook-level ``data`` frame is
        used, which matches the original behaviour.

    Returns
    -------
    float
        Mean of the per-item missing fractions, in the range 0-1 (NaN when
        ``items`` is empty).

    Raises
    ------
    KeyError
        If any name in ``items`` is not a column of the frame.
    """
    # Fall back to the notebook's global frame only when no frame is supplied,
    # so the function can be reused and checked on any DataFrame.
    frame = data if df is None else df
    per_item_missing = frame[items].isna().mean()
    return per_item_missing.mean()

# Calculate and print the average missingness for each construct
# Map every construct name to the average missing fraction of its items.
average_missingness_results = dict(
    (name, calculate_average_missingness(name, members))
    for name, members in constructs.items()
)

# Report one line per construct, formatted as a percentage.
for construct, average_missingness in average_missingness_results.items():
    print(f"{construct}: {average_missingness:.2%}")

def print_item_and_average_missingness(construct):
    """Print per-item and average missingness (in percent) for one construct.

    Looks the construct up in the module-level ``constructs`` mapping and
    measures missing values in the module-level ``data`` frame. If the name is
    not a key of ``constructs``, a "not found" message is printed instead.

    Parameters
    ----------
    construct : str
        Key of the construct in ``constructs``.
    """
    # Guard clause: unknown construct names are reported, not raised.
    if construct not in constructs:
        print(f"Construct '{construct}' not found.")
        return

    # Share of missing values per item, scaled from a fraction to a percentage.
    item_percentages = data[constructs[construct]].isna().mean() * 100

    print(f"Missingness for {construct}:")
    for item, missingness in item_percentages.items():
        print(f"  {item}: {missingness:.2f}%")

    # Unweighted mean of the per-item percentages.
    average_missingness = item_percentages.mean()
    print(f"Average missingness for {construct}: {average_missingness:.2f}%\n")

# Example: Print item-wise and average missingness for a specific construct
print_item_and_average_missingness('Teaching_and_Service_Satisfaction')

# Number of rows in the loaded frame, reported as the participant count.
num_participants = len(data.index)
print(f"Number of participants in the sample: {num_participants}")


Chair_Support: 15.92%
Department_feedback_recog: 16.86%
Work_life_bal: 20.85%
Inst_support: 6.23%
health_retirement: 12.39%
Relatedness: 14.56%
Volition_Autonomy: 7.57%
Competence: 7.72%
Teaching_and_Service_Satisfaction: 10.47%
Global_Satisfaction: 14.01%
Missingness for Teaching_and_Service_Satisfaction:
  q70a: 6.28%
  q70b: 6.39%
  q70d: 6.39%
  q70e: 7.00%
  q70f: 8.14%
  q70h: 8.55%
  q70j: 17.10%
  q70k: 23.89%
Average missingness for Teaching_and_Service_Satisfaction: 10.47%

Number of participants in the sample: 971
