### **Step 1: Mount Google Drive and Load the Excel File**

In [13]:
from google.colab import drive
import pandas as pd

# Attach Google Drive to the Colab filesystem so the survey workbook is reachable
mount_point = '/content/drive'
drive.mount(mount_point)

# Location of the survey export inside the mounted Drive
file_path = mount_point + '/My Drive/Ethics For Analytics/Survey1.xlsx'

# Read the workbook into a DataFrame
df = pd.read_excel(file_path)

# Preview the first rows of the raw responses
df.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,ID,Start time,Completion time,Email,Name,Do you agree to participate in this survey and consent to your data being collected for research purposes?,What is your age group?,What is your gender?,What is your highest level of education?,What is your current employment status?,...,Do you believe that healthcare resources are allocated fairly in your community?,"In your opinion, which factors should be prioritized in healthcare resource allocation? (Select up to three)","During a healthcare crisis (e.g., pandemic), which group should be prioritized for care?","On a scale from 1 to 5, how much do you trust healthcare providers to make fair prioritization decisions?",What ethical principles should guide healthcare prioritization? (Select up to three),Have you or someone you know encountered barriers in accessing healthcare?,Please briefly describe the experience.,How likely are you to support changes to improve healthcare resource allocation in your community?,What would you suggest as a solution to improve fairness in healthcare resource allocation?,How do you usually receive updates regarding healthcare issues in your community?
0,6,2024-11-11 23:35:10,2024-11-11 23:35:21,anonymous,,"No, I do not agree (choose this if you do not ...",,,,,...,,,,,,,,,,
1,7,2024-11-11 23:39:07,2024-11-11 23:40:20,anonymous,,"Yes, I agree and consent.",18-24,Female,Graduate degree,Employed part-time,...,Yes,Severity of condition;,Children,4.0,Individual rights;Equity;,Yes,,,,
2,8,2024-11-11 23:37:46,2024-11-11 23:45:38,anonymous,,"Yes, I agree and consent.",45-54,Female,Prefer not to answer,Haha,...,Yes,Likelihood of recovery;,Patients with chronic conditions,3.0,Maximization of benefits;,No,,4.0,Hehe,News outlets
3,9,2024-11-17 00:29:17,2024-11-17 00:29:34,anonymous,,"No, I do not agree (choose this if you do not ...",,,,,...,,,,,,,,,,
4,10,2024-11-17 00:29:39,2024-11-17 00:31:15,anonymous,,"Yes, I agree and consent.",18-24,Male,Bachelor’s degree,Student,...,Yes,Age;Socioeconomic status;Severity of condition;,Elderly,4.0,Equity;Individual rights;Community needs;,Yes,Nothing,,,


### **Step 2: Remove Irrelevant Columns and Filter Based on Consent**

In [15]:
# Survey metadata columns that are not needed for analysis ('ID' is kept as the unique identifier)
columns_to_remove = ['Start time', 'Completion time', 'Email', 'Name']

# The consent question and the only answer that counts as explicit consent
consent_column = 'Do you agree to participate in this survey and consent to your data being collected for research purposes?'
gave_consent = df[consent_column] == 'Yes, I agree and consent.'

# Keep consenting respondents only, then drop the metadata columns together with
# the consent column itself (it is constant after filtering).
df_cleaned = df.loc[gave_consent].drop(columns=columns_to_remove + [consent_column])

df_cleaned.head()

Unnamed: 0,ID,What is your age group?,What is your gender?,What is your highest level of education?,What is your current employment status?,How would you rate your understanding of healthcare prioritization in your community?,Do you believe that healthcare resources are allocated fairly in your community?,"In your opinion, which factors should be prioritized in healthcare resource allocation? (Select up to three)","During a healthcare crisis (e.g., pandemic), which group should be prioritized for care?","On a scale from 1 to 5, how much do you trust healthcare providers to make fair prioritization decisions?",What ethical principles should guide healthcare prioritization? (Select up to three),Have you or someone you know encountered barriers in accessing healthcare?,Please briefly describe the experience.,How likely are you to support changes to improve healthcare resource allocation in your community?,What would you suggest as a solution to improve fairness in healthcare resource allocation?,How do you usually receive updates regarding healthcare issues in your community?
1,7,18-24,Female,Graduate degree,Employed part-time,9.0,Yes,Severity of condition;,Children,4.0,Individual rights;Equity;,Yes,,,,
2,8,45-54,Female,Prefer not to answer,Haha,7.0,Yes,Likelihood of recovery;,Patients with chronic conditions,3.0,Maximization of benefits;,No,,4.0,Hehe,News outlets
4,10,18-24,Male,Bachelor’s degree,Student,6.0,Yes,Age;Socioeconomic status;Severity of condition;,Elderly,4.0,Equity;Individual rights;Community needs;,Yes,Nothing,,,
6,12,25-34,Male,Bachelor’s degree,Student,6.0,Unsure,Severity of condition;,Patients with chronic conditions,3.0,Equity;,Yes,"They went for sever stomach paiin, but they ha...",,,
7,13,25-34,Male,Graduate degree,Student,6.0,Yes,Age;Likelihood of recovery;Severity of condition;,All equally,4.0,Equity;Individual rights;Transparency;,No,,5.0,Public campaigns,News outlets


### **Step 3: Handle Missing Values**

In [16]:
# Remove rows with more than 40% missing values, i.e. keep only rows where at
# least 60% of the columns hold data.
#
# The required non-null count must be rounded UP: truncating with int() turned
# 0.6 * 16 = 9.6 into 9, which let rows with 7/16 (43.75%) missing values through.
# Integer arithmetic is used for the ceiling to avoid floating-point surprises.
n_columns = len(df_cleaned.columns)
threshold = (6 * n_columns + 9) // 10  # == ceil(0.6 * n_columns)
df_cleaned = df_cleaned.dropna(thresh=threshold)

print("Dataset shape after handling missing values:", df_cleaned.shape)

Dataset shape after handling missing values: (19, 16)


### **Step 4: Rename Columns**

In [18]:
# Map each verbatim survey question to a short, analysis-friendly column name
column_mapping = {
    # Demographics
    'What is your age group?': 'Age_Group',
    'What is your gender?': 'Gender',
    'What is your highest level of education?': 'Education_Level',
    'What is your current employment status?': 'Employment_Status',
    # Views on healthcare prioritization
    'How would you rate your understanding of healthcare prioritization in your community?': 'Healthcare_Understanding',
    'Do you believe that healthcare resources are allocated fairly in your community?': 'Fairness_Belief',
    'In your opinion, which factors should be prioritized in healthcare resource allocation? (Select up to three)': 'Priority_Factors',
    'During a healthcare crisis (e.g., pandemic), which group should be prioritized for care?': 'Crisis_Prioritization',
    'On a scale from 1 to 5, how much do you trust healthcare providers to make fair prioritization decisions?': 'Trust_Level',
    'What ethical principles should guide healthcare prioritization? (Select up to three)': 'Ethical_Principles',
    # Personal experience and follow-up questions
    'Have you or someone you know encountered barriers in accessing healthcare?': 'Barriers_Encountered',
    'Please briefly describe the experience.': 'Experience_Description',
    'How likely are you to support changes to improve healthcare resource allocation in your community?': 'Support_Level',
    'What would you suggest as a solution to improve fairness in healthcare resource allocation?': 'Fairness_Suggestions',
    'How do you usually receive updates regarding healthcare issues in your community?': 'Healthcare_Updates'
}

# Apply the mapping along the column axis
df_cleaned = df_cleaned.rename(mapper=column_mapping, axis='columns')

# Show the resulting column labels
df_cleaned.columns

Index(['ID', 'Age_Group', 'Gender', 'Education_Level', 'Employment_Status',
       'Healthcare_Understanding', 'Fairness_Belief', 'Priority_Factors',
       'Crisis_Prioritization', 'Trust_Level', 'Ethical_Principles',
       'Barriers_Encountered', 'Experience_Description', 'Support_Level',
       'Fairness_Suggestions', 'Healthcare_Updates'],
      dtype='object')

### **Step 5: Create Two DataFrames for Priority Factors and Ethical Principles**

In [23]:
def _explode_multiselect(responses, column):
    """Turn a semicolon-separated multi-select column into one row per choice.

    Parameters
    ----------
    responses : pandas.DataFrame
        Frame containing an 'ID' column and the multi-select ``column``.
    column : str
        Name of the column whose values look like ``'A;B;C;'``.

    Returns
    -------
    pandas.DataFrame
        Two-column frame ('ID', ``column``) with one row per selected choice.
        Blank entries (produced by the trailing ';') and NaN rows are removed.
        The original row labels are kept, so they repeat for respondents
        with several choices.
    """
    exploded = responses[['ID', column]].copy()

    # Split semicolon-separated values into lists, then give each item its own row
    exploded[column] = exploded[column].str.split(';')
    exploded = exploded.explode(column)

    # Remove leading/trailing spaces, then drop blank entries and NaN rows
    exploded[column] = exploded[column].str.strip()
    exploded = exploded[exploded[column] != '']
    return exploded.dropna()


# Step 5.1: Create Priority Factors DataFrame
priority_factors_df = _explode_multiselect(df_cleaned, 'Priority_Factors')

# Step 5.2: Create Ethical Principles DataFrame
ethical_principles_df = _explode_multiselect(df_cleaned, 'Ethical_Principles')

# Step 5.3: Remove the exploded columns from the original DataFrame
df_cleaned = df_cleaned.drop(columns=['Priority_Factors', 'Ethical_Principles'])

# Display the updated main DataFrame
print("Updated Main DataFrame (without exploded columns):")
print(df_cleaned.head())

# Display the cleaned Priority Factors DataFrame
print("\nCleaned Priority Factors DataFrame:")
print(priority_factors_df.head())

# Display the cleaned Ethical Principles DataFrame
print("\nCleaned Ethical Principles DataFrame:")
print(ethical_principles_df.head())

Updated Main DataFrame (without exploded columns):
   ID Age_Group  Gender       Education_Level   Employment_Status  \
1   7     18-24  Female       Graduate degree  Employed part-time   
2   8     45-54  Female  Prefer not to answer                Haha   
4  10     18-24    Male     Bachelor’s degree             Student   
6  12     25-34    Male     Bachelor’s degree             Student   
7  13     25-34    Male       Graduate degree             Student   

   Healthcare_Understanding Fairness_Belief             Crisis_Prioritization  \
1                       9.0             Yes                          Children   
2                       7.0             Yes  Patients with chronic conditions   
4                       6.0             Yes                           Elderly   
6                       6.0          Unsure  Patients with chronic conditions   
7                       6.0             Yes                       All equally   

   Trust_Level Barriers_Encountered  \
1       

In [28]:
# Tally respondents per employment status to inspect which categories are present
employment_counts = df_cleaned['Employment_Status'].value_counts()
employment_counts

Unnamed: 0_level_0,count
Employment_Status,Unnamed: 1_level_1
Student,12
Employed part-time,6


In [27]:
# Remove rows where Employment_Status is 'Haha'
df_cleaned = df_cleaned[df_cleaned['Employment_Status'] != 'Haha']

# Keep the exploded tables consistent with the main table: they were built
# before this filter, so without this step the removed respondent's answers
# would still be present in priority_factors_df and ethical_principles_df.
remaining_ids = df_cleaned['ID']
priority_factors_df = priority_factors_df[priority_factors_df['ID'].isin(remaining_ids)]
ethical_principles_df = ethical_principles_df[ethical_principles_df['ID'].isin(remaining_ids)]

# Report the size of the main table after the removal
print(df_cleaned.shape)

(18, 14)


In [31]:
# Write each result table to its own Excel workbook (paths are relative,
# so the files land in the notebook's working directory).
exports = {
    'HealthCare_Prioritization.xlsx': df_cleaned,
    'Priority_Factors.xlsx': priority_factors_df,
    'Ethical_Principles.xlsx': ethical_principles_df,
}
for workbook_name, frame in exports.items():
    frame.to_excel(workbook_name)