In [9]:
import pandas as pd

# Location of the raw contestant dataset on the local machine
file_path = '/Users/ginagrainda/Desktop/survivor_data/survivor_contestants.csv'

# Read the CSV into the working DataFrame that the later cells build on
df = pd.read_csv(filepath_or_buffer=file_path)

In [12]:
# One-hot encode categorical data (industry, gender, state, region, astrological_sign, tribe_color) to binary fields.
# The column list is defined once so the retained originals and the encoded columns cannot drift apart.
categorical_columns = ['industry', 'gender', 'state', 'region', 'astrological_sign', 'tribe_color']

# Retain a copy of the original categorical columns: get_dummies(columns=...) replaces
# them with indicator columns, so they are appended back afterwards.
original_columns = df[categorical_columns]
encoded_df = pd.get_dummies(df, columns=categorical_columns, drop_first=False)
encoded_df = pd.concat([encoded_df, original_columns], axis=1)

# Add age bands
bins = [18, 25, 35, 45, 55, 65, 75]
labels = ['18-25', '26-35', '36-45', '46-55', '56-65', '66-75']

# With right=True alone the first interval is (18, 25], which leaves an age of exactly 18
# without a band (NaN) even though the label says '18-25'. include_lowest=True closes the
# first interval on the left so 18 falls into '18-25'.
# Ages outside 18-75 still receive NaN and therefore all-zero age_band_* indicators.
encoded_df['age_band'] = pd.cut(
    encoded_df['age'],
    bins=bins,
    labels=labels,
    right=True,
    include_lowest=True,
)
age_band_encoded = pd.get_dummies(encoded_df['age_band'], prefix='age_band', drop_first=False)
encoded_df = pd.concat([encoded_df, age_band_encoded], axis=1)

In [13]:
# Cast every True/False column to integer 0/1 so the indicator fields are numeric
boolean_columns = encoded_df.select_dtypes(include='bool').columns
encoded_df = encoded_df.astype({flag_name: int for flag_name in boolean_columns})

# Preview the first five rows of the encoded table
print(encoded_df.head(5))

     contestant_name  age            hometown                profession  \
0  Sonja Christopher   63    Walnut Creek, CA  Musician/Retired Teacher   
1      B.B. Andersen   64  Mission Hills,  KS     Real Estate Developer   
2    Stacey Stillman   27  San Francisco,  CA                  Attorney   
3        Ramona Gray   29         Edison,  NJ                Biochemist   
4          Dirk Been   23   Spring Green,  WI              Dairy Farmer   

   num_season  finish  winner  black  asian  latinx  ...     region  \
0           1      16       0      0      0       0  ...       West   
1           1      15       0      0      0       0  ...    Midwest   
2           1      14       0      0      0       0  ...       West   
3           1      13       0      1      0       0  ...  Northeast   
4           1      12       0      0      0       0  ...    Midwest   

   astrological_sign  tribe_color  age_band  age_band_18-25 age_band_26-35  \
0           Aquarius       Orange     56-65 

In [14]:
# Write the encoded contestant table to disk as CSV, leaving out the row index
csv_output_path = '/Users/ginagrainda/Desktop/survivor_data/survivor_contestants_encoded.csv'
encoded_df.to_csv(csv_output_path, index=False)

In [15]:
# Write the same encoded table to an Excel workbook, again without the row index
excel_output_path = '/Users/ginagrainda/Desktop/survivor_data/survivor_contestants_encoded.xlsx'
encoded_df.to_excel(excel_output_path, index=False)