In [1]:
# Standard Libraries
import os
import sys
import datetime

# Third-Party Libraries
import pandas as pd
from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# Register tqdm's `progress_apply` on pandas objects; cells further down call it on Series.
tqdm.pandas()

# Directory that is added to sys.path so the local `modules` package can be imported.
# Defaults to the original author's checkout; set the PLAYGROUND_CODE_PATH
# environment variable to use another location without editing the notebook.
MODULE_PATH = os.path.abspath(
    os.environ.get(
        "PLAYGROUND_CODE_PATH",
        r"C:\Users\catki\Documents\Projects\playground\code",
    )
)

# Guarded so that re-running the cell does not append duplicate entries.
if MODULE_PATH not in sys.path:
    sys.path.append(MODULE_PATH)

# Local Imports
from modules.visualize import display_model_report, display_model_scores, display_multiclass_model_matrix

Data source: https://www.kaggle.com/datasets/justinveiner/survivor-cbs-dataset

In [None]:
# Number of zodiac signs; passed to `nlargest` in the ranking cells so that
# every sign present in the data is listed.
ZODIAC_NUM = 12

def zodiac_sign(day, month):
    """Return the lowercase Western zodiac sign for a day and month name.

    Parameters
    ----------
    day : int
        Day of the month. It is only compared against the month's cusp day;
        it is not range-checked.
    month : str
        Full English month name. Matching ignores case and surrounding
        whitespace.

    Returns
    -------
    str
        Sign name in lowercase, e.g. ``'capricorn'``.

    Raises
    ------
    ValueError
        If ``month`` is not a recognised English month name. (Previously an
        unknown month fell through every branch and surfaced as an
        ``UnboundLocalError`` on the return statement.)
    """
    # month -> (cusp day, sign before the cusp day, sign from the cusp day on)
    cusps = {
        'january': (20, 'capricorn', 'aquarius'),
        'february': (19, 'aquarius', 'pisces'),
        'march': (21, 'pisces', 'aries'),
        'april': (20, 'aries', 'taurus'),
        'may': (21, 'taurus', 'gemini'),
        'june': (21, 'gemini', 'cancer'),
        'july': (23, 'cancer', 'leo'),
        'august': (23, 'leo', 'virgo'),
        'september': (23, 'virgo', 'libra'),
        'october': (23, 'libra', 'scorpio'),
        'november': (22, 'scorpio', 'sagittarius'),
        'december': (22, 'sagittarius', 'capricorn'),
    }

    month_key = str(month).strip().lower()
    if month_key not in cusps:
        raise ValueError(f"unrecognised month name: {month!r}")

    cusp_day, sign_before, sign_from_cusp = cusps[month_key]
    return sign_before if day < cusp_day else sign_from_cusp

In [None]:
# Folder that holds the dataset's CSV tables. Defaults to the original author's
# local folder; set the SURVIVOR_DATA_DIR environment variable to point at
# another copy of the data without editing the notebook.
data_path = os.path.abspath(
    os.environ.get(
        "SURVIVOR_DATA_DIR",
        r"C:\Users\catki\Documents\Projects\data\survivor_data",
    )
)

In [None]:
# Full paths of the three dataset tables, all located directly under data_path.
contestant_file, season_file, tribe_file = (
    os.path.join(data_path, table_filename)
    for table_filename in (
        "contestant_table.csv",
        "season_table.csv",
        "tribe_table.csv",
    )
)

In [None]:
# Load the contestant table, choosing the pandas reader from the file extension.
# The extension check is case-insensitive, and an unsupported extension now
# fails here with a descriptive error instead of leaving `contestant_df`
# undefined for the cells that follow.
contestant_file_lower = contestant_file.lower()
if contestant_file_lower.endswith("csv"):
    contestant_df = pd.read_csv(contestant_file)
elif contestant_file_lower.endswith(("xlsx", "xls")):
    contestant_df = pd.read_excel(contestant_file)
else:
    raise ValueError(
        f"Unsupported file type for contestant table: {contestant_file!r} "
        "(expected csv, xlsx or xls)"
    )

In [None]:
# Summarise the loaded table: columns, dtypes and non-null counts.
contestant_df.info()

In [None]:
# Preview the first rows of the contestant table (head() defaults to five).
contestant_df.head()

In [None]:
# Parse the `birthdate` strings (YYYY-MM-DD) into datetimes in one vectorized
# call instead of a row-by-row strptime lambda. Malformed strings still raise
# ValueError; missing values become NaT rather than failing inside strptime.
contestant_df["birthdate_datetime"] = pd.to_datetime(
    contestant_df["birthdate"], format="%Y-%m-%d"
)

In [None]:
# English month names indexed by (month number - 1). strftime("%B") follows the
# process locale, so on a non-English system it would yield month names that
# zodiac_sign does not recognise; a fixed table avoids that dependency.
ENGLISH_MONTH_NAMES = (
    "january", "february", "march", "april", "may", "june",
    "july", "august", "september", "october", "november", "december",
)

# Derive each contestant's zodiac sign from the parsed birthdate.
contestant_df["zodiac"] = contestant_df["birthdate_datetime"].progress_apply(
    lambda born: zodiac_sign(born.day, ENGLISH_MONTH_NAMES[born.month - 1])
)

In [None]:
# Keep only the rows whose `finish` value is 1 (used below as the season winners).
finished_first = contestant_df["finish"].eq(1)
won_df = contestant_df.loc[finished_first]

In [None]:
# Column, dtype and non-null summary for the winners subset.
won_df.info()

In [None]:
# Display the winners table.
won_df

In [None]:
# First to be voted off by zodiac: count rows with finish == 16 per sign.
# NOTE(review): finish == 16 matches "first voted off" only for seasons with
# exactly 16 contestants — presumably `num_boot == 1` is the season-independent
# filter; confirm against the dataset schema.
placed_sixteenth = contestant_df.loc[contestant_df["finish"] == 16]
placed_sixteenth.groupby("zodiac")["finish"].count().nlargest(ZODIAC_NUM)

In [None]:
# Most contestants voted off by zodiac.
# What is counted: the number of non-null `num_boot` entries for each sign,
# ranked from largest to smallest.
boot_counts_by_sign = contestant_df.groupby("zodiac")["num_boot"].count()
boot_counts_by_sign.nlargest(ZODIAC_NUM)

In [None]:
# Wins by zodiac: number of rows in the winners table for each sign, largest first.
win_counts_by_sign = won_df.groupby("zodiac")["finish"].count()
win_counts_by_sign.nlargest(ZODIAC_NUM)

In [None]:
# Display the winners table before modelling.
# NOTE(review): the same frame is already displayed in an earlier cell — this one looks redundant.
won_df

In [None]:
# Feature matrix: one row per winner, one 0/1 column per zodiac sign.
# This cell previously assigned won_df["gender"].tolist(), which made X identical
# to the target y (the model would simply be handed the answer) and a flat 1-D
# list, whereas scikit-learn estimators expect X shaped (n_samples, n_features).
# NOTE(review): zodiac is assumed to be the intended predictor because it is the
# feature this notebook engineers — confirm before relying on the results.
X = pd.get_dummies(won_df["zodiac"], dtype=float)

In [None]:
# Target labels: the `gender` column of the winners table as a plain Python list.
y = won_df["gender"].tolist()

In [None]:
# Hold out 30% of the samples for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=42,
)

In [None]:
# Support-vector classifier created with scikit-learn's default hyperparameters.
model = SVC()

In [None]:
# Fit the classifier on the training split.
# NOTE(review): scikit-learn expects X_train to be a 2-D numeric array of shape
# (n_samples, n_features) — verify the feature matrix meets that before running.
model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)