In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import learning_curve
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from scipy import stats
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv("../processed_data/data_cleaned_Carmen/20231128_only_deaths_ep_weights.csv")

In [4]:
df = df[df['allegiance'].map(df['allegiance'].value_counts()) > 30]

In [7]:
df = df[df['killer'].map(df['killer'].value_counts()) > 30]

In [8]:
df = df[df['killers_house'].map(df['killers_house'].value_counts()) > 30]

In [9]:
df = df[df['location'].map(df['location'].value_counts()) > 30]

In [10]:
df = df[df['method'].map(df['method'].value_counts()) > 30]

In [4]:
df

Unnamed: 0.1,Unnamed: 0,allegiance,killer,killers_house,location,method,name,episode_global_num,weights_simple
0,0,Night's Watch,White Walker,,Beyond the Wall,Ice sword,Waymar Royce,1,0.999550
1,1,Night's Watch,White Walker,,Beyond the Wall,Ice sword,Gared,1,0.999550
2,2,Night's Watch,Ned Stark,House Stark,Winterfell,Sword,Will,1,0.999550
3,3,,Direwolf,,Winterfell,Teeth,Stag,1,0.999550
4,4,,Stag,,Winterfell,Antler,Direwolf,1,0.999550
...,...,...,...,...,...,...,...,...,...
2219,2219,,Daenerys Targaryen,House Targaryen,King's Landing,Dragonfire (Dragon),King's Landing Citizen,72,0.895234
2220,2220,,Daenerys Targaryen,House Targaryen,King's Landing,Dragonfire (Dragon),King's Landing Citizen,72,0.895234
2221,2221,,Daenerys Targaryen,House Targaryen,King's Landing,Dragonfire (Dragon),King's Landing Citizen,72,0.895234
2222,2222,,Daenerys Targaryen,House Targaryen,King's Landing,Dragonfire (Dragon),King's Landing Citizen,72,0.895234


In [5]:
df = df.drop(columns=["Unnamed: 0",'name'], axis=1)

In [6]:
y = df["episode_global_num"]
y = y.to_frame(name="episode_global_num")
y.head()

Unnamed: 0,episode_global_num
0,1
1,1
2,1
3,1
4,1


In [7]:
X = df
X = X.drop(columns = ["episode_global_num"], axis=1)
X.head()

Unnamed: 0,allegiance,killer,killers_house,location,method,weights_simple
0,Night's Watch,White Walker,,Beyond the Wall,Ice sword,0.99955
1,Night's Watch,White Walker,,Beyond the Wall,Ice sword,0.99955
2,Night's Watch,Ned Stark,House Stark,Winterfell,Sword,0.99955
3,,Direwolf,,Winterfell,Teeth,0.99955
4,,Stag,,Winterfell,Antler,0.99955


In [8]:
num_transformer = Pipeline([('standard_scaler', StandardScaler())])

cat_transformer = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

preprocessor = ColumnTransformer([
    ('num_transformer', num_transformer, ["weights_simple"]),
    ('cat_transformer', cat_transformer, ['allegiance','killer','killers_house','location','method'])
])

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [12]:
X_train = pd.DataFrame(preprocessor.fit_transform(X_train),
            columns=preprocessor.get_feature_names_out())

In [16]:
X_train.columns

Index(['num_transformer__weights_simple',
       'cat_transformer__allegiance_Brotherhood Without Banners',
       'cat_transformer__allegiance_Children of the Forest',
       'cat_transformer__allegiance_Dothraki',
       'cat_transformer__allegiance_Faith Militant',
       'cat_transformer__allegiance_Faith of the Seven',
       'cat_transformer__allegiance_Free Folk',
       'cat_transformer__allegiance_Good Masters',
       'cat_transformer__allegiance_Great Masters',
       'cat_transformer__allegiance_House Arryn',
       ...
       'cat_transformer__method_Shield', 'cat_transformer__method_Spear',
       'cat_transformer__method_Steak', 'cat_transformer__method_Sword',
       'cat_transformer__method_Teeth',
       'cat_transformer__method_Teeth (Dragon)',
       'cat_transformer__method_Tusk', 'cat_transformer__method_Unknown',
       'cat_transformer__method_Whip', 'cat_transformer__method_Wildfire'],
      dtype='object', length=276)

In [22]:
model = LinearRegression()
#model.fit(X_train,y_train)
cv_results = cross_validate(model, X_train, y_train, cv=5, scoring=('neg_mean_absolute_error'))
test = cv_results["test_score"].mean()
test

-1229826564721.7766

In [20]:
cv_results

{'fit_time': array([0.02716804, 0.27970505, 0.28996301, 0.54973888, 0.29789972]),
 'score_time': array([0.04364586, 0.0171001 , 0.02943397, 0.01625395, 0.04891014]),
 'test_score': array([-1.28094492e+12, -1.44602049e+12, -1.25988697e+12, -1.18035326e+12,
        -9.81927191e+11])}