In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import warnings
warnings.filterwarnings("ignore")

## Escape Prediction

In [21]:
import joblib

# Baseline model: integer-encode the text features and fit a random forest
# regressor that predicts 'Nb Escaped Survivors' for a match.

# Load the dataset
data = pd.read_csv('Dead by Daylight data.csv')

# Define features and target. 'Comments' and 'Nb Hatch escapes' are excluded
# from the model inputs along with the target itself.
features = data.drop(columns=['Nb Escaped Survivors', 'Comments', 'Nb Hatch escapes'])
target = data['Nb Escaped Survivors']

# Encode categorical variables.
# Only the text columns that are actually used as features are encoded, and the
# encoding is written to `features` (a new frame returned by `drop`) so the raw
# `data` frame stays untouched. One fitted encoder is kept per column so the
# string <-> integer mapping can be reapplied or inverted later.
label_encoders = {}
for column in features.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    features[column] = label_encoders[column].fit_transform(features[column])

# Split the data into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Train a Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# Example prediction.
# Index with a list so the row stays a one-row DataFrame: the model was fitted
# on a DataFrame, and passing a bare ndarray drops the column names (and turns
# this mixed int/bool row into an object-dtype array).
example = X_test.iloc[[0]]
predicted_escapes = model.predict(example)
print(example)
print(f'Predicted number of survivors escaping: {predicted_escapes[0]}')

# Save the model.
# NOTE: only the fitted estimator is written; `label_encoders` is not persisted,
# so whoever loads this file must reproduce the same integer encoding.
joblib.dump(model, 'dbd_survivor_prediction_model.pkl')

Mean Squared Error: 1.151145123927233
R^2 Score: 0.3950789889174279
[[4 21 3 7 False]]
Predicted number of survivors escaping: 0.34666666666666673


['dbd_survivor_prediction_model.pkl']

In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Pipeline model: one-hot encode the text features inside a scikit-learn
# Pipeline so preprocessing and regressor are fitted and saved as one object.

# Load the dataset
data = pd.read_csv('Dead by Daylight data.csv')

# Preprocessing
# Handle missing values (if any) by carrying the previous row's value forward.
# `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')` spelling
# and returns a new frame instead of mutating in place.
# NOTE(review): this fills every column, including the target, and runs before
# the train/test split -- confirm that is intended.
data = data.ffill()

# Define features and target
features = data.drop(columns=['Nb Escaped Survivors','Comments','Nb Hatch escapes'])
target = data['Nb Escaped Survivors']

# Identify categorical columns
categorical_cols = features.select_dtypes(include=['object']).columns.tolist()

# Define the preprocessor: one-hot encode the text columns and pass every other
# column through unchanged. Categories not seen during fit are ignored at
# predict time instead of raising.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
    ],
    remainder='passthrough'
)

# Create the model pipeline
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Split the data into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# Example prediction: the first held-out row, kept as a one-row DataFrame so
# the pipeline can select columns by name.
example = X_test.iloc[0:1]
predicted_escapees = model.predict(example)
actual_escapees = y_test.iloc[0]
print(f'Given features: \n{example}')
print(f'Predicted number of survivors escaping for this round: {predicted_escapees[0]}')
# Show the recorded value for the same row so the prediction can be judged.
print(f'Actual number of survivors escaping for this round: {actual_escapees}')

# Save the model (the whole pipeline, including the fitted encoder)
joblib.dump(model, 'dbd_survivor_prediction_model_2.pkl')




Mean Squared Error: 1.1066810237691445
R^2 Score: 0.41844465139163034
Given features: 
    Map  Killer  Nb Finished generators Season   SBMM
471  CP  Spirit                       3  March  False
Predicted number of survivors escaping for this round: 0.7087857142857144


['dbd_survivor_prediction_model_2.pkl']