In [None]:
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the dataset
# read_csv needs the raw file contents. A github.com ".../blob/..." address
# serves the HTML file-viewer page rather than the CSV itself, so the data is
# fetched from raw.githubusercontent.com instead.
url = 'https://raw.githubusercontent.com/doubavitch/Dead-by-Daylight-Data-Visualization/main/Dead%20by%20Daylight%20data.csv'
data = pd.read_csv(url)

# Display the first few rows of the dataframe
print(data.head())

# Preprocessing
# Handle missing values (if any).
# fillna(method='ffill') is deprecated in recent pandas releases; ffill() is
# the supported spelling. Reassigning instead of using inplace=True keeps the
# step explicit. The trailing bfill() covers gaps in the leading rows, which a
# forward fill alone would leave as NaN.
data = data.ffill().bfill()

# Encode categorical variables
# Every object-dtype column is replaced by integer codes; the fitted encoder
# for each column is kept in label_encoders so codes can be mapped back to the
# original labels later (via inverse_transform).
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    encoder = LabelEncoder()
    data[column] = encoder.fit_transform(data[column])
    label_encoders[column] = encoder

# The 'Number_of_survivors_escaped' column is the prediction target;
# every other column is used as a predictor.
target = data['Number_of_survivors_escaped']
features = data.drop(columns=['Number_of_survivors_escaped'])

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Fit a seeded 100-tree random forest regressor on the training split
# (fit() returns the estimator itself, so construction and fitting are chained)
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the target for the held-out test rows
y_pred = model.predict(X_test)

# Score the predictions against the true test targets
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# Example prediction
# Select the first test row with a list index so it stays a one-row DataFrame.
# This keeps the column names (and per-column dtypes) the model was fitted
# with; passing a bare NumPy array instead makes scikit-learn warn that X has
# no valid feature names.
example = X_test.iloc[[0]]
predicted_escapees = model.predict(example)
print(f'Predicted number of survivors escaping: {predicted_escapees[0]}')

# Save the model
# Serializes the fitted estimator to 'dbd_survivor_prediction_model.pkl' in the
# current working directory (joblib is imported with the other dependencies in
# the notebook's import cell).
# NOTE: joblib files are pickle-based; only load them back from trusted sources.
joblib.dump(model, 'dbd_survivor_prediction_model.pkl')
