In [100]:
import pandas as pd
import numpy as np
import os
import re
from scipy.stats.mstats import winsorize
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve
import pickle

In [101]:
# Mount Google Drive at /content/drive so the project files stored there can be
# read by the cells below. `google.colab` is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [102]:
# Project folder on the mounted Drive; the data and pickle files loaded in the
# following cells are resolved relative to this path.
BASE_FILE_PATH = '/content/drive/MyDrive/Colab Notebooks/Final Project'
# Print the folder contents as a quick check that the mount succeeded and the
# expected files are present.
print(os.listdir(BASE_FILE_PATH))

['Keys', 'Setting up connection with Git-Hub.ipynb', 'entries.csv', 'summoner_details.csv', 'updated_dataset.csv', 'filtered_dataset.csv', 'filtered_dataset.gsheet', 'filtered_+30_matches_dataset.csv', 'EDA.ipynb', 'merged_dataset.csv', 'Merging match with entries and classifying accounts.ipynb', 'merged_dataset.gsheet', 'Feature engineering and cleaning vol 1.ipynb', 'dataset_after_normalization.csv', 'keepign only win feature .ipynb', 'Models without data normalization.ipynb', 'Normalization_Parameters.csv', 'Normalization_Parameters_by_Team_Position.csv', 'summoner_ids_used_in_model.csv', 'gbm_model.pkl', 'train_df.csv', 'test_df.csv', 'validation_df.csv', 'processed_train_df.csv', 'Normalization_Parameters_by_Game_Mode.csv', 'processed_test_df.csv', 'random_forest_model.pkl', 'processed_validation_df.csv', 'scaler.pkl']


In [103]:
# Read the validation split from processed_validation_df.csv.
validation_df = pd.read_csv(os.path.join(BASE_FILE_PATH, 'processed_validation_df.csv'))
# Load the pickled scaler object saved as scaler.pkl.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open(os.path.join(BASE_FILE_PATH, 'scaler.pkl'), 'rb') as f:
    scaler = pickle.load(f)

In [104]:
# Load the pickled random forest model (random_forest_model.pkl) from BASE_FILE_PATH.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.

with open(os.path.join(BASE_FILE_PATH, 'random_forest_model.pkl'), 'rb') as f:
    random_forest_model = pickle.load(f)

In [105]:
def evaluate_predictions(df, model, scaler, features_col, actual_col='binary_time_group'):
    """
    Score `model` against the labels in `df` and record per-row correctness.

    The feature columns are selected from `df`, reordered to
    `scaler.feature_names_in_`, passed through `scaler.transform`, and fed to
    `model.predict`. A 'prediction' column holding 'Correct' / 'Incorrect' is
    written onto `df` itself (the caller's frame is modified in place), and a
    short summary is printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the feature columns and the label column.
    model : object
        Anything exposing `predict(X)` that returns one label per row.
    scaler : object
        Anything exposing `feature_names_in_` and `transform(X)`.
    features_col : list
        Column labels to pull from `df`; must include every name listed in
        `scaler.feature_names_in_`.
    actual_col : str, default 'binary_time_group'
        Name of the column holding the true labels.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, now carrying the 'prediction' column.
    """
    # Extracting the features and actual labels
    X = df[features_col]
    y_actual = df[actual_col]

    # Debugging: print feature names to check if they match
    print("Features used for scaling:", scaler.feature_names_in_)
    print("Features from dataframe:", X.columns.tolist())

    # Reorder features to match the order the scaler reports
    X = X[scaler.feature_names_in_]

    # Scaling the features
    X_scaled = scaler.transform(X)

    # Predicting using the model
    predictions = model.predict(X_scaled)

    # Determining if each prediction is correct
    df['prediction'] = ['Correct' if act == pred else 'Incorrect' for act, pred in zip(y_actual, predictions)]

    # Summary of predictions
    is_correct = df['prediction'] == 'Correct'
    correct_count = is_correct.sum()
    total = len(predictions)
    # An empty frame has no defined accuracy; report NaN instead of dividing by zero.
    accuracy = correct_count / total if total else float('nan')

    # Count the correct predictions by actual status. The label column is taken
    # from `actual_col` so a non-default label column is honoured here as well.
    status_correct_counts = df.loc[is_correct, actual_col].value_counts()
    # The per-class lines look up the literal labels 'Active' and 'Inactive';
    # any other label values are reported as 0.
    active_correct = status_correct_counts.get('Active', 0)
    inactive_correct = status_correct_counts.get('Inactive', 0)

    print(f"Total Correct Predictions: {correct_count}/{total}")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Correct Active Predictions: {active_correct}")
    print(f"Correct Inactive Predictions: {inactive_correct}")

    return df

# Evaluate the loaded random forest on the validation split.
# The feature list is taken straight from the scaler so the columns are selected
# in the order the scaler reports.
features_for_prediction = list(scaler.feature_names_in_)  # column names, in the scaler's order
# evaluate_predictions adds a 'prediction' column to the frame it is given and
# returns that same frame, so `updated_validation_df` and `validation_df` refer
# to the same object.
updated_validation_df = evaluate_predictions(validation_df, random_forest_model, scaler, features_for_prediction)

Features used for scaling: ['deaths_normalized_late' 'kills_normalized_late'
 'assists_normalized_late' 'game_duration_normalized_late'
 'total_minions_killed_normalized_late' 'gold_earned_normalized_late'
 'total_damage_dealt_normalized_late' 'total_damage_taken_normalized_late'
 'deaths_normalized_mid' 'kills_normalized_mid' 'assists_normalized_mid'
 'game_duration_normalized_mid' 'total_minions_killed_normalized_mid'
 'gold_earned_normalized_mid' 'total_damage_dealt_normalized_mid'
 'total_damage_taken_normalized_mid' 'deaths_normalized_initial'
 'kills_normalized_initial' 'assists_normalized_initial'
 'game_duration_normalized_initial']
Features from dataframe: ['deaths_normalized_late', 'kills_normalized_late', 'assists_normalized_late', 'game_duration_normalized_late', 'total_minions_killed_normalized_late', 'gold_earned_normalized_late', 'total_damage_dealt_normalized_late', 'total_damage_taken_normalized_late', 'deaths_normalized_mid', 'kills_normalized_mid', 'assists_normalize



In [106]:
# Show column names, non-null counts and dtypes of the evaluated frame.
updated_validation_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 874 entries, 0 to 873
Data columns (total 23 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   deaths_normalized_late                874 non-null    float64
 1   kills_normalized_late                 874 non-null    float64
 2   assists_normalized_late               874 non-null    float64
 3   game_duration_normalized_late         874 non-null    float64
 4   total_minions_killed_normalized_late  874 non-null    float64
 5   gold_earned_normalized_late           874 non-null    float64
 6   total_damage_dealt_normalized_late    874 non-null    float64
 7   total_damage_taken_normalized_late    874 non-null    float64
 8   deaths_normalized_mid                 874 non-null    float64
 9   kills_normalized_mid                  874 non-null    float64
 10  assists_normalized_mid                874 non-null    float64
 11  game_duration_norma