In [62]:
import pandas as pd
import numpy as np
import os
import re
from scipy.stats.mstats import winsorize
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve
import pickle

In [63]:
# Colab-only step: mount Google Drive at /content/drive so later cells can read files from it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [64]:
# Folder on the mounted Drive that later cells join file names onto when reading data and models.
BASE_FILE_PATH = '/content/drive/MyDrive/Colab Notebooks/Final Project'
# Print the folder contents as a quick check that the mount worked and the path is valid.
print(os.listdir(BASE_FILE_PATH))

['Keys', 'Setting up connection with Git-Hub.ipynb', 'entries.csv', 'summoner_details.csv', 'updated_dataset.csv', 'filtered_dataset.csv', 'filtered_dataset.gsheet', 'filtered_+30_matches_dataset.csv', 'EDA.ipynb', 'merged_dataset.csv', 'Merging match with entries and classifying accounts.ipynb', 'merged_dataset.gsheet', 'Feature engineering and cleaning vol 1.ipynb', 'dataset_after_normalization.csv', 'keepign only win feature .ipynb', 'Models without data normalization.ipynb', 'Normalization_Parameters.csv', 'Normalization_Parameters_by_Team_Position.csv', 'summoner_ids_used_in_model.csv', 'gbm_model.pkl', 'train_df.csv', 'test_df.csv', 'validation_df.csv', 'processed_train_df.csv', 'Normalization_Parameters_by_Game_Mode.csv', 'processed_test_df.csv', 'random_forest_model.pkl', 'processed_validation_df.csv']


In [65]:
# Read the validation split from the project folder into a DataFrame.
validation_csv_path = os.path.join(BASE_FILE_PATH, 'processed_validation_df.csv')
validation_df = pd.read_csv(validation_csv_path)

In [66]:
# Load the pickled model saved as 'random_forest_model.pkl' in the BASE_FILE_PATH directory.
# NOTE(review): pickle.load can execute arbitrary code from the file; only load pickles you created or trust.

with open(os.path.join(BASE_FILE_PATH, 'random_forest_model.pkl'), 'rb') as f:
    random_forest_model = pickle.load(f)

In [67]:
def evaluate_predictions(df, model, features_col, actual_col='binary_time_group'):
    """
    Compare a model's predictions with the actual labels and tag every row as right or wrong.

    Side effects: ``df`` is modified in place -- a 'prediction' column containing the
    string 'Correct' or 'Incorrect' is added (or overwritten) -- and a two-line summary
    (correct count and accuracy) is printed.

    Args:
    df (DataFrame): The DataFrame containing the features and actual labels.
    model (Model): Any fitted object exposing ``predict(X)``.
    features_col (list): Column labels of ``df`` passed to ``model.predict``.
    actual_col (str): Column name of the actual labels.

    Returns:
    DataFrame: The same ``df`` object, now carrying the 'prediction' column.
    """
    # Extracting the features and actual labels
    X = df[features_col]
    y_actual = df[actual_col]

    # Predicting using the model
    predictions = model.predict(X)

    # Row-by-row verdict: 'Correct' when the prediction equals the actual label
    outcomes = ['Correct' if act == pred else 'Incorrect' for act, pred in zip(y_actual, predictions)]
    df['prediction'] = outcomes

    # Count directly from the verdict list instead of round-tripping through value_counts()
    correct_count = outcomes.count('Correct')
    total = len(predictions)
    # An empty frame has no defined accuracy; report NaN rather than raising ZeroDivisionError
    accuracy = correct_count / total if total else float('nan')

    print(f"Total Correct Predictions: {correct_count}/{total}")
    print(f"Accuracy: {accuracy:.2f}")

    return df

# Columns that must not be fed to the model: the label, the account identifier, and the
# 'prediction' column that evaluate_predictions() writes into validation_df in place.
# Listing 'prediction' keeps this cell re-runnable; difference() ignores names that are absent.
# NOTE(review): Index.difference returns the names sorted, so this assumes the model was
# fitted on the same (sorted) column order -- TODO confirm against the training notebook.
non_feature_columns = ['binary_time_group', 'summoner_id', 'prediction']
features_for_prediction = validation_df.columns.difference(non_feature_columns)

# Use the function and save the updated DataFrame
updated_validation_df = evaluate_predictions(validation_df, random_forest_model, features_for_prediction)

Total Correct Predictions: 566/874
Accuracy: 0.65




In [68]:
# Inspect column names, dtypes and non-null counts of the frame returned by evaluate_predictions.
updated_validation_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 874 entries, 0 to 873
Data columns (total 23 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   deaths_normalized_late                874 non-null    float64
 1   kills_normalized_late                 874 non-null    float64
 2   assists_normalized_late               874 non-null    float64
 3   game_duration_normalized_late         874 non-null    float64
 4   total_minions_killed_normalized_late  874 non-null    float64
 5   gold_earned_normalized_late           874 non-null    float64
 6   total_damage_dealt_normalized_late    874 non-null    float64
 7   total_damage_taken_normalized_late    874 non-null    float64
 8   deaths_normalized_mid                 874 non-null    float64
 9   kills_normalized_mid                  874 non-null    float64
 10  assists_normalized_mid                874 non-null    float64
 11  game_duration_norma