In [38]:
#imports
import os
import pandas as pd
import numpy as np
import torch
import pickle
import matplotlib.pyplot as plt
from dotenv import load_dotenv
from sqlalchemy import create_engine
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.optim import Adam
from sklearn.svm  import LinearSVC
from sklearn.naive_bayes  import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from tqdm import tqdm
from scipy import stats

#loads global variables

load_dotenv()
    
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)


all_champions = np.array(['Aatrox', 'Ahri', 'Akali', 'Akshan', 'Alistar', 'Amumu', 'Anivia',
                    'Annie', 'Aphelios', 'Ashe', 'AurelionSol', 'Aurora', 'Azir', 'Bard',
                    'Belveth', 'Blitzcrank', 'Brand', 'Braum', 'Briar', 'Caitlyn',
                    'Camille', 'Cassiopeia', 'Chogath', 'Corki', 'Darius', 'Diana',
                    'DrMundo', 'Draven', 'Ekko', 'Elise', 'Evelynn', 'Ezreal',
                    'FiddleSticks', 'Fiora', 'Fizz', 'Galio', 'Gangplank', 'Garen',
                    'Gnar', 'Gragas', 'Graves', 'Gwen', 'Hecarim', 'Heimerdinger',
                    'Hwei', 'Illaoi', 'Irelia', 'Ivern', 'Janna', 'JarvanIV', 'Jax',
                    'Jayce', 'Jhin', 'Jinx', 'KSante', 'Kaisa', 'Kalista', 'Karma',
                    'Karthus', 'Kassadin', 'Katarina', 'Kayle', 'Kayn', 'Kennen',
                    'Khazix', 'Kindred', 'Kled', 'KogMaw', 'Leblanc', 'LeeSin',
                    'Leona', 'Lillia', 'Lissandra', 'Lucian', 'Lulu', 'Lux',
                    'Malphite', 'Malzahar', 'Maokai', 'MasterYi', 'Milio',
                    'MissFortune', 'MonkeyKing', 'Mordekaiser', 'Morgana', 'Naafiri',
                    'Nami', 'Nasus', 'Nautilus', 'Neeko', 'Nidalee', 'Nilah',
                    'Nocturne', 'Nunu', 'Olaf', 'Orianna', 'Ornn', 'Pantheon', 'Poppy',
                    'Pyke', 'Qiyana', 'Quinn', 'Rakan', 'Rammus', 'RekSai', 'Rell',
                    'Renata', 'Renekton', 'Rengar', 'Riven', 'Rumble', 'Ryze',
                    'Samira', 'Sejuani', 'Senna', 'Seraphine', 'Sett', 'Shaco', 'Shen',
                    'Shyvana', 'Singed', 'Sion', 'Sivir', 'Skarner', 'Smolder', 'Sona',
                    'Soraka', 'Swain', 'Sylas', 'Syndra', 'TahmKench', 'Taliyah',
                    'Talon', 'Taric', 'Teemo', 'Thresh', 'Tristana', 'Trundle',
                    'Tryndamere', 'TwistedFate', 'Twitch', 'Udyr', 'Urgot', 'Varus',
                    'Vayne', 'Veigar', 'Velkoz', 'Vex', 'Vi', 'Viego', 'Viktor',
                    'Vladimir', 'Volibear', 'Warwick', 'Xayah', 'Xerath', 'XinZhao',
                    'Yasuo', 'Yone', 'Yorick', 'Yuumi', 'Zac', 'Zed', 'Zeri', 'Ziggs',
                    'Zilean', 'Zoe', 'Zyra'])

cuda:0


In [39]:
def get_data(region, game_mode, elo, version):
    # Initiates SQL engine and uses it to get data given user settings and returns it as a dataframe
    engine_name = f"postgresql://{os.getenv('DB_USER')}:{os.getenv('DB_PASSWORD')}@{os.getenv('DB_HOST')}:{os.getenv('DB_PORT')}/{os.getenv('DB_NAME')}"
    engine = create_engine(engine_name)
    
    # Initialize query parts
    query_sql = "SELECT * FROM match_data"
    params = []
    
    # Add conditions dynamically based on inputs
    if region != 'ANY':
        if params:
            query_sql += " AND"
        else:
            query_sql += " WHERE"
        query_sql += " region = %s"
        params.append(region)
    
    if game_mode != 'ANY':
        if params:
            query_sql += " AND"
        else:
            query_sql += " WHERE"
        query_sql += " game_mode = %s"
        params.append(game_mode)
    
    if elo != 'ANY':
        if params:
            query_sql += " AND"
        else:
            query_sql += " WHERE"
        query_sql += " elo LIKE %s"
        params.append(elo + '%')
    
    if version != 'ANY':
        if params:
            query_sql += " AND"
        else:
            query_sql += " WHERE"
        query_sql += " version LIKE %s"
        params.append(version + '%')
    
    df = pd.read_sql_query(query_sql, con=engine, params=tuple(params))
    return df

In [40]:
def verify_data(df = None):
    #any rows that are corrupted, either through an error in data write or read, are dropped
    df = df.dropna()
    
    blue_team = ['blue_one','blue_two','blue_three','blue_four', 'blue_five']
    red_team = ['red_one', 'red_two', 'red_three', 'red_four', 'red_five']
    
    blue_team_encoded = np.zeros((len(df),len(all_champions)))
    blue_team_columns = [f"blue_{champ}" for champ in all_champions]
    red_team_encoded = np.zeros((len(df),len(all_champions)))
    red_team_columns = [f"red_{champ}" for champ in all_champions]

    #encode the blue and red champions using bag of words 
    for idx,row in df.iterrows():
        for col in blue_team:
            champ = row[col]
            champ_index = np.where(all_champions == champ)[0]
            blue_team_encoded[idx][champ_index] = 1

        for col in red_team: 
            champ = row[col]
            champ_index = np.where(all_champions == champ)[0]
            red_team_encoded[idx][champ_index] = 1

    #convert encoded data from array to dataframe and concatenate the blue and red team dataframes
    #also drop columns we will not be using
    blue_team_encoded = pd.DataFrame(blue_team_encoded,columns=blue_team_columns)
    red_team_encoded = pd.DataFrame(red_team_encoded, columns=red_team_columns)

    match_ids = df['match_id']

    df = df.drop(columns=['id','region','match_id','game_mode','elo','version'])
    df = df.drop(columns=blue_team)
    df = df.drop(columns=red_team)
    df = pd.concat([df,blue_team_encoded,red_team_encoded],axis=1)

    return df, match_ids

In [41]:
class DraftAnalysisNN(nn.Module):
    def __init__(self,input_size,output_size):
        super().__init__()
        self.conv1 = nn.Conv1d(input_size,16,1)
        self.conv2 = nn.Conv1d(16,32,1)
        self.fc1 = nn.Linear(len(all_champions) * 32,64)
        self.fc2 = nn.Linear(64,output_size)
        self.act = nn.ReLU()
    
    def forward(self,input):
        logits = self.act(self.conv1(input))
        logits = self.act(self.conv2(logits))
        logits = logits.view(logits.size(0),-1)
        logits = self.fc1(logits)
        logits = self.fc2(logits)
        
        output = torch.softmax(logits,dim=1)
        return output

In [42]:
class MatchDataset(Dataset):
    def __init__(self,df):
        self.label = np.array(df.iloc[:,0])
        self.input =  np.array(df.iloc[:,1:df.shape[1]])
        self.data = df

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self,idx):
        split = int((self.data.shape[1]-1)/2)
        blue_team = self.data.iloc[idx,1:split+1]
        red_team = self.data.iloc[idx,split+1:self.data.shape[1]]

        blue_team_tensor = torch.tensor(blue_team.values, dtype=torch.float32)
        red_team_tensor = torch.tensor(red_team.values, dtype=torch.float32)

        input = torch.stack((blue_team_tensor,red_team_tensor),dim=0).to(device)

        label = self.data.iloc[idx,0]
        label = torch.tensor(label, dtype=torch.long).to(device)
        return {
            'input': input,
            'label' : label
        }

In [43]:
def split_data(data,batch_size):
    #split the data of the dataset and array (dataset for nn, array for scipy)
    train_indices, test_indices = train_test_split(range(len(data)), test_size=0.2, random_state=42, shuffle=True)
    train_dataset = Subset(data, train_indices)
    test_dataset = Subset(data, test_indices)
    train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)

    train_inputs, test_inputs, train_labels, test_labels = train_test_split(data.input, data.label, test_size=0.2, random_state=42, shuffle=True)

    return train_loader,test_loader, train_inputs, test_inputs, train_labels, test_labels

In [44]:
def train_models(train_loader, train_inputs, train_labels, num_epochs, paths):
    #training loop
    SVC_model = LinearSVC(dual = 'auto')
    GNB_model = GaussianNB()
    channels = next(iter(train_loader))['input'].shape[1]
    NN_model = DraftAnalysisNN(channels,channels).to(device)
    
    SVC_model.fit(train_inputs,train_labels)
    GNB_model.fit(train_inputs,train_labels)

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(NN_model.parameters(),lr=1e-5)

    print(f'Starting training of {num_epochs} epochs')
    NN_model.zero_grad()
    NN_model.train()

    train_losses = []
    for epoch in range(num_epochs):
        train_loss = 0
        for batch in tqdm(train_loader):
            input = batch['input']
            label = batch['label']

            optimizer.zero_grad()
            output = NN_model(input)
            loss = criterion(output,label)
            train_loss += loss.item()

            loss.backward()
            optimizer.step()
        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss:.4f}')

    #save model
    print(f'Saving models')
    with open(paths['SVC'], 'wb') as file:
        pickle.dump(SVC_model,file)
    
    with open(paths['GNB'],'wb') as file:
        pickle.dump(GNB_model, file)
    
    torch.save(NN_model.state_dict(),paths['NN'])

    #save as onnx
    NN_model.eval()
    dummy_input = torch.randn(input.shape, requires_grad=True).to(device)

    torch.onnx.export(
        NN_model,
        dummy_input,
        paths['NN_ONNX'],
        export_params=True,
        opset_version=11,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output']
    )

    return SVC_model, GNB_model, NN_model


In [45]:
def test_models(SVC_model,GNB_model, NN_model, test_loader, test_inputs, test_labels):
    #test models
    SVC_predictions = SVC_model.predict(test_inputs)
    GNB_predictions = GNB_model.predict(test_inputs)
    
    print('Testing Models')
    NN_model.eval()
    NN_predictions = []
    labels = []
    voting_predictions = []
    with torch.no_grad():
        for batch in tqdm(test_loader):
            input = batch['input']
            label = batch['label'].cpu().numpy()

            output = NN_model(input)
            prediction = torch.argmax(output, dim=1)
            NN_pred = prediction.cpu().numpy()
            NN_predictions.extend(NN_pred)
            labels.extend(label)

            sklearn_input = input.flatten().cpu().numpy().reshape((input.shape[0], -1))
            SVC_pred = SVC_model.predict(sklearn_input)
            GNB_pred = GNB_model.predict(sklearn_input)

            #voting implementation
            combined_preds = np.stack([NN_pred, SVC_pred, GNB_pred], axis=0)
            voting_result = stats.mode(combined_preds)
            voting_predictions.extend(voting_result[0])
    
    SVC_acc = accuracy_score(test_labels,SVC_predictions)
    GNB_acc = accuracy_score(test_labels,GNB_predictions)
    NN_acc = accuracy_score(labels,NN_predictions)
    NN_prec = precision_score(labels,NN_predictions,zero_division=0)
    NN_recall = recall_score(labels,NN_predictions,zero_division=0)

    voting_acc = accuracy_score(voting_predictions, labels)

    print(f'SVC Accuracy: {SVC_acc:.2f}')
    print(f'GNB Accuracy: {GNB_acc:.2f}')
    print(f'NN Accuracy: {NN_acc:.2f}, NN Precision: {NN_prec:.2f}, NN Recall: {NN_recall:.2f}')
    print(f'Voting Accuracy: {voting_acc:.2f}')

    

In [46]:
def predict(SVC_model,GNB_model,NN_model, blue_team, red_team):
    #encode the blue_team and red_team inputs
    blue_team_encoded = np.zeros(len(all_champions))
    red_team_encoded = np.zeros(len(all_champions))

    for champ in blue_team:
        champ_index = np.where(all_champions == champ)[0]
        if champ_index.size == 0:
            raise ValueError(f"Champion '{champ}' not found in list.")
        blue_team_encoded[champ_index] = 1

    for champ in red_team:
        champ_index = np.where(all_champions == champ)[0]
        if champ_index.size == 0:
            raise ValueError(f"Champion '{champ}' not found in list.")
        red_team_encoded[champ_index] = 1

    #create sklearn input as a numpy array and pytorch nn input as a tensor
    sklearn_input = np.concatenate((blue_team_encoded,red_team_encoded),axis=None).reshape(1,-1)
    SVC_pred = SVC_model.predict(sklearn_input)
    GNB_pred = GNB_model.predict(sklearn_input)
    
    nn_input = torch.vstack((torch.tensor(blue_team_encoded),torch.tensor(red_team_encoded))).float().unsqueeze(0).to(device)
    output = NN_model(nn_input)
    prediction = torch.argmax(output, dim=1)
    NN_pred = prediction.cpu().numpy()
    #voting implementation
    combined_preds = np.stack([NN_pred, SVC_pred, GNB_pred], axis=0)
    voting_result = stats.mode(combined_preds)[0]

    #print results
    if voting_result == 0:
        winner = 'Blue Team'
    else:
        winner = 'Red Team'
    
    winner = 'Blue Team' if voting_result == 0 else 'Red Team'
    
    print(f'Predicted winner is {winner}')
    print(f'SVC model predicted {"Blue Team" if SVC_pred[0] == 0 else "Red Team"}')
    print(f'GNB model predicted {"Blue Team" if GNB_pred[0] == 0 else "Red Team"}')
    print(f'NN model predicted {"Blue Team" if NN_pred[0] == 0 else "Red Team"} with a {output[0][0]*100 if NN_pred[0] == 0 else output[0][1]*100:.2f}% chance')

    return sklearn_input



In [47]:
def get_similar_game(encoded_data, threshold, inputs, labels, match_ids):
    matching_elements = (inputs == 1) & (encoded_data == 1)
    match_counts = np.sum(matching_elements, axis=1)
    matching_ids = match_ids[match_counts >= threshold]
    matching_labels = labels[match_counts >= threshold]
    
    if matching_ids.empty:
        print(f'Found no games with {threshold} or more similar characters')
    else:
        for idx, id in enumerate(matching_ids):
            region, id_num = id.split('_')
            if region == 'NA1':
                region = 'NA'
            region = region.lower()
            winner = 'Blue Team' if matching_labels[idx] else 'Red Team'
            print(f'https://www.leagueofgraphs.com/match/{region}/{id_num}, {winner} won')


In [48]:
def main(blue_team, red_team,region='NA1',game_mode='ARAM',elo='ANY',version='14.13', threshold=5, batch_size=1, num_epochs=10, override=False):
    #error testing
    if len(blue_team) != 5 or len(red_team) != 5:
        raise ValueError("Both teams must have exactly 5 champions. "
                         f"Current sizes - Blue team: {len(blue_team)}, Red team: {len(red_team)}")
    if len(blue_team) != len(set(blue_team)):
        raise ValueError("Duplicate champions found in blue team")
    if len(red_team) != len(set(red_team)):
        raise ValueError("Duplicate champions found in red team")
    
    #label paths
    prefix = f'Models/{region}_{game_mode}_{elo}_{version}_'
    paths = {
        'SVC' : prefix+'svc_model.pkl',
        'GNB' : prefix+'gnb_model.pkl',
        'NN' : prefix+'nn_model.pth',
        'NN_ONNX': prefix+'nn_model.onnx'
    }

    #gets data
    df = get_data(region,game_mode,elo,version)
    df, match_ids = verify_data(df)
    data = MatchDataset(df) 
    print(f'Found {len(match_ids)} games with given settings')

    #train/test models if it does not exist. Otherwise load models and predict whether blue or red team will win based on the given champions
    if(override or not all(os.path.exists(path) for path in paths.values())):
        print('Override is true or model(s) missing, training/testing models')
        train_loader,test_loader, train_inputs, test_inputs, train_labels, test_labels = split_data(data,batch_size)
        SVC_model, GNB_model, NN_model = train_models(train_loader,train_inputs, train_labels, num_epochs, paths)
        test_models(SVC_model,GNB_model, NN_model, test_loader, test_inputs, test_labels)
    else:
        print('Models exist, loading models')
        with open(paths['SVC'],'rb') as file:
            SVC_model = pickle.load(file)
    
        with open(paths['GNB'],'rb') as file:
            GNB_model = pickle.load(file)

        NN_model = DraftAnalysisNN(2,2).to(device)
        NN_model.load_state_dict(torch.load(paths['NN']))

    #prediction
    encoded_data = predict(SVC_model,GNB_model,NN_model, blue_team, red_team)
    #get old games based on threshold value, if threshold = 5, get all games where 5 champions match up to our 5 champions (on same team)
    get_similar_game(encoded_data[0],threshold,data.input,data.label, match_ids)
    

In [49]:
blue_team = ['KSante','Jhin','Morgana','Yone','Bard']
red_team = ['LeeSin','Orianna','Heimerdinger','Tristana','Graves']
main(blue_team, red_team,'NA1','ARAM','ANY','14.13', threshold=4, batch_size=1, num_epochs=1, override=True)

Found 5072 games with given settings
Override is true or model(s) missing, training/testing models
Starting training of 1 epochs


100%|██████████| 4057/4057 [00:11<00:00, 349.63it/s]


Epoch [1/1], Loss: 0.6922
Saving models


RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument weight in method wrapper_CUDA___slow_conv2d_forward)