In [612]:
#imports
import os
import psycopg2
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from dotenv import load_dotenv
from sqlalchemy import create_engine
from torch import nn
from torch.utils.data import Dataset, DataLoader, Subset
from torch.optim import Adam
from sklearn.svm  import LinearSVC
from sklearn.naive_bayes  import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm

load_dotenv()
    
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

cpu


In [613]:
def get_csv(region='NA1',game_mode='ARAM',patch='14.13'):
    #gets data collected into a csv

    #if using sqlalchemy
    # engine_name = f"postgresql://{os.getenv('DB_USER')}:{os.getenv('DB_PASSWORD')}@{os.getenv('DB_HOST')}:{os.getenv('DB_PORT')}/{os.getenv('DB_NAME')}"
    conn = psycopg2.connect(
        database = os.getenv('DB_NAME'),
        host = os.getenv('DB_HOST'),
        user = os.getenv('DB_USER'),
        password = os.getenv('DB_PASSWORD'),
        port = os.getenv('5432')
    )

    os.makedirs('MatchData', exist_ok=True)
    csv_path = f'MatchData/{region}_{game_mode}_{patch}.csv'

    cursor = conn.cursor()
    patch = patch+'%'

    query_sql = """SELECT * 
    FROM match_data 
    WHERE region = %s 
    AND game_mode = %s 
    AND patch LIKE %s"""

    query = cursor.mogrify(query_sql,(region, game_mode,patch))
    query = query.decode('utf-8')

    try:
        if os.path.exists(csv_path):
            print("Csv found")
        else:
            with open(csv_path,'w') as f:
                cursor.copy_expert("COPY ({}) TO STDOUT WITH CSV HEADER".format(query),f)
            print("Copy to csv successful")

    except (Exception, psycopg2.DatabaseError) as error:
        print(error)


In [614]:
def get_data(region='NA1',game_mode='ARAM',elo='ANY',version='14.13'):
    #features extraction, encoding and data verification
    engine_name = f"postgresql://{os.getenv('DB_USER')}:{os.getenv('DB_PASSWORD')}@{os.getenv('DB_HOST')}:{os.getenv('DB_PORT')}/{os.getenv('DB_NAME')}"
    engine = create_engine(engine_name)
    
    version = version+'%'
    elo = elo+'%'

    if(elo == 'ANY%'):
        query_sql = """SELECT * 
        FROM match_data 
        WHERE region = %s 
        AND game_mode = %s 
        AND version LIKE %s"""
        params = (region,game_mode,version)
        
    else:
        query_sql = """SELECT * 
        FROM match_data 
        WHERE region = %s 
        AND game_mode = %s 
        AND elo LIKE %s
        AND version LIKE %s"""
        params = (region, game_mode, elo, version)

    df = pd.read_sql_query(query_sql,con=engine,params=params)
    return df

In [615]:
def verify_data(df = None):
    df = df.dropna()

    all_champions = np.array(['Aatrox', 'Ahri', 'Akali', 'Akshan', 'Alistar', 'Amumu', 'Anivia',
                    'Annie', 'Aphelios', 'Ashe', 'AurelionSol', 'Azir', 'Bard',
                    'Belveth', 'Blitzcrank', 'Brand', 'Braum', 'Briar', 'Caitlyn',
                    'Camille', 'Cassiopeia', 'Chogath', 'Corki', 'Darius', 'Diana',
                    'DrMundo', 'Draven', 'Ekko', 'Elise', 'Evelynn', 'Ezreal',
                    'FiddleSticks', 'Fiora', 'Fizz', 'Galio', 'Gangplank', 'Garen',
                    'Gnar', 'Gragas', 'Graves', 'Gwen', 'Hecarim', 'Heimerdinger',
                    'Hwei', 'Illaoi', 'Irelia', 'Ivern', 'Janna', 'JarvanIV', 'Jax',
                    'Jayce', 'Jhin', 'Jinx', 'KSante', 'Kaisa', 'Kalista', 'Karma',
                    'Karthus', 'Kassadin', 'Katarina', 'Kayle', 'Kayn', 'Kennen',
                    'Khazix', 'Kindred', 'Kled', 'KogMaw', 'Leblanc', 'LeeSin',
                    'Leona', 'Lillia', 'Lissandra', 'Lucian', 'Lulu', 'Lux',
                    'Malphite', 'Malzahar', 'Maokai', 'MasterYi', 'Milio',
                    'MissFortune', 'MonkeyKing', 'Mordekaiser', 'Morgana', 'Naafiri',
                    'Nami', 'Nasus', 'Nautilus', 'Neeko', 'Nidalee', 'Nilah',
                    'Nocturne', 'Nunu', 'Olaf', 'Orianna', 'Ornn', 'Pantheon', 'Poppy',
                    'Pyke', 'Qiyana', 'Quinn', 'Rakan', 'Rammus', 'RekSai', 'Rell',
                    'Renata', 'Renekton', 'Rengar', 'Riven', 'Rumble', 'Ryze',
                    'Samira', 'Sejuani', 'Senna', 'Seraphine', 'Sett', 'Shaco', 'Shen',
                    'Shyvana', 'Singed', 'Sion', 'Sivir', 'Skarner', 'Smolder', 'Sona',
                    'Soraka', 'Swain', 'Sylas', 'Syndra', 'TahmKench', 'Taliyah',
                    'Talon', 'Taric', 'Teemo', 'Thresh', 'Tristana', 'Trundle',
                    'Tryndamere', 'TwistedFate', 'Twitch', 'Udyr', 'Urgot', 'Varus',
                    'Vayne', 'Veigar', 'Velkoz', 'Vex', 'Vi', 'Viego', 'Viktor',
                    'Vladimir', 'Volibear', 'Warwick', 'Xayah', 'Xerath', 'XinZhao',
                    'Yasuo', 'Yone', 'Yorick', 'Yuumi', 'Zac', 'Zed', 'Zeri', 'Ziggs',
                    'Zilean', 'Zoe', 'Zyra'])
    
    blue_team = ['blue_one','blue_two','blue_three','blue_four', 'blue_five']
    red_team = ['red_one', 'red_two', 'red_three', 'red_four', 'red_five']

    blue_team_encoded = np.zeros((len(df),len(all_champions)))
    blue_team_columns = [f"blue_{champ}" for champ in all_champions]
    red_team_encoded = np.zeros((len(df),len(all_champions)))
    red_team_columns = [f"red_{champ}" for champ in all_champions]


    for idx,row in df.iterrows():
        for col in blue_team:
            champ = row[col]
            champ_index = np.where(all_champions == champ)[0]
            blue_team_encoded[idx][champ_index] = 1

        for col in red_team:
            champ = row[col]
            champ_index = np.where(all_champions == champ)[0]
            red_team_encoded[idx][champ_index] = 1

    blue_team_encoded = pd.DataFrame(blue_team_encoded,columns=blue_team_columns)
    red_team_encoded = pd.DataFrame(red_team_encoded, columns=red_team_columns)

    df = df.drop(columns=['id','match_id','region','game_mode','elo','version'])
    df = df.drop(columns=blue_team)
    df = df.drop(columns=red_team)
    df = pd.concat([df,blue_team_encoded,red_team_encoded],axis=1)

    return df

In [616]:
class DraftAnalysisNN(nn.Module):
    def __init__(self,input_size,output_size,kernel_size):
        super().__init__()
        self.conv1 = nn.Conv2d(1,16,1)
        self.conv2 = nn.Conv2d(16,32,1)
        self.fc1 = nn.Linear(167 * 2 * 32,64)
        self.fc2 = nn.Linear(64,2)
        self.act = nn.ReLU()
    
    def forward(self,input):
        logits = self.act(self.conv1(input))
        logits = self.act(self.conv2(logits))
        logits = logits.view(1,-1)
        logits = self.fc1(logits)
        logits = self.fc2(logits)
        
        output = torch.softmax(logits,dim=1)
        return output

In [617]:
class MatchDataset(Dataset):
    def __init__(self,df):
        blue_team = np.array(df.iloc[:,1:168])
        red_team = np.array(df.iloc[:,168:355])
        self.label = df.iloc[:,0]
        self.input = np.concatenate((blue_team,red_team),axis=1)
        self.data = df

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self,idx):
        blue_team = self.data.iloc[idx,1:168]
        red_team = self.data.iloc[idx,168:335]

        blue_team_tensor = torch.tensor(blue_team.values, dtype=torch.float32).to(device)
        red_team_tensor = torch.tensor(red_team.values, dtype=torch.float32).to(device)

        input = torch.stack((blue_team_tensor,red_team_tensor),dim=0)

        label = self.data.iloc[idx,0]
        label = torch.tensor(label, dtype=torch.float32).to(device)

        return {
            'input': input,
            'label' : label
        }

In [618]:
def split_data(data,batch_size):
    train_indices, test_indices = train_test_split(range(len(data)), test_size=0.2, random_state=42, shuffle=True)
    train_dataset = Subset(data, train_indices)
    test_dataset = Subset(data, test_indices)
    train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size, shuffle=True)

    train_inputs, test_inputs, train_labels, test_labels = train_test_split(data.input, data.label, test_size=0.2, random_state=42, shuffle=True)

    return train_loader,test_loader, train_inputs, test_inputs, train_labels, test_labels

In [619]:
def train_models(train_loader, train_inputs, train_labels, num_epochs):
    SVC_model = LinearSVC(dual = 'auto')
    GNB_model = GaussianNB()
    NN_model = DraftAnalysisNN(1,2,1)
    
    print(train_inputs[0].shape)
    SVC_model.fit(train_inputs,train_labels)
    GNB_model.fit(train_inputs,train_labels)

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(NN_model.parameters(),lr=1e-4)

    '''
    print(f'Starting training of {num_epochs} epochs')
    NN_model.zero_grad()
    NN_model.train()

    train_losses = []
    for epoch in range(num_epochs):
        train_loss = 0
        for batch in tqdm(train_loader):
            input = batch['input']
            label = batch['label'].long()
            
            print(input)
            print(input.shape)
            output = NN_model(input)
            loss = criterion(output,label)
            train_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss /= len(train_loader)
        train_losses.append(train_loss)

        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {train_loss:.4f}')
        '''
    return SVC_model, GNB_model, NN_model


In [620]:
def test_models(SVC_model,GNB_model, NN_model, test_loader, test_inputs, test_labels):
    SVC_predictions = SVC_model.predict(test_inputs)
    GNB_predictions = GNB_model.predict(test_inputs)

    '''
    print('Testing NN')
    NN_model.eval()
    NN_predictions = []
    labels = []
    voting_predictions = []
    with torch.no_grad():
        for batch in tqdm(test_loader):
            input = batch['input']
            label = batch['label'].cpu().numpy()

            output = NN_model(input)
            prediction = torch.argmax(output, dim=1)
            NN_pred = prediction.cpu().numpy()

            NN_predictions.extend(NN_pred)
            labels.extend(label)

            sklearn_input = input.flatten().cpu().numpy().reshape((input.shape[0], -1))
            SVC_pred = SVC_model.predict(sklearn_input)
            GNB_pred = GNB_model.predict(sklearn_input)

            combined_preds = np.stack([NN_pred, SVC_pred, GNB_pred], axis=1)
            voting_result = np.apply_along_axis(lambda x: np.argmax(np.bincount(x)), axis=1, arr=combined_preds)
            voting_predictions.extend(voting_result)
    '''
    SVC_acc = accuracy_score(SVC_predictions, test_labels)
    GNB_acc = accuracy_score(GNB_predictions, test_labels)
    #NN_acc = accuracy_score(NN_predictions,labels)
    #voting_acc = accuracy_score(voting_predictions, labels)
    
    print(f'SVC Accuracy: {SVC_acc:.2f}')
    print(f'GNB Accuracy: {GNB_acc:.2f}')
    #print(f'NN Accuracy: {NN_acc:.2f}')
    #print(f'Voting Accuracy: {voting_acc:.2f}')

In [621]:
def main(region,game_mode,elo,version, batch_size,num_epochs):
    #get_csv(region=region,game_mode=game_mode,patch=patch)
    #data extraction
    df = get_data(region,game_mode,elo,version)
    #data encoding + validation
    df = verify_data(df) 
    data = MatchDataset(df) 
    #build model + training 
    train_loader,test_loader, train_inputs, test_inputs, train_labels, test_labels = split_data(data,batch_size)
    SVC_model, GNB_model, NN_model = train_models(train_loader,train_inputs, train_labels, num_epochs)
    test_models(SVC_model,GNB_model, NN_model, test_loader, test_inputs, test_labels)

main('NA1','ARAM','ANY','14.13', batch_size = 1, num_epochs = 1)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.