In [None]:
%load_ext autoreload
%autoreload 2
import os
import time
import logging
import pandas as pd
from requests import exceptions
from riotwatcher import LolWatcher
import sqlite3
import requests
from requests.exceptions import ConnectionError
from riotwatcher import LolWatcher, ApiError
import tarfile
from dotenv import load_dotenv
load_dotenv()

In [None]:
api_key = os.environ.get('api_key')
# Report only whether a key was loaded: echoing the value itself would store
# the secret in the notebook's saved cell output.
api_key is not None

In [None]:
lol_watcher = LolWatcher(api_key)

In [None]:
def get_top_players(region: str) -> list:
    """
    Collect the summoner IDs of all Challenger, Grandmaster and Master players.

    The three leagues of the ranked solo queue are fetched through the
    module-level ``lol_watcher`` client.

    Args:
        region (str): Region to search (e.g., 'na1', 'euw1').

    Returns:
        list: Summoner IDs, Challenger entries first, then Grandmaster,
            then Master.
    """
    queue = 'RANKED_SOLO_5x5'
    league_api = lol_watcher.league

    # Fetched in the same order as the tiers are ranked.
    tiers = (
        league_api.challenger_by_queue(region, queue),
        league_api.grandmaster_by_queue(region, queue),
        league_api.masters_by_queue(region, queue),
    )

    # Flatten every league's entries into a single list of IDs.
    return [entry['summonerId'] for tier in tiers for entry in tier['entries']]

In [None]:
# A = get_top_players('euw1')

In [None]:
import json
def save_list_to_json(my_list: list, filename: str):
    """
    Write a list to a JSON file, replacing the file if it already exists.

    Args:
        my_list (list): The list to serialize.
        filename (str): Path of the JSON file to write.
    """
    with open(filename, mode='w') as handle:
        json.dump(my_list, fp=handle)

# Exemple d'utilisation

In [None]:
# save_list_to_json(A, 'get_top_players.json')

In [None]:
# A[0]

In [None]:
def get_puuid(summoner_ids: list, region: str = 'euw1') -> dict:
    """
    Look up the PUUID of each summoner ID.

    One request per summoner is made through the module-level ``lol_watcher``
    client; the PUUID is needed by other requests.

    Args:
        summoner_ids (list): Summoner IDs to resolve.
        region (str, optional): Region to search (defaults to 'euw1').

    Returns:
        dict: Mapping of summoner ID to PUUID.
    """
    summoner_api = lol_watcher.summoner
    return {
        summoner_id: summoner_api.by_id(region, summoner_id)['puuid']
        for summoner_id in summoner_ids
    }

In [None]:
# B = get_puuid(A, 'euw1')

In [None]:
# save_list_to_json(B, 'get_puuid.json')

In [None]:
# key = cle = next(iter(B))
# key

In [None]:
# value = B[key]
# value

In [None]:
def get_champ_mastery(summoner_ids: list, summid_to_puuid: dict, region: str = 'euw1', points: int = 100000) -> dict:
    """
    Fetch, for every PUUID, the champions played with more than ``points`` mastery points.

    Each PUUID costs one request to the champion-mastery endpoint. A player
    with no mastery entries gets an empty list. A PUUID whose request still
    fails after the retries is logged and left out of the result, so one bad
    response does not abort the whole run.

    Args:
        summoner_ids (list): Summoner IDs. Unused; kept so existing callers
            keep working.
        summid_to_puuid (dict): Mapping of summoner ID to PUUID; only the
            values are used.
        region (str, optional): Platform to query (defaults to 'euw1').
        points (int, optional): Mastery points a champion must exceed to be
            included (defaults to 100 000).

    Returns:
        dict: Mapping of PUUID to a list of champion IDs.
    """
    max_attempts = 3
    mastery_dict = {}

    for puuid in summid_to_puuid.values():
        # Fixed pause between players to stay under the API rate limit.
        time.sleep(1.3)
        url = (f'https://{region}.api.riotgames.com/lol/champion-mastery/v4/'
               f'champion-masteries/by-puuid/{puuid}?api_key={api_key}')

        masteries = None
        status_code = None
        for _ in range(max_attempts):
            response = requests.get(url, timeout=30)
            status_code = response.status_code
            if status_code == 429:
                # Honour the server-provided delay when it is a plain number
                # of seconds; otherwise fall back to a conservative wait.
                retry_after = response.headers.get('Retry-After', '')
                time.sleep(int(retry_after) if retry_after.isdigit() else 120)
                continue
            if response.ok:
                masteries = response.json()
            break

        if not isinstance(masteries, list):
            # Error payloads are dicts; log without the URL so the API key
            # never ends up in the logs.
            logging.warning("Champion mastery request failed for %s (HTTP %s); skipping.",
                            puuid, status_code)
            continue

        mastery_dict[puuid] = [
            entry['championId']
            for entry in masteries
            if entry.get('championPoints', 0) > points
        ]

    return mastery_dict

In [None]:
def load_list_from_json(filename: str) -> list:
    """
    Read a JSON file and return its decoded content.

    Args:
        filename (str): Path of the JSON file to read.

    Returns:
        list: The decoded JSON content.
    """
    with open(filename, mode='r') as handle:
        return json.load(handle)


In [None]:
sum_ids_json = load_list_from_json('get_top_players.json')

In [None]:
puuid_json = load_list_from_json('get_puuid.json')

In [None]:
def get_first_n_items(my_dict: dict, n: int) -> dict:
    """
    Return a new dictionary holding the first ``n`` entries of ``my_dict``.

    Args:
        my_dict (dict): The source dictionary.
        n (int): Number of entries to keep, in the dictionary's own order.

    Returns:
        dict: A dictionary with the first ``n`` keys and their values.
    """
    selected = {}
    for key in list(my_dict)[:n]:
        selected[key] = my_dict[key]
    return selected

In [None]:
puuid_json_10 = get_first_n_items(puuid_json,10)

In [None]:
# valeur du 1er élément
puuid_json[next(iter(puuid_json))]

In [None]:
region = 'euw1'


# summoner_ids = get_top_players(region=region)
# logging.info(f"Top players stored: {len(summoner_ids)} entries.")


# summid_to_puuid = get_puuid(summoner_ids=summoner_ids)
# logging.info("puuids retrieved.")

# mastery_dict = get_champ_mastery(summoner_ids=sum_ids_json, summid_to_puuid=puuid_json)

L'exécution de `get_champ_mastery` a pris 388 min 36,5 s (environ 6 h 30).

In [None]:
# save_list_to_json(mastery_dict, 'mastery_dict.json')

In [None]:
mastery_dict = load_list_from_json('mastery_dict.json')

In [None]:
def _fetch_riot_json(url, max_attempts=3, rate_limit_wait=120, connection_wait=10):
    """GET ``url`` and return the decoded JSON, retrying on HTTP 429 and connection errors.

    Returns the last payload obtained (which may still be an error payload),
    or None when no JSON body could be decoded.
    """
    payload = None
    for _ in range(max_attempts):
        try:
            payload = requests.get(url).json()
        except requests.exceptions.ConnectionError:
            print(f"Connection Error, waiting {connection_wait}s then resuming")
            time.sleep(connection_wait)
            continue
        except ValueError:
            # Body was not JSON (e.g. an HTML gateway error page).
            return None

        status = payload.get('status') if isinstance(payload, dict) else None
        if isinstance(status, dict) and status.get('status_code') == 429:
            print(f"Rate limit exceeded: {status['status_code']}. Waiting {rate_limit_wait}s")
            time.sleep(rate_limit_wait)
            continue
        return payload
    return payload


def get_match_data(mastery_dict, num_matches=10, region ='europe'):
    '''
    Collect per-player match statistics for the players in ``mastery_dict``.

    For every PUUID the most recent match IDs are fetched, then each match not
    seen before is downloaded. In CLASSIC games, one row is recorded for every
    participant who is a key of ``mastery_dict`` and is playing one of the
    champions listed for them.

    Responses that are not usable (error payloads, non-JSON bodies) are logged
    and skipped instead of raising. Participant fields or challenge values
    missing from a match are recorded as None. Raw payloads are emitted with
    ``logging.debug`` rather than printed.

    Args:
        mastery_dict (dict): Mapping of PUUID to a list of champion IDs.
        num_matches (int): Number of recent matches to request per player
            (between 1 and 100).
        region (str): Regional routing value used in the match endpoint host.

    Returns:
        tuple: ``(data_rows, matches_scanned)`` where ``data_rows`` is a list
            of dicts (one per recorded player) and ``matches_scanned`` is the
            set of match IDs whose data was successfully downloaded.
    '''

    #create list to store dict objects
    data_rows = []

    #store set of matches already looked through
    matches_scanned = set()

    #list of features we want to record
    features = ['puuid', 'championId', 'item0', 'item1', 'item2', 'item3', 'item4',
                'item5', 'item6', 'kills', 'deaths', 'assists', 'totalDamageDealtToChampions',
                'role', 'teamPosition', 'teamId', 'gameEndedInEarlySurrender', 'win',
                'longestTimeSpentLiving', 'neutralMinionsKilled', 'needVisionPings',
                'sightWardsBoughtInGame', 'timeCCingOthers', 'totalDamageShieldedOnTeammates',
                'totalAllyJungleMinionsKilled', 'totalEnemyJungleMinionsKilled', 'totalHealsOnTeammates',
                'totalMinionsKilled', 'turretKills', 'turretTakedowns', 'visionScore', 'visionClearedPings',
                'visionWardsBoughtInGame', 'wardsKilled', 'wardsPlaced']

    #features read from each participant's nested 'challenges' dict
    features_challenges = ['controlWardsPlaced', 'damageTakenOnTeamPercentage', 'dodgeSkillShotsSmallWindow', 'firstTurretKilled',
                           'earlyLaningPhaseGoldExpAdvantage', 'laningPhaseGoldExpAdvantage', 'junglerKillsEarlyJungle',
                           'maxCsAdvantageOnLaneOpponent', 'maxLevelLeadLaneOpponent', 'killsOnLanersEarlyJungleAsJungler',
                           'acesBefore15Minutes', 'killParticipation', 'laneMinionsFirst10Minutes', 'pickKillWithAlly',
                           'quickFirstTurret', 'quickSolokills', 'skillshotsDodged', 'skillshotsHit', 'takedownsAfterGainingLevelAdvantage',
                           'teamDamagePercentage', 'turretPlatesTaken', 'kTurretsDestroyedBeforePlatesFall', 'wardTakedowns',
                           'wardTakedownsBefore20M', 'wardsGuarded'
                ]

    base_url = f'https://{region}.api.riotgames.com/lol/match/v5/matches'

    for puuid in mastery_dict:

        #store matchlist for each puuid
        match_list = _fetch_riot_json(
            f'{base_url}/by-puuid/{puuid}/ids?start=0&count={num_matches}&api_key={api_key}')
        logging.debug('match_list : %s', match_list)

        #an error payload is a dict; iterating it would treat its keys as match IDs
        if not isinstance(match_list, list):
            logging.warning('Skipping player %s: unexpected match list response', puuid)
            continue

        for match in match_list:
            if match in matches_scanned:
                continue

            #store match data in variable
            match_data = _fetch_riot_json(f'{base_url}/{match}?api_key={api_key}')
            logging.debug('match_data : %s', match_data)

            info = match_data.get('info') if isinstance(match_data, dict) else None
            if not isinstance(info, dict):
                #not marked as scanned, so it can be retried through another player
                logging.warning('Skipping match %s: unexpected match response', match)
                continue

            matches_scanned.add(match)

            #only classic games are recorded
            if info.get('gameMode') != 'CLASSIC':
                continue

            participants = info.get('participants', [])

            #create dict of champs on team1, team2 (complete before any row uses it)
            champions_in_game = {100: [], 200: []}
            for player in participants:
                champions_in_game.setdefault(player['teamId'], []).append(player['championId'])

            for player in participants:
                #keep only tracked players who are on one of their high-mastery champions
                if player['championId'] not in mastery_dict.get(player.get('puuid'), ()):
                    continue

                player_data = {feature: player.get(feature) for feature in features}
                player_data['patch'] = info.get('gameVersion')
                player_data['match_id'] = match
                player_data['champions_in_game'] = champions_in_game

                #challenge values are optional; missing ones become None
                challenges = player.get('challenges') or {}
                for challenge in features_challenges:
                    player_data[challenge] = challenges.get(challenge)

                data_rows.append(player_data)

    return data_rows, matches_scanned

In [None]:
# key = list(mastery_dict.keys())[0]

In [None]:
# response = requests.get(f'https://europe.api.riotgames.com/lol/match/v5/matches/by-puuid/{key}/ids?start=0&count={10}&api_key={api_key}')
# match = response.json()
# print(match)
# response = requests.get(f'https://europe.api.riotgames.com/lol/match/v5/matches/{match[0]}?api_key={api_key}')
# match_data = response.json()
# match_data

In [None]:
# with open('match_data_test.json', 'w') as fichier_json:
#     json.dump(match_data, fichier_json)

In [None]:
with open('match_data_test.json', 'r') as fichier_json:
    match_data = json.load(fichier_json)

In [None]:
# match_data['info']['']

In [None]:
features_challenges = ['controlWardsPlaced', 'damageTakenOnTeamPercentage', 'dodgeSkillShotsSmallWindow', 'firstTurretKilled',
                           'earlyLaningPhaseGoldExpAdvantage', 'laningPhaseGoldExpAdvantage', 'junglerKillsEarlyJungle',
                           'maxCsAdvantageOnLaneOpponent', 'maxLevelLeadLaneOpponent', 'killsOnLanersEarlyJungleAsJungler',
                           'acesBefore15Minutes', 'killParticipation', 'laneMinionsFirst10Minutes', 'pickKillWithAlly',
                           'quickFirstTurret', 'quickSolokills', 'skillshotsDodged', 'skillshotsHit', 'takedownsAfterGainingLevelAdvantage',
                           'teamDamagePercentage', 'turretPlatesTaken', 'kTurretsDestroyedBeforePlatesFall', 'wardTakedowns',
                           'wardTakedownsBefore20M', 'wardsGuarded', 
                ]

In [None]:
player_info = match_data['info']['participants']
player_info

In [None]:
for i in player_info:
    print(i)

In [None]:
for i,j in enumerate(player_info):
    print(i)
    print(j['puuid'] in mastery_dict.keys())

In [None]:
mastery_dict

In [None]:
mastery_dict_10 = get_first_n_items(mastery_dict,10)

In [None]:
a,b = get_match_data(mastery_dict=mastery_dict_10, num_matches=1)


In [None]:
a

In [None]:
# data_rows, matches_scanned = get_match_data(mastery_dict=get_first_n_items(mastery_dict,1000), num_matches=10)


In [None]:
# list(matches_scanned)

In [None]:
# save_list_to_json(data_rows, 'data_rows.json')
# with open('matches_scanned.json', 'w') as fichier_json:
#     json.dump(list(matches_scanned), fichier_json)

In [None]:
data_rows = load_list_from_json('data_rows.json')

In [None]:
matches_scanned = load_list_from_json('matches_scanned.json')

In [None]:
def match_to_df(data_rows):
    '''
    Convert data_rows (list of dicts) into a dataframe with SQL-friendly columns.

    Rows with an empty ``teamPosition`` or from games that ended in an early
    surrender are dropped. The nested ``champions_in_game`` dict is expanded
    into ``enemies_<i>`` and ``teammates_<i>`` columns (one per champion) and
    then removed.

    The team keys of ``champions_in_game`` may be ints (fresh data) or strings
    (data reloaded from JSON, where dict keys are always strings); both are
    accepted.

    Args:
        data_rows (list): List of per-player dicts containing at least
            ``teamPosition``, ``gameEndedInEarlySurrender``, ``teamId`` and
            ``champions_in_game``.

    Returns:
        pandas.DataFrame: The flattened dataframe; empty when ``data_rows``
            is empty.
    '''

    df = pd.DataFrame(data_rows)
    if df.empty:
        return df

    #drop where teamPosition empty or where game ended in early surrender
    keep = (df['teamPosition'] != '') & df['gameEndedInEarlySurrender'].eq(False)
    df = df[keep].copy()

    #for each row, the champions on the player's team and on the opposite team
    opposite_team = {100: 200, 200: 100}
    enemy_lists = []
    ally_lists = []
    for teams, team_id in zip(df['champions_in_game'], df['teamId']):
        #normalise keys: a JSON round-trip turns 100/200 into "100"/"200"
        by_team = {int(team): champs for team, champs in teams.items()}
        ally_lists.append(by_team.get(team_id) or [])
        enemy_lists.append(by_team.get(opposite_team.get(team_id)) or [])

    #split lists into individual columns (enemies first, then teammates)
    for prefix, champ_lists in (('enemies_', enemy_lists), ('teammates_', ally_lists)):
        expanded = pd.DataFrame(champ_lists, index=df.index).add_prefix(prefix)
        df = pd.concat([df, expanded], axis=1)

    #drop the nested column now that it has been expanded
    return df.drop(columns=['champions_in_game'])

In [None]:
data_rows

In [None]:
df_match = match_to_df(data_rows=data_rows)

In [None]:
df_match

In [None]:
def download_json(url, file_path):
    """Download ``url`` and save the response body to ``file_path``.

    Args:
        url (str): Address of the JSON document.
        file_path (str): Destination path; its parent directory is created
            when missing.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status, so an error page is never saved as if it were the data.
    """
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    parent = os.path.dirname(file_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(file_path, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded and saved new JSON file at {file_path}")


In [None]:
def get_datadragon_version(local_file_path="data/version.json"):
    """Keep the local Data Dragon version list in sync with the remote one.

    The file is downloaded when it does not exist, cannot be parsed, or
    differs from the remote list.

    Args:
        local_file_path (str): Where the version list is stored. Defaults to
            "data/version.json", the path that was previously hard-coded.
    """
    url = "https://ddragon.leagueoflegends.com/api/versions.json"

    # Download JSON if local file doesn't exist
    if not os.path.exists(local_file_path):
        download_json(url, local_file_path)
        return

    # Load the local copy; an unreadable file is treated as out of date
    try:
        with open(local_file_path, 'r') as local_file:
            local_data = json.load(local_file)
    except ValueError:
        local_data = None

    remote_data = requests.get(url).json()

    # Compare local and remote JSON files
    if local_data != remote_data:
        download_json(url, local_file_path)
    else:
        print("Local JSON file is already up-to-date.")


In [None]:
local_file_path = "data/version.json"
get_datadragon_version(local_file_path)

In [None]:
def download_tarball(url, file_path):
    """Stream the archive at ``url`` to ``file_path`` in 8 KiB chunks.

    ``stream=True`` keeps the body out of memory until it is iterated, which
    is what makes the chunked write worthwhile for a large archive.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status; nothing is written in that case.
    """
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        with open(file_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)

def extract_tarball(file_path, version):
    """Extract the gzip-compressed tarball at ``file_path`` into ``data/<version>``.

    Args:
        file_path (str): Path of the ``.tgz`` archive.
        version (str): Version string used as the destination folder name.
    """
    destination = f"data/{version}"
    with tarfile.open(file_path, 'r:gz') as tar:
        # The archive comes from the network: use the 'data' extraction filter
        # where the running Python provides it, so members cannot escape the
        # destination directory.
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(path=destination, filter='data')
        else:
            tar.extractall(path=destination)

In [None]:
def get_datadragon(local_file_path):
    """Download and unpack the latest Data Dragon bundle if it is not present.

    The latest version is the first entry of the version list stored at
    ``local_file_path``. The downloaded tarball is always removed afterwards,
    including when the download or the extraction fails.

    Args:
        local_file_path (str): Path of the JSON version list.

    Returns:
        int | None: 0 when the ``data/<version>`` folder already exists,
            otherwise None after a successful download and extraction.
    """
    #get version
    with open(local_file_path, 'r') as local_file:
        version = json.load(local_file)[0]

    #check to see if latest already downloaded
    if os.path.exists(f"data/{version}"):
        print(f"Latest Data Dragon folder present. If you believe this is an error, delete the folder at data/{version} in this project directory and re-run this script.")
        return 0

    #set url, tar path
    url = f"https://ddragon.leagueoflegends.com/cdn/dragontail-{version}.tgz"
    tar_path = f"data/{version}.tgz"

    print(f"Downloading datadragon version {version}")
    try:
        download_tarball(url=url, file_path=tar_path)

        print("Unpacking tarball...")
        extract_tarball(file_path=tar_path, version=version)
    finally:
        #never leave a (possibly partial) tarball behind
        if os.path.exists(tar_path):
            print("Deleting tarball...")
            os.remove(tar_path)
    print("Great Success!")

In [None]:
get_datadragon(local_file_path)

In [None]:
# def df_to_sql(df, database='data/matches.db', table_name='player_items_champions'):
#     '''
#     stores dataframe into a sql database. appends data to table if table already exists.
#     '''
#     conn = sqlite3.connect(database)
#     df.to_sql(name="player_items_champions", con=conn, if_exists='append', index=False)


In [None]:
def ajouter_colonnes_manquantes(df, conn, table_name='player_items_champions'):
    """
    Add to the SQL table every DataFrame column it does not have yet.

    Nothing is done when the table does not exist (``PRAGMA table_info``
    returns no rows), so the caller can create it from the DataFrame.
    Columns are added in DataFrame order with a type derived from the pandas
    dtype (INTEGER, REAL or TEXT), so numeric values are not stored as text.

    Args:
        df (pandas.DataFrame): Data about to be inserted.
        conn (sqlite3.Connection): Open connection to the database.
        table_name (str): Name of the target table.
    """
    def quote(identifier):
        # Double-quote identifiers so unusual names stay valid SQL.
        return '"' + str(identifier).replace('"', '""') + '"'

    def sqlite_type(dtype):
        if pd.api.types.is_bool_dtype(dtype) or pd.api.types.is_integer_dtype(dtype):
            return 'INTEGER'
        if pd.api.types.is_float_dtype(dtype):
            return 'REAL'
        return 'TEXT'

    cursor = conn.cursor()

    # Existing columns; SQLite compares column names case-insensitively.
    cursor.execute(f"PRAGMA table_info({quote(table_name)});")
    colonnes_existantes = {str(info[1]).lower() for info in cursor.fetchall()}

    # Table not created yet: there is nothing to alter.
    if not colonnes_existantes:
        return

    for colonne, dtype in df.dtypes.items():
        if str(colonne).lower() in colonnes_existantes:
            continue
        cursor.execute(
            f"ALTER TABLE {quote(table_name)} ADD COLUMN {quote(colonne)} {sqlite_type(dtype)};"
        )
        colonnes_existantes.add(str(colonne).lower())
        print(f"Colonne manquante ajoutée : {colonne}")

    conn.commit()

In [None]:
# Exemple d'utilisation avec la fonction pour stocker les données dans la base
def df_to_sql(df, database='data/matches.db', table_name='player_items_champions'):
    """
    Append the dataframe to a table of the SQLite database.

    Columns are first reconciled through ``ajouter_colonnes_manquantes``, then
    the rows are appended. The connection is closed even when one of these
    steps raises.

    Args:
        df (pandas.DataFrame): Rows to store.
        database (str): Path of the SQLite database file.
        table_name (str): Name of the target table.
    """
    conn = sqlite3.connect(database)
    try:
        # Add any column the table is missing
        ajouter_colonnes_manquantes(df, conn, table_name)

        # Then insert the data
        df.to_sql(name=table_name, con=conn, if_exists='append', index=False)
    finally:
        conn.close()

    print("Données stockées dans la base SQL")

In [None]:
df_to_sql(df=df_match)
logging.info("Stored in sql database")

In [None]:
def ouvrir_table_sql(database='data/matches.db', table_name='player_items_champions'):
    """
    Read a whole table of the SQLite database into a pandas DataFrame.

    The connection is closed even when the query fails (for example when the
    table does not exist).

    Args:
        database (str): Path of the SQLite database file.
        table_name (str): Name of the table to read.

    Returns:
        pandas.DataFrame: Every row and column of the table.
    """
    conn = sqlite3.connect(database)
    try:
        return pd.read_sql_query(f"SELECT * FROM {table_name}", conn)
    finally:
        conn.close()


In [None]:
# create_csv = pd.DataFrame(columns= ['id'])
# create_csv.to_csv('data/champ_matrix_filled.csv')

In [None]:
def load_data(db='data/matches.db',table="player_items_champions"):
    """Load a whole table from the database and return it as a pandas dataframe.

    The connection is closed even when the query fails (for example when the
    table does not exist).

    Args:
        db (str): Path of the SQLite database file.
        table (str): Name of the table to read.

    Returns:
        pandas.DataFrame: Every row and column of the table.
    """
    conn = sqlite3.connect(db)
    try:
        return pd.read_sql(f"SELECT * FROM {table}", conn)
    finally:
        conn.close()


In [None]:
def create_champ_df(version_filepath='data/version.json', feature_filepath='data/champ_matrix_filled.csv', save=True):
    """Create dataframe of champions and their attributes.

    Champion data is read from the local Data Dragon folder of the version
    listed first in ``version_filepath`` and joined with the manually
    annotated feature matrix at ``feature_filepath``. For champions missing
    from that matrix, the user is prompted for each feature value (0-3).

    Args:
        version_filepath (str): Path of the JSON version list.
        feature_filepath (str): Path of the manually annotated feature CSV.
        save (bool): When True, write newly entered annotations back to
            ``feature_filepath`` and save the result to 'data/champ_df.csv'.

    Returns:
        pandas.DataFrame: One row per champion: version, id, key, name, the
            Data Dragon info values (except difficulty), the annotated
            features and one count column per tag.

    Raises:
        FileNotFoundError: If ``feature_filepath`` does not exist.
    """

    # open json file, get version
    with open(version_filepath) as version_file:
        version = json.load(version_file)[0]

    # open json file, get data
    with open(f'data/{version}/{version}/data/fr_FR/champion.json', encoding="utf8") as champ_file:
        champ_data = json.load(champ_file)['data']

    # define features we want to keep
    features = ['version','id','key','name', 'info', 'tags']

    #create a list of dictionaries, each dictionary is a champion
    champ_list = []
    for champ in champ_data.values():
        champ_dict = {}
        for feature in features:
            champ_dict[feature] = champ[feature]
            if feature == 'info':
                #flatten the nested info values into their own columns
                champ_dict.update(champ['info'])
        champ_list.append(champ_dict)

    # create dataframe from list of dictionaries
    champ_df = pd.DataFrame(champ_list)
    champ_df = champ_df.drop(labels=['info'], axis=1)

    #load in manually-defined feature csv and join with current dataframe
    champ_features = ['version','id','mobility','poke','sustained','burst','engage','disengage','healing']
    try:
        temp_df = pd.read_csv(feature_filepath)
    except FileNotFoundError:
        print(f"Self-annotated data not found at {feature_filepath}. Create this file using the instructions from the github repository, or download it.")
        raise

    #champions present in local datadragon files but not in our manually-created feature matrix
    new_champs = sorted(set(champ_df['id']).difference(set(temp_df['id'])))
    changelist = {}
    for champ in new_champs:
        champ_entry = pd.DataFrame({"id": [champ], "version": [version]})
        print(champ_entry)
        temp_df = pd.concat([temp_df, champ_entry], ignore_index=True)

        print(f"New champion {champ} needs features added! For each prompt, provide a value from 0-3 for the character, then press enter.\n")
        champ_attr = {}
        for attr_name in champ_features[2:]:
            print(f"{attr_name}: ")
            #ask again until the answer is one of the allowed values
            answer = input().strip()
            while answer not in ('0', '1', '2', '3'):
                print("Please enter an integer from 0 to 3.")
                answer = input().strip()
            champ_attr[attr_name] = int(answer)
            temp_df.loc[temp_df['id'] == champ, attr_name] = champ_attr[attr_name]

        changelist[champ] = champ_attr

        #save after each champion so entered values survive an interruption
        if save:
            temp_df.to_csv(feature_filepath, index=False)

    temp_df = temp_df[champ_features]

    champ_df = champ_df.merge(temp_df, how="left", on="id")
    champ_df["version"] = champ_df["version_x"]
    champ_df = champ_df.drop(labels=["version_x", "version_y"], axis=1)

    #add new champ features
    for champ, attr in changelist.items():
        for attr_name, attr_value in attr.items():
            champ_df.loc[champ_df['id'] == champ, attr_name] = attr_value
        print(f"Updated entry for {champ}!")

    #one hot encode the tags column, and sum to get back to original row shape
    tag_df = pd.get_dummies(champ_df['tags'].explode())
    tag_df = tag_df.groupby(tag_df.index).sum()

    #merge tag columns with champ_df
    champ_df = pd.concat([champ_df, tag_df],axis=1)

    #transform key column to int
    champ_df['key'] = champ_df['key'].astype(int)

    #drop tags and difficulty columns
    champ_df = champ_df.drop(labels=['tags','difficulty'], axis=1)

    #move version column back to front
    cols = champ_df.columns.tolist()
    cols.remove('version')
    cols.insert(0, 'version')
    champ_df = champ_df[cols]

    if save:
        champ_df.to_csv('data/champ_df.csv', index=False)

    return champ_df

In [None]:
def get_summed_features(df, champ_df):
    """Get summed champion features for ally and enemy teams, plus a KDA column.

    For every row of ``df``, the champion features (all ``champ_df`` columns
    from position 4 onwards) are summed over the allied champions and over the
    enemy champions, and appended as ``ally_<feature>`` / ``enemy_<feature>``.

    Champion IDs are read from the ``teammates_*`` and ``enemies_*`` columns
    when both groups exist. Otherwise the historical positional layout is used
    (columns 21-25 for enemies, 26-30 for allies).

    Args:
        df (pandas.DataFrame): Match rows with ``kills``, ``deaths`` and
            ``assists`` columns and the champion-ID columns described above.
        champ_df (pandas.DataFrame): Champion table with a ``key`` column
            holding the champion IDs.

    Returns:
        pandas.DataFrame: A new dataframe with the summed features appended
            and a ``kda`` column inserted at position 10.
    """

    #champ_df columns we want
    cols = champ_df.columns[4:].to_list()

    #create unique names for ally and enemy sums
    ally_cols = ["ally_" + x for x in cols]
    enemy_cols = ["enemy_" + x for x in cols]

    #locate the champion-id columns by name; positions shift whenever the
    #recorded feature list changes
    enemy_id_cols = [c for c in df.columns if str(c).startswith("enemies_")]
    ally_id_cols = [c for c in df.columns if str(c).startswith("teammates_")]
    if enemy_id_cols and ally_id_cols:
        enemy_ids = df[enemy_id_cols]
        ally_ids = df[ally_id_cols]
    else:
        enemy_ids = df.iloc[:, 21:26]
        ally_ids = df.iloc[:, 26:31]

    def team_totals(champion_ids):
        #sum each feature over the champions of one team
        on_team = champ_df['key'].isin(champion_ids)
        return champ_df.loc[on_team, cols].sum().to_list()

    #build all rows first, then create the dataframe once
    rows = [
        team_totals(allies) + team_totals(enemies)
        for allies, enemies in zip(ally_ids.to_numpy().tolist(), enemy_ids.to_numpy().tolist())
    ]
    summed_features = pd.DataFrame(rows, columns=ally_cols + enemy_cols, index=df.index)

    #merge with match rows (same index, so rows stay aligned)
    df = pd.concat([df, summed_features], axis=1)

    #create KDA column; where deaths = 0, use kills + assists
    takedowns = df['kills'] + df['assists']
    df['kda'] = (takedowns / df['deaths']).where(df['deaths'] != 0, takedowns)

    #move the kda column to position 10
    df.insert(10, "kda", df.pop("kda"))

    return df

In [None]:
def save_to_db(df, db='data/matches.db', name="match_features"):
    """Append the dataframe to a table of the SQLite database.

    The table is created when missing. The connection is closed even when the
    write fails.

    Args:
        df (pandas.DataFrame): Rows to store.
        db (str): Path of the SQLite database file.
        name (str): Name of the target table.
    """
    conn = sqlite3.connect(db)
    try:
        df.to_sql(name, conn, if_exists="append", index=False)
    finally:
        conn.close()

In [None]:
logging.basicConfig(level=logging.DEBUG)
logging.info("Loading data...")
df = load_data()

In [None]:
logging.info("Creating features...")
champ_df = create_champ_df()

In [None]:
def get_summed_features(df, champ_df):
    """Get summed champion features for ally and enemy teams, plus a KDA column.

    For every row of ``df``, the champion features (all ``champ_df`` columns
    from position 4 onwards) are summed over the allied champions and over the
    enemy champions, and appended as ``ally_<feature>`` / ``enemy_<feature>``.

    Champion IDs are read from the ``teammates_*`` and ``enemies_*`` columns
    when both groups exist. Otherwise the historical positional layout is used
    (columns 21-25 for enemies, 26-30 for allies).

    Args:
        df (pandas.DataFrame): Match rows with ``kills``, ``deaths`` and
            ``assists`` columns and the champion-ID columns described above.
        champ_df (pandas.DataFrame): Champion table with a ``key`` column
            holding the champion IDs.

    Returns:
        pandas.DataFrame: A new dataframe with the summed features appended
            and a ``kda`` column inserted at position 10.
    """

    #champ_df columns we want
    cols = champ_df.columns[4:].to_list()

    #create unique names for ally and enemy sums
    ally_cols = ["ally_" + x for x in cols]
    enemy_cols = ["enemy_" + x for x in cols]

    #locate the champion-id columns by name; positions shift whenever the
    #recorded feature list changes
    enemy_id_cols = [c for c in df.columns if str(c).startswith("enemies_")]
    ally_id_cols = [c for c in df.columns if str(c).startswith("teammates_")]
    if enemy_id_cols and ally_id_cols:
        enemy_ids = df[enemy_id_cols]
        ally_ids = df[ally_id_cols]
    else:
        enemy_ids = df.iloc[:, 21:26]
        ally_ids = df.iloc[:, 26:31]

    def team_totals(champion_ids):
        #sum each feature over the champions of one team
        on_team = champ_df['key'].isin(champion_ids)
        return champ_df.loc[on_team, cols].sum().to_list()

    #build all rows first, then create the dataframe once
    rows = [
        team_totals(allies) + team_totals(enemies)
        for allies, enemies in zip(ally_ids.to_numpy().tolist(), enemy_ids.to_numpy().tolist())
    ]
    summed_features = pd.DataFrame(rows, columns=ally_cols + enemy_cols, index=df.index)

    #merge with match rows (same index, so rows stay aligned)
    df = pd.concat([df, summed_features], axis=1)

    #create KDA column; where deaths = 0, use kills + assists
    takedowns = df['kills'] + df['assists']
    df['kda'] = (takedowns / df['deaths']).where(df['deaths'] != 0, takedowns)

    #move the kda column to position 10
    df.insert(10, "kda", df.pop("kda"))

    return df

In [None]:
# df = get_summed_features(df, champ_df)

In [None]:
# logging.info("Saving to database...")
# save_to_db(df)

In [None]:
df = load_data(table="match_features")

In [None]:
def normalize_df(df):
    """Min-max scale every column of ``df`` from position 31 onward.

    Args:
        df: Source frame; it is copied, not modified.

    Returns:
        A ``(df_scaled, norm_dict)`` tuple. ``df_scaled`` is the copy with
        the scaled columns. ``norm_dict`` maps each scaled column name to
        ``[max, min]`` of the unscaled values, so the same scaling can be
        applied to new entries.
    """
    df_scaled = df.copy()
    # column name -> [max, min] of the raw values
    norm_dict = {}
    for column in df.columns[31:]:
        print(f"column : {column}")
        col_max = df_scaled[column].max()
        col_min = df_scaled[column].min()
        norm_dict[column] = [col_max, col_min]

        span = col_max - col_min
        if span == 0:
            # Constant column: (x - min) / 0 would turn every value into NaN.
            # Map it to 0.0 instead; missing values stay missing.
            df_scaled[column] = (df_scaled[column] - col_min).astype(float)
        else:
            df_scaled[column] = (df_scaled[column] - col_min) / span

    return df_scaled, norm_dict

In [None]:
# Display the loaded frame (notebook cell output).
df

In [None]:
# Columns from position 31 onward: the same slice that normalize_df scales.
df.columns[31:]

In [None]:
# Replace missing values with 0, modifying df in place.
df.fillna(0, inplace=True)

In [None]:
# Inspect the wardTakedowns column (notebook cell output).
df['wardTakedowns']

In [None]:
# Min-max scale the feature columns; norm_dict keeps each column's [max, min].
print('Normalizing dataframe...')
df_scaled, norm_dict = normalize_df(df)

In [None]:
import numpy as np
import pandas as pd
import json
import os
import pwd
import platform
import argparse
import requests
from scipy.spatial import KDTree
from feature_build import load_data

# Disable SSL warnings
requests.packages.urllib3.disable_warnings()


In [None]:
# Scratch cell: sets up the same state that get_match_data uses internally and
# fetches the match list for each tracked player, without any retry handling.
#create list to store dict objects
data_rows = []
            
#store set of matches already looked through
matches_scanned = set()

#list of features we want to record
features = ['puuid', 'championId', 'item0', 'item1', 'item2', 'item3', 'item4', 'item5', 'item6', 
            'kills', 'deaths', 'assists', 'totalDamageDealtToChampions', 'role', 'teamPosition', 'teamId', 'gameEndedInEarlySurrender', 'win']

retries = 0
max_retries = 10
region = 'euw1'
num_matches = 1
# NOTE(review): C is not defined in this section; presumably an earlier cell
# builds it. It must support mastery_dict['puuid'] and mastery_dict['championId'].
mastery_dict = C

#expecting API errors
# while retries < max_retries:
    # for key, value in mastery_dict.items():
for key in mastery_dict['puuid']:
    #store matchlist for each puuid
    # match_list is overwritten on every pass, so only the last player's list survives the loop
    match_list = lol_watcher.match.matchlist_by_puuid(region, key, count = num_matches)
        


In [None]:
        # NOTE(review): orphaned notebook cell. It is indented like the body of a
        # surrounding loop and so cannot run on its own; the same logic appears
        # inside get_match_data.
        for match in match_list:
            if match not in matches_scanned:
                #store match data in variable
                match_data = lol_watcher.match.by_id(region, match)
                #store participant information in variable to iterate over (list of dicts) if classic game
                if match_data['info']['gameMode'] == 'CLASSIC':
                    player_info = match_data['info']['participants']
                    #create dict of champs on team1, team2
                    champions_in_game = {}
                    champions_in_game[100] = []
                    champions_in_game[200] = []
                    for player in player_info:
                        #add champ played to dict
                        champions_in_game[player['teamId']].append(player['championId'])
                        #check to see if player in our list of masters+ players
                        # if player['puuid'] in mastery_dict.keys(): 
                        if player['puuid'] in list(mastery_dict['puuid']): 
                            #check to see if player on a high mastery champ
                            # if player['championId'] in mastery_dict[player['puuid']]:
                            # NOTE(review): this tests against every championId in mastery_dict,
                            # not only the ones belonging to this player's puuid
                            if player['championId'] in list(mastery_dict['championId']):
                                #get player data, store in dictionary
                                player_data = {}
                                for feature in features:
                                    player_data[feature] = player[feature]
                                player_data['patch'] = match_data['info']['gameVersion']
                                player_data['match_id'] = match
                                # same dict object for every row of this match; it keeps
                                # filling up until the participant loop finishes
                                player_data['champions_in_game'] = champions_in_game
                                #append dictionary to list
                                data_rows.append(player_data)
                                
                    #print out to watch progress
                    #print('champion ID: ',player_data['championId'],', win:',player_data['win'])
                    #print('champs in game: ',player_data['champions_in_game'])
                                
                #append match_id to matches_scanned set
                matches_scanned.add(match)
                #reset retries
                retries = 0

In [None]:
def _extract_player_rows(match_id, match_data, features, tracked_puuids, tracked_champions):
    '''Build one feature dict per tracked player found in a single match.'''
    info = match_data['info']
    #champions on each team, keyed by teamId
    champions_in_game = {100: [], 200: []}
    rows = []
    for player in info['participants']:
        champions_in_game[player['teamId']].append(player['championId'])
        #keep the player only if both the puuid and the champion are tracked
        if player['puuid'] in tracked_puuids and player['championId'] in tracked_champions:
            player_data = {feature: player[feature] for feature in features}
            player_data['patch'] = info['gameVersion']
            player_data['match_id'] = match_id
            #one dict object shared by all rows of the match; complete once the loop ends
            player_data['champions_in_game'] = champions_in_game
            rows.append(player_data)
    return rows


def get_match_data(mastery_dict, num_matches=10, region = 'euw1'):
    '''
    Collect per-player match rows for the players listed in mastery_dict.

    mastery_dict: object indexable by 'puuid' and 'championId', each yielding an
        iterable of values (for instance a DataFrame or a dict of lists)
    num_matches: how many recent matches to request per player, between 1-100
    region: region passed to the match endpoints

    Returns (data_rows, matches_scanned): a list of dicts (one per tracked player
    per CLASSIC match) and the set of all match IDs that were fetched.

    HTTP 403 and 503 responses are retried up to 10 times, 3 seconds apart. If a
    call still fails after that, whatever was collected so far is returned.
    Any other HTTP error is re-raised.
    '''

    #list of features we want to record
    features = ['puuid', 'championId', 'item0', 'item1', 'item2', 'item3', 'item4', 'item5', 'item6',
                'kills', 'deaths', 'assists', 'totalDamageDealtToChampions', 'role', 'teamPosition', 'teamId', 'gameEndedInEarlySurrender', 'win']

    max_retries = 10
    retry_delay = 3
    #HTTP statuses worth retrying: Forbidden and Service Unavailable
    retryable_statuses = (403, 503)

    def fetch(endpoint, *args, **kwargs):
        '''Call an API endpoint with retries; returns None once retries are exhausted.'''
        for attempt in range(1, max_retries + 1):
            try:
                return endpoint(*args, **kwargs)
            #requests.exceptions has no Forbidden/ServiceUnavailable classes, so the
            #status code of the HTTPError is inspected instead
            except exceptions.HTTPError as e:
                status = getattr(e.response, 'status_code', None)
                if status not in retryable_statuses:
                    raise
                logging.error(f"Error: {e}")
                if attempt < max_retries:
                    time.sleep(retry_delay)
        return None

    #create list to store dict objects
    data_rows = []

    #store set of matches already looked through
    matches_scanned = set()

    #build the membership sets once instead of once per participant
    tracked_puuids = set(mastery_dict['puuid'])
    tracked_champions = set(mastery_dict['championId'])

    for key in mastery_dict['puuid']:
        #store matchlist for each puuid
        match_list = fetch(lol_watcher.match.matchlist_by_puuid, region, key, count = num_matches)
        if match_list is None:
            #retries exhausted: hand back what we have instead of looping forever
            return data_rows, matches_scanned

        for match in match_list:
            if match in matches_scanned:
                continue
            match_data = fetch(lol_watcher.match.by_id, region, match)
            if match_data is None:
                return data_rows, matches_scanned

            #only classic games contribute rows
            if match_data['info']['gameMode'] == 'CLASSIC':
                data_rows.extend(
                    _extract_player_rows(match, match_data, features, tracked_puuids, tracked_champions)
                )

            #remember the match so it is not fetched again for another player
            matches_scanned.add(match)

    return data_rows, matches_scanned

In [None]:
# Scrape with a match-list size of 20 per tracked player (default region).
data_rows, matches_scanned = get_match_data(mastery_dict=mastery_dict, num_matches=20)

In [None]:
def match_to_df(data_rows):
    '''
    Convert data_rows (list of dicts) into a dataframe whose columns are all
    sql-supported datatypes.

    Rows with an empty teamPosition, and rows from games that ended in an early
    surrender, are dropped. The per-match 'champions_in_game' dict is replaced
    by flat columns: enemies_0..n for the opposing team's champion IDs, followed
    by teammates_0..n for the player's own team.

    data_rows: list of dicts carrying at least 'teamPosition',
        'gameEndedInEarlySurrender', 'teamId' and 'champions_in_game'
        (a dict mapping teamId -> list of champion IDs)

    Returns the flattened dataframe; an empty dataframe when data_rows is empty.
    '''

    df = pd.DataFrame.from_dict(data_rows)
    if df.empty:
        #nothing scraped: no columns to filter on
        return df

    #drop where teamPosition empty or game ended in early surrender;
    #copy so the result is an independent frame, not a view of the unfiltered one
    keep = (df['teamPosition'] != '') & df['gameEndedInEarlySurrender'].eq(False)
    df = df[keep].copy()

    #per-row champion lists for the player's own team and for the opposite team
    opposite_team_dict = {100:200, 200:100}
    team_ids = df['teamId'].tolist()
    rosters = df['champions_in_game'].tolist()
    teammates = [roster.get(team) for roster, team in zip(rosters, team_ids)]
    enemies = [roster.get(opposite_team_dict.get(team)) for roster, team in zip(rosters, team_ids)]

    def expand(id_lists, prefix):
        '''Turn a list of champion-ID lists into prefixed columns, one per slot.'''
        #a missing team (None) becomes an empty row instead of breaking the constructor
        padded = [ids if ids is not None else [] for ids in id_lists]
        return pd.DataFrame(padded, index=df.index).add_prefix(prefix)

    #split lists into individual columns: enemies first, then teammates
    enemy_cols = expand(enemies, 'enemies_')
    teammate_cols = expand(teammates, 'teammates_')

    #drop the dict column, it is not a sql-supported datatype
    df = df.drop(labels=["champions_in_game"], axis=1)
    df = pd.concat([df, enemy_cols, teammate_cols], axis=1)

    return df

In [None]:
def df_to_sql(df, database='matches.db', table_name='player_items_champions'):
    '''
    stores dataframe into a sql database. appends data to table if table already exists.
    '''
    conn = sqlite3.connect(database)
    df.to_sql(name="player_items_champions", con=conn, if_exists='append', index=False)