In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from data_apps_aws.sql import get_db_engine, get_db_data
from data_apps_aws.utils import make_outside_legend
from data_apps_aws.sql import *

from itables import show
import itables.options as opt
opt.maxBytes = 0


In [None]:
# Notebook-wide plotting defaults: figure size as (width, height), then seaborn's default styling.
plt.rcParams['figure.figsize'] = (14, 6)
sns.set()

## Get player list

In [None]:
# Handle reused by every get_db_data / upload_df_to_table call below.
# NOTE(review): presumably an engine for the 'bfv_data' database — confirm in data_apps_aws.sql.
db_con = get_db_engine('bfv_data')

In [None]:
# SQL text selecting each distinct player_name from match_participants.
query = """
SELECT distinct(player_name)
FROM match_participants
"""

In [None]:
# Run the query and keep only the raw values of the 'player_name' column.
unique_players = get_db_data(query, db_con)['player_name'].values

In [None]:
# Number of distinct player names; with the 0.5 s pause per request below,
# this determines how long the download loop will take.
len(unique_players)

## Get nationality information

In [None]:
from data_apps_aws.password_manager import get_api_token
from data_apps_aws.sql import upload_df_to_table

import requests
import json
import pandas as pd
import numpy as np
from urllib.parse import quote
import time

In [None]:
# Token looked up under the key 'name_prism'; it is embedded in the request URL
# built by get_single_player_info, so avoid printing it or the URL in cell output.
api_token = get_api_token('name_prism')

In [None]:
def get_single_player_info(this_player_name, api_token, timeout=30):
    """Fetch nationality probabilities for one player name from Name-Prism.

    Parameters
    ----------
    this_player_name : str
        Name to look up. It is percent-encoded before being placed in the URL path.
    api_token : str
        API token inserted into the URL path.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests`` (default 30), so a stalled
        connection cannot block a long-running download loop forever.

    Returns
    -------
    pandas.DataFrame
        One row per key of the JSON response with columns ``nationality`` and
        ``prob``, sorted by ``prob`` in descending order.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status code.
    AssertionError
        If the returned probabilities do not sum to 1 (tolerance 1e-5).
    """
    # Percent-encode the name as a single path segment. ``safe=''`` is needed
    # because quote() leaves '/' untouched by default, which would split a name
    # containing a slash into several path components.
    this_player_str = quote(this_player_name, safe='')

    # get url
    query_url = f'https://name-prism.com/api_token/nat/json/{api_token}/{this_player_str}'

    # download request; fail early on HTTP errors instead of parsing an error body
    response = requests.request("GET", query_url, timeout=timeout)
    response.raise_for_status()
    response_dict = json.loads(response.text)

    # transform to DataFrame: response keys become rows, values become 'prob'
    nationality_probs = pd.DataFrame.from_dict(response_dict, orient='index').reset_index()
    nationality_probs.columns = ['nationality', 'prob']
    nationality_probs = nationality_probs.sort_values('prob', ascending=False)

    # assert valid probabilities
    assert np.abs(nationality_probs['prob'].sum() - 1).squeeze() < 0.00001, 'Nationality probabilities need to sum up to 1'

    return nationality_probs

In [None]:
from tqdm import tqdm

In [None]:
# Scratch check of the modulo-based progress trigger: prints every 1000th counter value.
counter = 1
for ii in range(20000):
    if not counter % 1000:
        print(counter)
    counter = counter + 1

In [None]:
# Download nationality probabilities for every player name.
# Successful results are collected as one DataFrame per player; names whose
# lookup raised an error are collected separately so they can be retried later.
all_player_info_list = []
all_failed_players = []

unique_players_ss = unique_players
counter = 1

for this_player_name in tqdm(unique_players_ss):

    try:
        nationality_probs = get_single_player_info(this_player_name, api_token)

        nationality_probs['player_name'] = this_player_name
        all_player_info_list.append(nationality_probs)

    except Exception:
        # Catch Exception rather than using a bare except, so that a keyboard /
        # kernel interrupt still stops the loop instead of being recorded as a
        # failed player.
        all_failed_players.append(this_player_name)

    # occasional backups (skipped while nothing has succeeded yet, because
    # pd.concat raises ValueError on an empty list)
    if (counter % 1_000) == 0 and all_player_info_list:
        all_player_info_df = pd.concat(all_player_info_list, axis=0).reset_index(drop=True)
        all_player_info_df.to_csv(f'player_info_bkup_{counter}.csv', index=False)

    # comply with max API calls
    time.sleep(0.5)
    counter += 1


In [None]:
# Stack the per-player frames row-wise and renumber the rows 0..n-1.
all_player_info_df = pd.concat(all_player_info_list, ignore_index=True)

In [None]:
# (rows, columns) of the combined result; each successfully queried player
# contributes one row per nationality key returned for them.
all_player_info_df.shape

In [None]:
# Preview the first three rows (columns: nationality, prob, player_name).
all_player_info_df.head(3)

In [None]:
# Persist the combined results next to the notebook, without the row index.
all_player_info_df.to_csv('name_prism_player_info.csv', index=False)

In [None]:
# One-column frame of the names whose lookup raised an error in the download loop;
# the trailing expression displays its (rows, columns).
all_failed_players_df = pd.DataFrame(all_failed_players, columns=['player_name'])
all_failed_players_df.shape

In [None]:
# Persist the failed names without the row index so they can be retried later.
all_failed_players_df.to_csv('name_prism_failed_players.csv', index=False)

## Save data to database

In [None]:
# Reload both result files from disk, so the upload cells below can be run
# without repeating the download loop.
all_player_info_df = pd.read_csv('name_prism_player_info.csv')
all_failed_players_df = pd.read_csv('name_prism_failed_players.csv')

In [None]:
# Upload the nationality results under the table name 'nameprism_player_info'.
# NOTE(review): behaviour when the table already exists (append vs. replace) is
# defined in data_apps_aws.sql.upload_df_to_table — confirm before re-running.
upload_df_to_table(all_player_info_df, 'nameprism_player_info', db_con)

In [None]:
# Upload the failed names under the table name 'nameprism_failed_players'.
# NOTE(review): behaviour when the table already exists (append vs. replace) is
# defined in data_apps_aws.sql.upload_df_to_table — confirm before re-running.
upload_df_to_table(all_failed_players_df, 'nameprism_failed_players', db_con)