In [None]:
import pandas as pd
import requests
import numpy as np
import json
from datetime import *
import ndjson
import time

## Loading Teams

In [None]:
# Fetch the first page of Lichess teams; the decoded payload is read as
# popular_teams_json['currentPageResults'] in the next cell.
popular_teams_url = "https://lichess.org/api/team/all"
response = requests.get(popular_teams_url)
# Fail here with a clear HTTP error rather than later with a confusing
# JSON-decoding or KeyError on an error response.
response.raise_for_status()
popular_teams_json = response.json()

In [None]:
# Pull the 'id' field out of every team entry on the current results page.
team_ids = [entry['id'] for entry in popular_teams_json['currentPageResults']]

In [None]:
# Display the collected team ids.
team_ids

## Loading Players

In [None]:
# Download every member of one team (the endpoint is decoded as
# newline-delimited JSON) and keep the accounts created on or before the cutoff.
# The id is restored from the value that was left commented out; it also matches
# the "lichess_swiss" file names used below. An empty id produces the invalid
# URL ".../api/team//users".
team_id = "lichess-swiss"
team_members_url = f"https://lichess.org/api/team/{team_id}/users"
response = requests.get(team_members_url)
# Stop on an HTTP error instead of trying to decode an error page as ndjson.
response.raise_for_status()
items = response.json(cls=ndjson.Decoder)
#df = pd.read_json(response.content,orient='records',lines=True).sort_values("createdAt")
minn_created_date = 1564341571930 # July 28 2019 (epoch milliseconds)
# Records without a 'createdAt' value are skipped: comparing None with an int
# would raise TypeError and abort the whole cell.
usernames = [
    item.get('username')
    for item in items
    if item.get('createdAt') is not None and item['createdAt'] <= minn_created_date
]
usernames[:10]

In [None]:
# Wrap the filtered usernames in a one-column DataFrame (the column gets the
# default integer label 0) and preview the first rows.
df_usernames = pd.DataFrame(data=usernames)
#df_usernames.to_csv("data/lichess_swiss_users.csv",index=False)
df_usernames.head(5)

## Loading Ratings

In [None]:
# Load the username list from disk (presumably written by the commented-out
# to_csv call in the previous section — TODO confirm). The single column is
# accessed as df_usernames['0'] in the download loop further down.
df_usernames = pd.read_csv("data/lichess_swiss_users.csv")
df_usernames.head()

In [None]:
# (rows, columns) of the username table.
df_usernames.shape

In [None]:
# Build the rating-history URL for a single sample user, used by the next
# cells to probe the endpoint by hand.
username = "HumanSponge"
url = f'https://lichess.org/api/user/{username}/rating-history'

In [None]:
# Request the rating history of the sample user.
response = requests.get(url)

In [None]:
# Show the HTTP status of the probe request (the download loop below treats
# anything other than 200 as a failure).
response.status_code

In [None]:
# Decode the JSON body of the probe response.
response_json = response.json()

In [None]:
# Download the rating history of every user from START_INDEX onward and reshape
# the result into one long table: user_id, time_control, date, rating.
START_INDEX = 10074              # first row of df_usernames to fetch (presumably earlier rows were fetched in a previous run — TODO confirm)
FIRST_USER_ID = START_INDEX + 1  # user ids handed out below start at 10075
TIME_CONTROLS = ('Bullet', 'Blitz', 'Rapid', 'Classical')
POINT_COLUMNS = ['year', 'month', 'day', 'rating']
ALL_COLUMNS = POINT_COLUMNS + ['time_control', 'username']
MIN_DATE = pd.Timestamp("2019-01-01")  # rows before this date are dropped

rating_histories = []
for i in range(START_INDEX, len(df_usernames)):
    username = df_usernames['0'].values[i]
    if i % 2000 == 0: print(i)
    time.sleep(.5)  # pause between requests (presumably to respect API rate limits)
    url = f'https://lichess.org/api/user/{username}/rating-history'
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        # One dropped connection must not discard everything fetched so far;
        # report the user and move on, as is done for non-200 responses.
        print(username)
        print(exc)
        continue
    if response.status_code != 200:
        print(username)
        print(response.status_code)
        continue
    response_json = response.json()
    for x in response_json:
        time_control = x['name']
        if time_control not in TIME_CONTROLS: continue
        # A time control with no points would yield a frame without the four
        # numeric columns; if it came first in the concat it would shift the
        # column order, so skip it.
        if not x['points']: continue
        # Name the columns at construction so labels never depend on position.
        data = pd.DataFrame(x['points'], columns=POINT_COLUMNS)
        data['time_control'] = time_control
        data['username'] = username
        rating_histories.append(data)

# pd.concat raises on an empty list, so fall back to an empty, correctly
# labelled frame when nothing was downloaded.
if rating_histories:
    df = pd.concat(rating_histories, axis=0)
else:
    df = pd.DataFrame(columns=ALL_COLUMNS)
# Cast only the numeric columns, so a bad value raises instead of being ignored.
df = df[ALL_COLUMNS].astype({col: int for col in POINT_COLUMNS})
# The month is shifted by one before building the date (the API months appear
# to be zero-based — TODO confirm against the Lichess API docs).
df['month'] = df['month']+1
df['date'] = pd.to_datetime(df.year*10000+df.month*100+df.day,format='%Y%m%d')
# Ids are assigned in order of first appearance among users that returned data,
# so they do not necessarily match row positions in df_usernames.
id_assignments = {
    name: FIRST_USER_ID + offset
    for offset, name in enumerate(df['username'].unique())
}
df['user_id'] = df['username'].map(id_assignments)
df = df[['user_id','time_control','date','rating']].sort_values(['user_id','time_control','date'])
df = df[df['date']>=MIN_DATE]
#df.to_csv("data/lichess_swiss_rating_histories_2.csv",index=False)
df.head()

In [None]:
# Peek at 10 randomly chosen rows (no random_state is passed, so the rows
# differ between runs).
df.sample(10)

In [None]:
# (rows, columns) of the filtered rating-history table.
df.shape

## Features
- Current rating (likely nonlinear relationship)
- Rating growth in last 30 days / 90 days / 180 days
- Rating volatility measures
- Peak historical rating relative to current rating
- Rating in other time controls + puzzles
- Rating growth in other time controls + puzzles
- Difference between other time control ratings + target time control rating
- How long you've been on lichess
- How many games you've played (ever, and within last 30 days)

## Outcomes
- Will you achieve a rating at least X points higher than your current rating within the next Y months? (X is derived from the target rating submitted by the user; Y = 24?)
- If so, when will you first reach the target rating? (point estimate + prediction interval of dates) - use number of days as outcome, then transform to date for the bot message

## Notes:
- Might want to train multiple models for various values of X between 0 and 500 (perhaps only multiples of 10, to speed things up)
- Might want to exclude certain accounts (if they stop playing on lichess or in that time control, if they are very new, if something else is weird)
- Use cross-validation since sample size might be constrained