In [None]:
from codebase import web_scrape_functions as wsf
from codebase import analysis_functions as af
from codebase import match_data
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import utils
from utils import logger
import os
import logging
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import codebase.graphing_functions as gf
from codebase.settings import CAREERS

# Reload imported modules automatically before each cell runs, so edits to project code are picked up.
%load_ext autoreload
%autoreload 2
# Set the second handler attached to the project logger to INFO level.
# NOTE(review): indexing handlers[1] assumes utils.logger has at least two handlers in a fixed order - confirm.
logger.handlers[1].setLevel(logging.INFO)
# logger.disabled = True
# NOTE(review): get_option only reads the current setting (shown as the cell output); it does not change it.
# If the intent was to widen the DataFrame display, pd.set_option would be needed - confirm.
pd.get_option("display.max_columns")

In [None]:
# Player ids passed to the wsf/af/gf helpers throughout the notebook.
# Names in the trailing comments come from the labels on the commented-out career-graph cells further down.
PLAYER_ID = '253802'  # Kohli. NOTE(review): a str, while the ids below are ints - confirm this is intended.
ROOT_PLAYER_ID = 303669  # Root
WILLIAMSON_PLAYER_ID = 277906  # Williamson
SPD_SMITH_ID = 267192  # Smith

In [None]:
# Match list for PLAYER_ID, requested with _format='test'; displayed as the cell output.
test_match_list = wsf.player_match_list(PLAYER_ID, _format='test')
test_match_list

In [None]:
# Build a MatchData object from the last entry of the match list.
match_101 = match_data.MatchData(test_match_list[-1])

In [None]:
# Names of the instance attributes stored on the MatchData object.
match_101.__dict__.keys()

In [None]:
# Inspect the legacy_scorecard_url attribute of the match.
match_101.legacy_scorecard_url

In [None]:
# Inspect the team_1_players attribute of the match.
match_101.team_1_players

In [None]:
# Inspect the date attribute of the match.
match_101.date

In [None]:
%%script false --no-raise-error
# Disabled cell: the %%script magic hands the cell body to the `false` command, so the Python below is not run.
# Batting contribution of PLAYER_ID in match_101.
# NOTE(review): the two positional True flags are not named here - check _get_player_contribution's signature.
match_101_contr = af._get_player_contribution(PLAYER_ID, match_101, 'bat', True, True)
# Show the last element of the result.
match_101_contr[-1]

We can now get the match list for a particular player. The next step is to get the commentary ("comms") for a match, along with the individual player stats for that match.

#### Comms for each match

In [None]:
# all_comms = []

# for m_id in test_match_list:
#     try:
#         logger.info(f'Grabbing data for matchID {m_id}')
#         _match = match_data.MatchData(m_id, serialize=False)
#         comms = af.pre_transform_comms(_match)
#         comm_w_labels = af.create_labels(comms, ['isWicket', 'isFour', 'isSix'], null_category='noEvent')
#         all_comms.append(comm_w_labels)
#     except utils.NoMatchCommentaryError:
#         continue

# try:
#     all_comms = pd.concat(all_comms, ignore_index=True)
#     print(all_comms.size)
#     print(all_comms.groupby('labels').size())
#     all_comms.to_csv('./commentary_labels.csv')
# except ValueError:
#     print('No commentary to show')

Let's get the player's scores for every match by running the player contribution function on each match in the list.

In [None]:
%%script false --no-raise-error
# Disabled cell (the body is handed to `false` and never executed).
# Build one flat record per batting innings: the innings' own stats, then the
# match-level fields (everything except 'bat'/'bowl'), then the innings index.
contributions = []
for match_id in test_match_list:
    match = match_data.MatchData(match_id)
    totals = af._cricket_totals(PLAYER_ID, match, _type='bat', by_innings=True, is_object_id=True)
    match_fields = {field: totals[field] for field in totals.keys() if field not in ['bat', 'bowl']}
    for innings_number, innings_stats in enumerate(totals['bat']):
        record = dict(innings_stats)
        record.update(match_fields)
        record['inning'] = innings_number
        contributions.append(record)

contributions[-5:]

In [None]:
#innings = [inning for match in contributions for inning in match]
#innings[:5]

In [None]:
%%script false --no-raise-error
# Disabled cell. Turns the per-innings records into a DataFrame.
innings_df = pd.DataFrame(contributions)
# NOTE: only the last expression of a cell is displayed, so this head() call produces no output.
innings_df.head()
innings_df.shape

Now we will compute the running average so that it can be plotted alongside the innings scores.

In [None]:
%%script false --no-raise-error
# Disabled cell. Career batting average after each innings:
# cumulative runs divided by cumulative dismissals, rounded to 2 decimals.
# The entry is None while the player has not yet been dismissed.
running_average = []

total_runs = 0
out = 0

for runs_scored, was_not_out in zip(innings_df.runs, innings_df.not_out):
    total_runs += runs_scored
    if was_not_out == False:
        out += 1
    # Iterating a Series yields Python scalars, so a zero dismissal count is the only undefined case.
    average_so_far = round(total_runs/out,2) if out != 0 else None
    running_average.append(average_so_far)

running_average

We can now compute the average over the last X innings; this is the 'form guide'.

In [None]:
%%script false --no-raise-error
# Disabled cell. "Form guide": batting average over a moving window of the most
# recent `window_size` innings (runs in the window / dismissals in the window),
# rounded to 2 decimals. The entry is None when the window contains no dismissal.

# moving window of length x
window_size = 12

# Work on plain Python scalars. Values pulled out with .iloc are numpy scalars, and
# numpy division by zero yields inf/nan with a warning instead of raising
# ZeroDivisionError, so an except clause would not reliably catch an empty window.
runs_by_innings = innings_df.runs.tolist()
# 1 where the innings ended in a dismissal (not_out == False), else 0.
dismissals = [int(not_out == False) for not_out in innings_df.not_out.tolist()]

last_x_average = []

window_runs = 0
window_out = 0

for i, (runs_scored, dismissed) in enumerate(zip(runs_by_innings, dismissals)):
    if i >= window_size:
        # Drop the innings that has just slid out of the window.
        window_runs -= runs_by_innings[i - window_size]
        window_out -= dismissals[i - window_size]

    window_runs += runs_scored
    window_out += dismissed
    last_x_average.append(round(window_runs / window_out, 2) if window_out else None)

last_x_average

In [None]:
%%script false --no-raise-error
# Disabled cell. Overlay the two average lines (left axis) on a bar chart of
# runs per innings coloured by continent (twin right axis); both axes share the same y range.
top_of_range = max(innings_df.runs) + 20
y_range = [0, top_of_range]

fig, ax1 = plt.subplots(figsize=(18,10))
#sns.set_theme()
average_lines = {
    'Average': running_average,
    f'Last {window_size} Innings': last_x_average,
}
sns.lineplot(data=average_lines, sort=False, ax=ax1, palette='rocket')
ax1.set_ylim(y_range)

ax2 = ax1.twinx()
sns.barplot(
    data=innings_df,
    x=innings_df.index,
    y=innings_df.runs,
    alpha=0.5,
    ax=ax2,
    hue=innings_df.continent,
    palette='mako',
    dodge=False,
)
ax2.set_ylim(y_range)

In [None]:
%%script false --no-raise-error
# Disabled cell. Match list for SPD_SMITH_ID.
# NOTE(review): the name says TEST matches, but unlike the earlier call for PLAYER_ID no _format='test' is passed - confirm the default format.
SPD_SMITH_TEST_MATCHES = wsf.player_match_list(SPD_SMITH_ID)
# Batting totals over those matches.
# NOTE(review): the positional True, True presumably mirror by_innings=True, is_object_id=True used in the keyword call later in the notebook - confirm.
spd_smith_contributions = af.get_cricket_totals(SPD_SMITH_ID, SPD_SMITH_TEST_MATCHES, 'bat', True, True)

In [None]:
%%script false --no-raise-error
# spd_smith_contributions[-5:]

In [None]:
%%script false --no-raise-error
# Disabled cell. Load the batting totals into a DataFrame and show its last rows.
spd_innings = pd.DataFrame(spd_smith_contributions)
spd_innings.tail()

In [None]:
%%script false --no-raise-error
# Disabled cell. Running average and 12-innings recent-form average for SPD_SMITH_ID, computed by the af helpers.
spd_ave = af.get_running_average(SPD_SMITH_ID)
spd_recent_form = af.get_recent_form_average(SPD_SMITH_ID, window_size=12)

In [None]:
%%script false --no-raise-error
# Disabled cell. Same chart as for PLAYER_ID, drawn for SPD_SMITH_ID: average lines on the
# left axis, runs per innings (coloured by continent) as bars on a twin axis with the same y range.
# NOTE: `window_size` is not set in this cell; it is read from the notebook namespace
# (assigned 12 in the form-guide cell), which matches the window_size=12 used for spd_recent_form.
y_range = [0, max(spd_innings.runs) + 20]

fig, ax1 = plt.subplots(figsize=(18,10))
#sns.set_theme()
sns.lineplot(data = {'Average': spd_ave, f'Last {window_size} Innings': spd_recent_form}, sort = False, ax=ax1, palette='rocket')

ax1.set_ylim(y_range)

ax2 = ax1.twinx()

# x, y and hue all come from spd_innings, so that is the frame passed as `data`.
# Passing a different frame fails when its length differs from the vectors.
sns.barplot(data = spd_innings, x=spd_innings.index, y=spd_innings.runs, alpha=0.5, ax=ax2, hue=spd_innings.continent, palette='mako', dodge=False)
ax2.set_ylim(y_range)

In [None]:
%%script false --no-raise-error
# Disabled cell. Batting contributions of PLAYER_ID over the whole match list.
# NOTE(review): the positional False, True flags are not named here - check get_player_contributions' signature.
contributions = af.get_player_contributions(PLAYER_ID, test_match_list, 'bat', False, True)

In [None]:
%%script false --no-raise-error
# Disabled cell. Show the last five contribution entries.
contributions[-5:]

In [None]:
%%script false --no-raise-error
# Disabled cell. Batting totals of PLAYER_ID over the match list; show the last five entries.
innings = af.get_cricket_totals(PLAYER_ID, test_match_list, 'bat', True, True)
innings[-5:]

In [None]:
#Williamson
#af.get_career_batting_graph(277906)

In [None]:
#Root
#af.get_career_batting_graph(303669)

In [None]:
#Smith
# af.get_career_batting_graph(SPD_SMITH_ID)

In [None]:
#Kohli
# af.get_career_batting_graph(PLAYER_ID, dates='2017-01-01:')

In [None]:
# Cook (player id 11728): career batting graph.
# NOTE(review): player_age='27:33' presumably restricts the graph to an age range - confirm in graphing_functions.
gf.get_career_batting_graph(11728, player_age='27:33')

In [None]:
%%script false --no-raise-error

# Disabled cell. Switch on seaborn's default theme, then preview the innings frame.
sns.set_theme()
innings_df.head()

In [None]:
%%script false --no-raise-error
# Disabled cell. Bar chart of runs per innings with a handful of date labels on the x axis.
fig_dims = (15,10)
fig,ax = plt.subplots(figsize=fig_dims);
# x and y are passed by keyword: recent seaborn releases accept only `data` positionally.
bar = sns.barplot(x=innings_df.index, y=innings_df.runs, palette='mako');
x_dates = innings_df.date.dt.strftime('%d-%m-%Y')

_ = plt.setp(ax.patches, linewidth=0)

# Label about five evenly spaced bars, each with its own date. Setting every label first
# and thinning the ticks afterwards with a locator would pair the surviving ticks with
# the first few labels rather than with the dates of the bars they sit under.
tick_positions = np.linspace(0, len(innings_df) - 1, num=min(5, len(innings_df)), dtype=int)
ax.set_xticks(tick_positions)
ax.set_xticklabels(x_dates.iloc[tick_positions], rotation=90);

In [None]:
# Read the 'Overall figures' table from a Statsguru batting query. The URL asks for
# class=1, a minimum of 30 matches (qualmin1=30; qualval1=matches), ordered by batting average.
# NOTE(review): class=1 presumably selects Test matches - confirm against Statsguru.
top_players = wsf.read_statsguru('https://stats.espncricinfo.com/ci/engine/stats/index.html?class=1;filter=advanced;orderby=batting_average;qualmin1=30;qualval1=matches;template=results;type=batting', table_name='Overall figures')


In [None]:
# Keep the first table returned by read_statsguru as a DataFrame.
# The guard makes the cell safe to re-run: once top_players is already a DataFrame,
# top_players[0] would look up a column named 0 instead of the first table.
if not isinstance(top_players, pd.DataFrame):
    top_players = pd.DataFrame(top_players[0])

In [None]:
# Preview the first rows of the table.
top_players.head()

In [None]:
# Replace each Player entry with element [1] of what player_id_from_link returns for it.
# NOTE(review): the comparison with '267192' further down suggests element [1] is the player id as a string - confirm.
# NOTE(review): the column is overwritten in place, so re-running this cell feeds the converted values back in.
top_players['Player'] = [
    wsf.player_id_from_link(player_link, playername=False)[1]
    for player_link in top_players['Player']
]

In [None]:
# Rows whose Player value is '267192' (the same number as SPD_SMITH_ID, here as a string).
top_players[top_players.Player == '267192']

In [None]:
%%script false --no-raise-error
# Disabled cell. Concatenate the match lists of every player in the table into one flat list.
match_list = [
    match_id
    for player in top_players.Player
    for match_id in wsf.player_match_list(player)
]

In [None]:
# set([int(id) for id in match_list])

In [None]:
# len(match_list)

In [None]:
%%script false --no-raise-error
# Disabled cell. Recent-form series for the four reference batters, keyed by player id.
recent_form = {}
for player in (PLAYER_ID, SPD_SMITH_ID, ROOT_PLAYER_ID, WILLIAMSON_PLAYER_ID):
    player_match_list = wsf.player_match_list(player)
    innings_records = af.get_cricket_totals(
        player,
        player_match_list,
        _type='bat',
        by_innings=True,
        is_object_id=True,
    )
    player_innings_df = pd.DataFrame(innings_records)
    recent_form[player] = af.calculate_recent_form_average(player_innings_df)

In [None]:
%%script false --no-raise-error
# Disabled cell. Show the Player column.
top_players.Player

In [None]:
# Load the saved recent-form data through the project's load_data helper.
# NOTE(review): file_ext='p' presumably means a pickle file - confirm in utils.load_data.
all_recent_form = utils.load_data('all_recent_form','dict',file_ext='p')

In [None]:
# Load the saved running-average data the same way.
all_running_ave  = utils.load_data('all_running_ave','dict',file_ext='p')

In [None]:
# Keys of the loaded mapping (indexed by player-id strings elsewhere in the notebook).
all_running_ave.keys()

In [None]:
# Flag every player whose recent-form series contains a value that int() rejects
# (ValueError or TypeError).
problem_players = []
for player, form_values in all_recent_form.items():
    try:
        for value in form_values:
            int(value)
    except (ValueError, TypeError):
        problem_players.append(player)

problem_players
        

In [None]:
# Pass the recent-form mapping through af.normalized_career_length.
# NOTE(review): the result is used as a DataFrame with one column per player id below - confirm what the normalisation does.
full_df = af.normalized_career_length(all_recent_form)

In [None]:
# Same helper applied to the running-average mapping.
full_ave_df = af.normalized_career_length(all_running_ave)

In [None]:
# Dimensions of the running-average frame.
full_ave_df.shape

In [None]:
# Put the recent-form columns of four players next to their running-average columns;
# the running-average copies get a '-ra' suffix so the column names stay distinct.
selected_ids = ['35320', '7133', '50710', '52337']
recent_form_part = full_df[selected_ids]
running_ave_part = full_ave_df[selected_ids]
combined_df = pd.concat([recent_form_part, running_ave_part], axis=1, join='inner')
combined_df.columns = selected_ids + [f'{player_id}-ra' for player_id in selected_ids]

In [None]:
# Show the recent-form columns of the four selected players.
full_df[['35320', '7133', '50710', '52337']]

In [None]:
# Plot one player's recent form against their running average.
i = 2  # column position of the player within combined_df
# Apply the theme before creating the figure: it works through matplotlib's rcParams,
# which are read when the axes are created, so a later call would leave this figure unstyled.
sns.set_theme()
fig, ax1 = plt.subplots(figsize=(18,10))
# combined_df holds the recent-form columns first, then the matching '-ra' columns,
# so a player's running average sits half the column count to the right.
ra_offset = combined_df.shape[1]//2
sns.lineplot(data = combined_df.iloc[:, [i, i+ra_offset]], sort = False, ax=ax1, palette='mako', lw=2.5)

#ax1.set_ylim([0,250])

In [None]:
# Draw the recent-form series of four players on one set of axes.
# NOTE(review): the fourth id is '253802' (the PLAYER_ID value), not '52337' as in combined_df - confirm intended.
fig, ax1 = plt.subplots(figsize=(18,10))
sns.set_theme()
compared_ids = ('35320', '7133', '50710', '253802')
form_series = [all_recent_form[player_id] for player_id in compared_ids]
sns.lineplot(data=form_series, sort=False, ax=ax1, palette='mako', lw=2.5)

In [None]:
# Dimensions of the combined frame (recent-form columns followed by '-ra' columns).
combined_df.shape

In [None]:
# One subplot per player: recent form and running average on the same axes.
n_players = combined_df.shape[1]//2
# Theme first, so the axes created below pick it up.
sns.set_theme()
# squeeze=False always returns a 2-D array of axes, so indexing also works for a single row.
fig, ax1 = plt.subplots(nrows=n_players, figsize=(18, combined_df.shape[1]*5), squeeze=False)
ax1 = ax1[:, 0]
for i in range(n_players):
    # Column i is a player's recent form; column i + n_players is the same player's '-ra' column.
    sns.lineplot(data = combined_df.iloc[:, [i, i+n_players]], sort = False, ax=ax1[i], palette='rocket', lw=2)

In [None]:
# Align full_ave_df's column order with full_df, then tag full_df's columns with '_rf'
# (recent form) so the two frames can sit side by side without clashing names.
# Written to be safe on re-run: an '_rf' suffix left by a previous execution is stripped
# before reindexing and is never appended twice.
rf_suffix = '_rf'
base_cols = [
    col[:-len(rf_suffix)] if str(col).endswith(rf_suffix) else col
    for col in full_df.columns
]
full_ave_df = full_ave_df.reindex(columns=base_cols)
cols = {col: f'{base}{rf_suffix}' for col, base in zip(full_df.columns, base_cols)}
full_df = full_df.rename(columns=cols)
full_df.head()


In [None]:
# Preview the running-average frame.
full_ave_df.head()

In [None]:
# Side-by-side join (axis=1) of the recent-form and running-average frames, keeping only index values present in both.
full_combined_df = pd.concat([full_df, full_ave_df], axis=1, join='inner')

In [None]:
# Preview the players table again.
top_players.head()

In [None]:
# One subplot per player: column i (first half of full_combined_df) is paired with
# column i + k (second half); the second column's name is passed to get_player_json for the title.
k = full_combined_df.shape[1]//2
fig, ax1 = plt.subplots(nrows=k, figsize=(18, k*5))
sns.set_theme()
for i in range(k):
    player_axis = ax1[i]
    player_pair = full_combined_df.iloc[:, [i, i+k]]
    sns.lineplot(data=player_pair, sort=False, ax=player_axis, palette='rocket', lw=2)
    player_key = full_combined_df.columns[i+k]
    name = wsf.get_player_json(player_key)["name"]
    player_axis.set_title(f'{name} Career Summary')


In [None]:
# Merge both mappings into one dict: recent-form series under the plain player id
# (sorted), followed by running-average series under '<id>_ra' (sorted).
# The running-average keys use '_ra' because '_rf' is the recent-form tag elsewhere in
# the notebook; the cell below pairs entries by position, so only the label changes.
all_running_ave_mod = {f'{key}_ra': all_running_ave[key] for key in sorted(all_running_ave)}
recent_form_sorted = {key: all_recent_form[key] for key in sorted(all_recent_form)}
combined_averages = {**recent_form_sorted, **all_running_ave_mod}

In [None]:
# One subplot per player showing recent form against career average.
# combined_averages lists the recent-form entries first and the running-average entries
# after them, so entry i is paired with entry i + k.
column_names = list(combined_averages)  # built once rather than twice per iteration
k = len(column_names)//2
# Theme first, so the axes created below pick it up.
sns.set_theme()
# squeeze=False always returns a 2-D array of axes, so indexing also works for a single row.
fig, ax1 = plt.subplots(nrows=k, figsize=(18, k*5), squeeze=False)
ax1 = ax1[:, 0]
for i in range(k):
    first_column = column_names[i]
    second_column = column_names[i+k]
    sns.lineplot(data = {'recent form':combined_averages[first_column], 'career ave':combined_averages[second_column]}, sort = False, ax=ax1[i], palette='rocket', lw=2)
    # first_column is the plain player id, which is what get_player_json is given.
    name = wsf.get_player_json(first_column)["name"]
    ax1[i].set_title(f'{name} Career Summary')
    ax1[i].set_xlabel('Innings')
    ax1[i].set_ylabel('Runs')

In [None]:
# Load the saved '_30' variants of the recent-form and running-average data.
# NOTE(review): presumably restricted to innings from age 30 onwards, matching the '30:' cutoff used further down - confirm.
all_recent_form_30 = utils.load_data('all_recent_form_30','dict',file_ext='p')
all_running_ave_30 = utils.load_data('all_running_ave_30','dict',file_ext='p')

In [None]:
# Per-player career innings frames, keyed by the Player value from the table.
all_innings = {}
for player in top_players.Player:
    # NOTE(review): whatever load_data returns is handed straight to pd.read_json - confirm
    # whether that is a path or a JSON string.
    career_json = utils.load_data(player, 'career_innings_stats', data_folder=CAREERS)
    all_innings[player] = pd.read_json(career_json)

In [None]:
# Display the whole mapping of per-player frames (the output can be very long).
all_innings

In [None]:
# Cut the three mappings down to a small set of players for a trial run of the graphing helper.
test_players = ['14334', '10777', '11886']
test_recent_form, test_running_ave, test_innings = (
    {player_id: source[player_id] for player_id in test_players}
    for source in (all_recent_form, all_running_ave, all_innings)
)

In [None]:
# Draw the career batting summary for the three trial players.
gf.graph_career_batting_summary(test_recent_form, test_running_ave, test_innings)

In [None]:
from datetime import datetime
# Per-player innings frames restricted to innings on or after a cutoff date.
# The cutoff is the text before the first ':' in the string returned by
# af.dates_from_age(player, '30:'), parsed as YYYY-MM-DD.
all_innings_30 = {}
for player in top_players.Player:
    career_json = utils.load_data(player, 'career_innings_stats', data_folder=CAREERS)
    career_df = pd.read_json(career_json)
    # The stored dates are epoch milliseconds.
    career_df['date'] = pd.to_datetime(career_df['date'], unit='ms')
    cutoff_text = af.dates_from_age(player, '30:').split(':')[0]
    cutoff = datetime.strptime(cutoff_text, '%Y-%m-%d')
    on_or_after_cutoff = career_df.date >= cutoff
    all_innings_30[player] = career_df[on_or_after_cutoff]

In [None]:
# Display the whole mapping of filtered per-player frames (the output can be very long).
all_innings_30

In [None]:
# Draw the career batting summary from the '_30' data sets and the date-filtered innings.
gf.graph_career_batting_summary(all_recent_form_30, all_running_ave_30, all_innings_30)

In [None]:
# Batting contributions of PLAYER_ID for the first match in the list.
match_1 = test_match_list[0]
# NOTE(review): an earlier cell passes the whole match list to get_player_contributions;
# here a single entry is passed - confirm the helper accepts both.
match_1_innings = af.get_player_contributions(PLAYER_ID, match_1, 'bat', True, True)
match_1_innings