In [1]:
import pickle

import altair as alt
import pandas as pd

from utils.data import notes, resultats

In [2]:
# Notebook-wide display settings.
# Let pandas print up to 200 characters of a cell before truncating it.
pd.set_option("display.max_colwidth", 200)

# Width shared by the charts of this notebook (they are drawn 800 wide).
DEFAULT_WIDTH = 800

# Lift Altair's limit on the number of rows a chart may embed. The return
# value is bound to `_` so that the cell displays nothing.
_ = alt.data_transformers.disable_max_rows()

In [3]:
# SECURITY NOTE: pickle.load can execute arbitrary code while deserializing,
# so "canebiere.pickle" must only ever come from a trusted source.
with open("canebiere.pickle", "rb") as src:
    df: pd.DataFrame = pickle.load(src)

# Fail right here, with a clear message, if the pickle does not hold what the
# following cells rely on (a DataFrame with 'Date' and 'Auteur' columns),
# instead of failing with an obscure error further down.
if not isinstance(df, pd.DataFrame):
    raise TypeError(
        f"canebiere.pickle should contain a pandas DataFrame, got {type(df).__name__}"
    )
missing_columns = {"Date", "Auteur"} - set(df.columns)
if missing_columns:
    raise KeyError(f"canebiere.pickle is missing column(s): {sorted(missing_columns)}")


In [4]:
def date_to_season(date: pd.Timestamp) -> str:
    """Return the label ``'N-N+1'`` of the season that contains ``date``.

    Season N/N+1 runs from 1 July N to 30 June N+1, so a date in
    January-June belongs to the season that started the previous
    calendar year.

    Args:
        date: Object exposing ``month`` and ``year`` attributes.

    Returns:
        The season as ``'<first year>-<second year>'``, e.g. ``'2021-2022'``.
    """
    # July onwards opens a new season; earlier months close the previous one.
    first_year = date.year if date.month > 6 else date.year - 1
    return f'{first_year}-{first_year + 1}'
    
# Label every row of the full dataset with the season its date falls in.
season_labels = df['Date'].apply(date_to_season)
df['Saison'] = season_labels

In [5]:
# Keep only the rows whose author is 'Blaah'. reset_index() renumbers the rows
# from 0 and keeps the previous index as a column.
is_blaah = df['Auteur'] == 'Blaah'
blaah = df.loc[is_blaah].reset_index()

# Helpers imported from utils.data, both fed the filtered frame.
matchs = resultats(blaah)
notation = notes(blaah)

In [6]:
# Add the season label to both derived tables, computed from their 'Date' column.
for table in (matchs, notation):
    table['Saison'] = table['Date'].apply(date_to_season)

In [7]:
# Restrict both tables to the season under study.
season = '2021-2022'
matchs_saison = matchs.loc[matchs['Saison'] == season]
notation_saison = notation.loc[notation['Saison'] == season]

# Saison 2021-2022

# Des notes

In [8]:
# How many times each grade was handed out over the season; 'sort' is carried
# along as its per-grade maximum.
overall = (
    notation_saison
    .groupby(['Note_num', 'Note_txt'])
    .agg({"Joueur": "count", "sort": "max"})
    .reset_index()
)

# A single row of bubbles: x position and colour follow the numeric grade,
# bubble size follows how often the grade was given.
alt.Chart(overall).mark_circle().encode(
    x=alt.X(
        'Note_num:Q',
        title='Note',
        axis=None
    ),
    # Constant y value: every bubble sits on the same labelled row.
    y=alt.datum("Toutes les notes"),
    color=alt.Color(
        'Note_num:Q',
        legend=None,
        scale=alt.Scale(domain=[0.0, 5.0], scheme="redyellowgreen")
    ),
    size=alt.Size(
        "Joueur:Q",
        legend=None,
        # Domain anchored at 0 and capped at the largest count in the data.
        scale=alt.Scale(domain=[0.0, overall["Joueur"].max()], range=[0.0, 4000.0])
    ),
    tooltip=[alt.Tooltip("Note_txt:N", title="Note"), alt.Tooltip("Joueur:N", title="# Attributions")]
).properties(
    title='2021-2022',
    # Use the notebook-wide width constant instead of repeating the literal 800.
    width=DEFAULT_WIDTH
).configure_axis(
    grid=False
).configure_view(
    strokeWidth=0  # no frame around the plotting area
).configure_axisY(
    labelPadding=20,
    labelFontSize=18,
    grid=False,
)


# Des matchs, des notes

In [38]:
# Per match and per grade: number of players who received that grade
# ('sort' is kept as its maximum within each group).
by_match_by_note = (
    notation_saison
    .groupby(["Saison", "Match", "Match_Teams", "Note_num", "Note_txt"])
    .agg({"Joueur": "count", "sort": "max"})
    .reset_index()
)

# Player grades joined to the match table on the match date. The inner join
# keeps only dates present in both tables; columns that exist in both get a
# '_notes' / '_matchs' suffix.
notes_by_date = notation_saison.set_index('Date')
matchs_by_date = matchs_saison.set_index('Date')
fuse = notes_by_date.join(matchs_by_date, how='inner', lsuffix='_notes', rsuffix='_matchs')

In [71]:
# The three charts of this cell are meant to be displayed side by side, so
# their rows must line up: the match ordering is defined once and shared
# instead of being re-typed in every encoding.
match_order = alt.EncodingSortField("sort", op="max", order="ascending")

# Bubble panel: one row per match, one bubble per grade. Position and colour
# follow the numeric grade, bubble size the number of players who got it.
base_circles = alt.Chart(by_match_by_note).mark_circle().encode(
    x=alt.X(
        'Note_num:Q',
        title='Note',
        axis=alt.Axis(grid=False, ticks=False, labels=False)
    ),
    y=alt.Y(
        'Match_Teams:N',
        title=None,
        # NOTE(review): the large padding combined with left alignment looks
        # like a way to get left-aligned match labels — confirm visually.
        axis=alt.Axis(ticks=False, grid=False, labelPadding=180, labelFontSize=14, labelAlign='left'),
        sort=match_order
    ),
    color=alt.Color(
        'Note_num:Q',
        legend=None,
        scale=alt.Scale(domain=[0.0, 5.0], scheme="redyellowgreen")
    ),
    size=alt.Size(
        "Joueur:Q",
        legend=None,
        # Domain anchored at 0 and capped at the largest count in the data.
        scale=alt.Scale(domain=[0.0, by_match_by_note["Joueur"].max()], range=[0.0, 4000.0])
    ),
    tooltip=[alt.Tooltip("Match_Teams:N", title="Match"), alt.Tooltip("Note_txt:N", title="Note"), alt.Tooltip("Joueur:N", title="# Joueurs")]
).properties(
    title='2021-2022',
    # Use the notebook-wide width constant instead of repeating the literal 800.
    width=DEFAULT_WIDTH
)

# Result panel: one dot per match row, coloured by the match result
# (green = Victoire, beige = Nul, red = Défaite).
dots = alt.Chart(fuse).mark_circle(size=200).encode(
    y=alt.Y(
        'Match_Teams:N',
        axis=alt.Axis(ticks=False, labels=False, title=""),
        sort=match_order
    ),
    color=alt.Color('Résultat', legend=None, scale=alt.Scale(domain=['Victoire', 'Nul', 'Défaite'], range=['#54a24b', '#d8b5a5', '#e45756'])),
    tooltip=[alt.Tooltip('Match_Teams:N', title="Match"), 'Résultat:N']
)

# Line running through the matches, in the same order as the dots.
line = alt.Chart(fuse).mark_line().encode(
    y=alt.Y('Match_Teams:N', sort=match_order),
)



In [72]:
# `+` binds tighter than `|`: the line and the result dots are layered first,
# then the bubble panel is placed to the right of that layered strip.
result_strip = line + dots
(result_strip | base_circles).configure_view(strokeWidth=0)

# Des joueurs, des notes

In [77]:
# Ridgeline plot: one small area chart per player, stacked vertically.
step = 30      # height of each player's row
overlap = 0.5  # fraction of a row by which an area may spill over the row above

alt.Chart(
    fuse[['Joueur', 'Note_num']],
    height=step,
    # Use the notebook-wide width constant instead of repeating the literal 800.
    width=DEFAULT_WIDTH,
).transform_joinaggregate(
    # Mean grade of each player, attached to every one of the player's rows.
    mean_note='mean(Note_num)', groupby=['Joueur']
).transform_bin(
    # NOTE(review): Vega-Lite's bin `as` is documented as [start, end], so with
    # this order 'bin_max' would hold the bin start and 'bin_min' the bin end.
    # Left unchanged (swapping would shift the curves) — confirm this is intended.
    ['bin_max', 'bin_min'], 'Note_num'
).transform_aggregate(
    # Number of grades per player and per bin.
    value='count()', groupby=['Joueur', 'mean_note', 'bin_min', 'bin_max']
).transform_impute(
    # Bins in which a player has no grade get an explicit count of 0.
    impute='value', groupby=['Joueur', 'mean_note'], key='bin_min', value=0
).mark_area(
    interpolate='monotone',
    fillOpacity=0.8,
    stroke='lightgray',
    strokeWidth=0.5
).encode(
    x=alt.X('bin_min:Q', bin='binned', title='Note / 5'),
    y=alt.Y(
        'value:Q',
        # The negative upper bound lets an area extend above its own row,
        # which is what produces the overlapping ridges.
        scale=alt.Scale(range=[step, -step * overlap]),
        axis=None
    ),
    fill=alt.Fill(
        'mean_note:Q',
        legend=alt.Legend(title='Note Moyenne'),
        scale=alt.Scale(domain=[1, 3], scheme='redyellowgreen')
    ),
    tooltip=['Joueur:N', alt.Tooltip('mean_note:Q', title='Moyenne', format='.1f')]
).facet(
    row=alt.Row(
        'Joueur:N',
        title=None,
        header=alt.Header(labelAngle=0, labelAlign='left', labelFontSize=14, labelBaseline='top'),
    )
).properties(
    title='OM 2021-2022',
    bounds='flush',
).configure_facet(
    spacing=0,  # rows touch each other so the ridges can overlap
).configure_view(
    stroke=None
).configure_title(
    anchor='end'
)

In [61]:
# --- Disabled experiment, kept for reference only -----------------------------
# Everything below is commented out and does not run. It sketches a classifier
# that predicts the match result from the per-player grades, then plots the
# permutation importance of each player.
# It needs scikit-learn, which the import cell of this notebook does not import.
# NOTE(review): the model is fitted and then evaluated (classification report,
# confusion matrix, permutation importance) on the same X, y — there is no
# held-out split, so these figures describe the training data only.
# NOTE(review): in the final chart, `y=alt.X(...)` looks like it should be
# `y=alt.Y(...)` — fix before re-enabling.
# ------------------------------------------------------------------------------
# from sklearn.ensemble import HistGradientBoostingClassifier
# from sklearn.metrics import classification_report
# from sklearn.metrics import ConfusionMatrixDisplay
# from sklearn.inspection import permutation_importance

# pivot = fuse[['Joueur', 'Note_num']].pivot(columns='Joueur', values='Note_num')
# pivot['Resultat'] = matchs_saison.set_index('Date')['Résultat']

# X = pivot[[x for x in pivot.columns if x != 'Resultat']]
# y = pivot['Resultat']

# clf: HistGradientBoostingClassifier = HistGradientBoostingClassifier()
# _ = clf.fit(X, y)

# print(classification_report(y_true=y, y_pred=clf.predict(X)))

# _ = ConfusionMatrixDisplay.from_estimator(clf, X, y, normalize='pred', values_format='.2f', cmap='Blues')


# result = permutation_importance(
#     clf, X, y, n_repeats=10, random_state=42, n_jobs=2
# )

# sorted_importances_idx = result.importances_mean.argsort()[::-1]
# importances = pd.DataFrame(
#     result.importances[sorted_importances_idx].T,
#     columns=X.columns[sorted_importances_idx],
# )

# _ = alt.Chart(importances.melt()).mark_boxplot().encode(
#     x='value',
#     y=alt.X('Joueur', sort=alt.EncodingSortField('mean(value)'), axis=alt.Axis(labelFontSize=14)),
#     color=alt.Color('mean(value)', scale=alt.Scale(domain=[0, 0.16], scheme='redyellowgreen'))
# )