In [None]:
import pandas as pd
from mplsoccer import Pitch
from statsbomb2023.common.databases import Database, connect
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import numpy as np
import seaborn as sns
from distinctipy import distinctipy
from bokeh.plotting import ColumnDataSource, figure, output_notebook, show, save, output_file, reset_output
from bokeh import __version__
import base64
from tqdm.auto import tqdm
from PIL import Image
import io
import matplotlib.pyplot as plt


In [None]:
def reduce_dimensions(embeddings, method="tsne", n_components=2, **kwargs):
    """Project embeddings into a lower-dimensional space and centre the result.

    Parameters
    ----------
    embeddings : numpy.ndarray
        The embeddings to reduce. They are handed unchanged to the reducer's
        ``fit_transform``.
    method : 'pca' | 'tsne'
        The method to use for dimensionality reduction. Defaults to 'tsne'.
    n_components : int
        The number of components to keep. Defaults to 2.
    **kwargs
        Additional arguments to pass to the dimensionality reduction method
        (forwarded to the ``PCA`` / ``TSNE`` constructor).

    Returns
    -------
    numpy.ndarray
        The reduced embeddings, shifted so each component has zero mean.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    if method == "pca":
        reducer = PCA(n_components=n_components, **kwargs)
    elif method == "tsne":
        reducer = TSNE(n_components=n_components, **kwargs)
    else:
        raise ValueError(f"Unknown method: {method}")

    projected = reducer.fit_transform(embeddings)
    # Centre the point cloud on the origin (column-wise mean removal).
    projected -= np.mean(projected, axis=0)
    return projected

In [None]:
def _tensor2images(tensor, size=64):
    """Convert tensor images back to in-memory images encoded in base 64.

    Parameters
    ----------
    tensor: array-like
        4D tensor that represents an image list. The last axis is treated as
        the channel axis; a single channel is rendered as greyscale.
    size: int
        Image size to output in pixels (images are resized to size x size).
        Defaults to 64.

    Returns
    -------
    list[str]
        One ``data:image/jpeg;base64,...`` URI per image. Empty input yields
        an empty list.
    """

    # Cast once up front, as iterating over a Tensor is slow.
    data = np.asarray(tensor)

    # Nothing to render; np.max below would raise on an empty array.
    if data.size == 0:
        return []

    # If the images provided are scaled between [0, 1] then rescale to 0-255.
    if np.max(data) <= 1:
        data = data * 255

    # Cast as uint8 so PIL accepts it.
    data = data.astype(np.uint8)

    imgs_b64 = []
    for a in tqdm(data, desc="generating displayable images"):
        # If single channel, drop the channel axis and treat it as greyscale.
        if a.shape[-1] == 1:
            a = np.reshape(a, (a.shape[0], a.shape[1]))
            img = Image.fromarray(a, "L")
        else:
            img = Image.fromarray(a)

        img_resized = img.resize((size, size))
        buffer = io.BytesIO()
        img_resized.save(buffer, format="JPEG")
        # The payload is JPEG, so the data URI must declare image/jpeg.
        encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
        imgs_b64.append("data:image/jpeg;base64,%s" % encoded)

    return imgs_b64

In [None]:

def projector(
    embeddings,
    labels=None,
    method="tsne",
    class_mapping=None,
    images=None,
    image_size=256,
    tooltips_info=None,
    pt_size=3,
    colorize=True,
    pastel_factor=0.1,
    plot_size=600,
    active_drag="box_zoom",
    **kwargs,
):
    """Show already-projected 2D embeddings as an interactive Bokeh scatter plot.

    The dimensionality-reduction step is disabled in this function, so
    ``embeddings`` is plotted as given: the first two entries of every row are
    used as the x and y coordinates. Reduce the embeddings before calling.

    Parameters
    ----------
    embeddings: array-like
        The 2D coordinates to visualize, one row per example.
    labels: array-like
        Labels associated with the embeddings. If not supplied, each example
        is treated as its own class. String labels are used as-is; anything
        else is converted with ``int``.
    method: str
        Currently unused (reduction is disabled). Defaults to 'tsne'.
    class_mapping: list or dict
        Dictionary or list that maps the class numerical ids to their name.
    images: array-like
        Values placed in the ``src`` attribute of the ``<img>`` shown in the
        hover tooltip (e.g. base64 data URIs), one per example.
    image_size: int
        Currently unused (images are displayed as supplied). Defaults to 256.
    tooltips_info: dict
        Extra columns to display in the tooltips, keyed by the label to show.
    pt_size: int
        Size of the points displayed on the visualization. Defaults to 3.
    colorize: bool
        Colorize the points by class. Only applies when ``labels`` is given.
        Defaults to True.
    pastel_factor: float
        Modify the color palette to be more pastel. Defaults to 0.1.
    plot_size: int
        Width and height of the figure in pixels. Defaults to 600.
    active_drag: str
        Bokeh drag tool that is active by default. Defaults to 'box_zoom'.
    **kwargs
        Currently unused (previously forwarded to the reduction method).

    Returns
    -------
    None
        The plot is displayed in the notebook as a side effect.
    """
    # Reduction is intentionally skipped: callers pass 2D coordinates directly.
    cords = embeddings

    # sample id
    _idxs = list(range(len(embeddings)))

    # labels?
    if labels is not None:
        # Materialise first so positional access works for any sequence type
        # (e.g. a pandas Series without a 0 label) and for empty input.
        labels = list(labels)
        if labels and isinstance(labels[0], str):
            # labels are already names, just use them
            _labels = labels
        else:
            _labels = [int(i) for i in labels]
    else:
        # treat each example as its own class
        _labels = _idxs

    # class name mapping?
    if class_mapping:
        _labels_txt = [class_mapping[i] for i in _labels]
    else:
        _labels_txt = [str(i) for i in _labels]

    class_list = sorted(set(_labels_txt))
    num_classes = len(class_list)

    # column data handed to Bokeh
    data = dict(
        id=_idxs,
        x=[i[0] for i in cords],
        y=[i[1] for i in cords],
        labels=_labels,
        labels_txt=_labels_txt,
    )

    # colors if needed
    _colors = []
    if labels is not None and colorize:
        palette = distinctipy.get_colors(num_classes, pastel_factor=pastel_factor)
        # keyed by the textual class name, as labels can be strings or ints
        colors = {
            class_list[idx]: distinctipy.get_hex(c) for idx, c in enumerate(palette)
        }
        # map each point to its class color
        _colors = [colors[i] for i in _labels_txt]
        data["colors"] = _colors

    # building custom tooltips
    tooltips = '<div style="border:1px solid #ABABAB">'

    if images is not None:
        data["imgs"] = images
        # have to write custom tooltip html.
        tooltips += '<center><img src="@imgs"/></center>'  # noqa

    # adding user info
    if tooltips_info:
        for k, v in tooltips_info.items():
            data[k] = v
            tooltips += "%s:@%s <br>" % (k, k)

    tooltips += "Class:@labels_txt <br>ID:@id </div>"

    # to bokeh format
    source = ColumnDataSource(data=data)

    # Bokeh backward compatibility: the size keywords were renamed in Bokeh 3.
    if int(__version__.split(".")[0]) >= 3:
        size_kwargs = dict(width=plot_size, height=plot_size)
    else:
        size_kwargs = dict(plot_width=plot_size, plot_height=plot_size)
    fig = figure(
        tooltips=tooltips,
        active_drag=active_drag,
        active_scroll="wheel_zoom",
        **size_kwargs,
    )

    # remove grid and axis
    fig.xaxis.visible = False
    fig.yaxis.visible = False
    fig.xgrid.visible = False
    fig.ygrid.visible = False

    # draw points; scatter() defaults to circle markers and avoids the
    # deprecation warning recent Bokeh emits for circle(size=...)
    scatter_kwargs = dict(size=pt_size, source=source)
    if _colors:
        scatter_kwargs["color"] = "colors"
    fig.scatter("x", "y", **scatter_kwargs)

    # render: clear any previously configured output target, then route the
    # plot to the notebook (done once, right before showing)
    reset_output()
    output_notebook()

    show(fig, notebook_handle=True)

In [None]:

def plot_action(
    action: pd.Series,
    surface=None,
    show_action=True,
    show_visible_area=True,
    ax=None,
    surface_kwargs=None,
) -> str:
    """Render a SPADL action with its 360 freeze frame as an inline thumbnail.

    A small (3 x 2 inch) pitch is drawn on a fresh figure, the action and the
    freeze frame are plotted on it, and the figure is returned as a base64
    data URI instead of being displayed.

    Parameters
    ----------
    action : pandas.Series
        A row from the actions DataFrame. It must provide
        ``freeze_frame_360``, ``visible_area_360``, ``start_x``, ``start_y``,
        ``end_x`` and ``end_y``.
    surface : np.array, optional
        Accepted for backward compatibility; ignored by this implementation.
    show_action : bool
        Draw an arrow from the action's start to its end location.
    show_visible_area : bool
        Draw the polygon stored in ``visible_area_360``.
    ax : matplotlib.axes.Axes, optional
        Accepted for backward compatibility; ignored, a new figure is always
        created.
    surface_kwargs : dict, optional
        Accepted for backward compatibility; ignored by this implementation.

    Returns
    -------
    str
        ``data:image/jpeg;base64,...`` URI of the rendered figure.
    """
    # parse freeze frame
    freeze_frame = pd.DataFrame.from_records(action["freeze_frame_360"])
    visible_area = action["visible_area_360"]
    teammate_locs = freeze_frame[freeze_frame.teammate]
    opponent_locs = freeze_frame[~freeze_frame.teammate]

    # set up pitch
    p = Pitch(pitch_type="custom", pitch_length=105, pitch_width=68)
    fig, ax = p.draw(figsize=(3, 2))

    try:
        # plot action
        if show_action:
            p.arrows(
                action["start_x"],
                action["start_y"],
                action["end_x"],
                action["end_y"],
                color="black",
                headwidth=5,
                headlength=5,
                width=1,
                ax=ax,
            )
        # plot visible area
        if show_visible_area:
            p.polygon([visible_area], color=(236 / 256, 236 / 256, 236 / 256, 0.5), ax=ax)
        # plot freeze frame
        p.scatter(teammate_locs.x, teammate_locs.y, c="#6CABDD", s=20, ec="k", ax=ax)
        p.scatter(opponent_locs.x, opponent_locs.y, c="#C8102E", s=20, ec="k", ax=ax)
        p.scatter(action["start_x"], action["start_y"], c="w", s=10, ec="k", ax=ax)

        buffer = io.BytesIO()
        fig.savefig(buffer, format="jpeg")
    finally:
        # Close this specific figure so it is neither displayed inline nor
        # kept alive by pyplot when many thumbnails are rendered in a loop.
        plt.close(fig)

    # The payload is JPEG, so the data URI must declare image/jpeg.
    encoded = base64.b64encode(buffer.getvalue()).decode("ascii")
    return "data:image/jpeg;base64,%s" % encoded
    




In [None]:
# Lookup table: numeric team id -> display name, used to label the plots.
team_name = {
    23: "Watford",
    101: "Leeds United",
    35: "Brighton & Hove Albion",
    56: "Norwich City",
    31: "Crystal Palace",
    33: "Chelsea",
    37: "Newcastle United",
    40: "West Ham United",
    1: "Arsenal",
    38: "Tottenham Hotspur",
    25: "Southampton",
    34: "Burnley",
    22: "Leicester City",
    36: "Manchester City",
    24: "Liverpool",
    93: "Brentford",
    46: "Wolverhampton Wanderers",
    29: "Everton",
    39: "Manchester United",
    59: "Aston Villa",
    43: "Nottingham Forest",
    28: "AFC Bournemouth",
    55: "Fulham",
}

In [None]:
# Read-only connection to the project's SQLite store.
# NOTE(review): this URL has three slashes before "cw", whereas the parquet
# files loaded in this notebook live under "/cw/..." - confirm that `connect`
# resolves it to the intended (absolute) location.
DB_URI = "sqlite:///cw/dtaijupiter/NoCsBack/dtai/deniz/statsbomb2023/stores/database_23.sql"
# Presumably a StatsBomb game id - confirm against the signature of db.actions.
SAMPLE_GAME_ID = 3837533

db = connect(DB_URI, mode="r")
df_actions = db.actions(SAMPLE_GAME_ID)


In [None]:
# Quick schema check: list the keys (column labels) of the loaded actions.
print(df_actions.keys())

In [None]:
# Load every artefact that describes the encoded actions; each lives in its
# own parquet file.
df_img = pd.read_parquet(
    "/cw/dtaijupiter/NoCsBack/dtai/deniz/statsbomb2023/notebooks/img.parquet"
)
df_vectors = pd.read_parquet(
    "/cw/dtaijupiter/NoCsBack/dtai/deniz/statsbomb2023/stores/datasets/enc/predict/vectors.parquet"
)
df_goalscore = pd.read_parquet(
    "/cw/dtaijupiter/NoCsBack/dtai/deniz/statsbomb2023/stores/datasets/enc/predict/x_goalscore.parquet"
)
df_homeaway = pd.read_parquet(
    "/cw/dtaijupiter/NoCsBack/dtai/deniz/statsbomb2023/stores/datasets/enc/predict/x_home_away.parquet"
)
df_passteamid = pd.read_parquet(
    "/cw/dtaijupiter/NoCsBack/dtai/deniz/statsbomb2023/stores/datasets/enc/predict/x_pass_team_id.parquet"
)
df_remainmin = pd.read_parquet(
    "/cw/dtaijupiter/NoCsBack/dtai/deniz/statsbomb2023/stores/datasets/enc/predict/x_remain_min.parquet"
)
df_play_name = pd.read_parquet(
    "/cw/dtaijupiter/NoCsBack/dtai/deniz/statsbomb2023/stores/datasets/enc/predict/x_play_pattern_name.parquet"
)

# Glue the frames together column-wise; pandas aligns the rows on the index.
_result_parts = [
    df_vectors,
    df_goalscore,
    df_homeaway,
    df_passteamid,
    df_remainmin,
    df_play_name,
    df_img,
]
df_results = pd.concat(_result_parts, axis=1)

In [None]:
# Sanity check on the merged frame: column labels first, then the row count.
print(df_results.columns)
print(df_results.shape[0])

In [None]:
# Peek at the first five goal-difference values.
goalscore_preview = df_results["goalscore_diff"].head(5)
print(goalscore_preview)


## Teams

In [None]:
# Collect the plot inputs straight from the columns. Walking the frame with
# iterrows() builds a Series per row, which is slow and can change dtypes.
vec_list = list(df_results["vector"])
# Indexing the dict (rather than Series.map) keeps the KeyError for unknown ids.
team_list = [team_name[team_id] for team_id in df_results["possession_team_id_a0"]]
img_list = list(df_results["img"])
type_list = list(df_results["play_pattern_name_a0"])
    


In [None]:
# t-SNE is stochastic: pin the seed so a fresh "Run All" reproduces the same
# layout. reduce_dimensions fits the reducer and centres the result.
principals = reduce_dimensions(
    np.array(vec_list), method="tsne", n_components=2, random_state=42
)
projector(principals, team_list, images=img_list, tooltips_info={"PassType": type_list})

## Home-Away joint cluster

In [None]:
# Restrict to actions where team 36 ("Manchester City" in team_name) has
# possession, then pull the plot inputs straight from the columns.
MAN_CITY_ID = 36
city_rows = df_results[df_results["possession_team_id_a0"] == MAN_CITY_ID]

vec_list = list(city_rows["vector"])
img_list = list(city_rows["img"])
ha_list = list(city_rows["is_home_a0"])
type_list = list(city_rows["play_pattern_name_a0"])

    


In [None]:
# t-SNE is stochastic: pin the seed so a fresh "Run All" reproduces the same
# layout. reduce_dimensions fits the reducer and centres the result.
principals = reduce_dimensions(
    np.array(vec_list), method="tsne", n_components=2, random_state=42
)
# Points are labelled by the is_home_a0 flag collected in ha_list.
projector(principals, ha_list, images=img_list, tooltips_info={"PassType": type_list})

## Difference of 0-0 and 0-1 in the second half

In [None]:
# Actions later than 2700 s (45 min) of overall match time. A local mask is
# used instead of writing an "is_end" column into df_results: another cell
# stores a different threshold under that same name, so the column's meaning
# would depend on which cell ran last.
SECOND_HALF_START_S = 2700
in_second_half = df_results["time_seconds_overall_a0"] > SECOND_HALF_START_S

# NOTE(review): the section heading says "0-0 and 0-1"; the code selects
# goalscore_diff == 0 and goalscore_diff == 1 - confirm the sign convention.
level_rows = df_results[in_second_half & (df_results["goalscore_diff"] == 0)]
one_goal_rows = df_results[in_second_half & (df_results["goalscore_diff"] == 1)]

# First group: goal difference 0.
vec_list = list(level_rows["vector"])
team_list = [team_name[team_id] for team_id in level_rows["possession_team_id_a0"]]
img_list = list(level_rows["img"])
type_list = list(level_rows["play_pattern_name_a0"])

# Second group: goal difference 1.
vec_list_2 = list(one_goal_rows["vector"])
team_list_2 = [team_name[team_id] for team_id in one_goal_rows["possession_team_id_a0"]]
img_list_2 = list(one_goal_rows["img"])
type_list_2 = list(one_goal_rows["play_pattern_name_a0"])

In [None]:
# Fit one t-SNE on both groups together so the two plots share a coordinate
# space, then show each group separately. The seed is pinned because t-SNE
# is stochastic.
n_first = len(vec_list)
principals = reduce_dimensions(
    np.array(vec_list + vec_list_2), method="tsne", n_components=2, random_state=42
)
projector(principals[:n_first], team_list, images=img_list, tooltips_info={"PassType": type_list})
projector(principals[n_first:], team_list_2, images=img_list_2, tooltips_info={"PassType": type_list_2})

## Teams' behaviour when they are away and home and losing in the last 10 min

In [None]:
# Actions later than 4800 s (80 min) of overall match time. A local mask is
# used instead of writing an "is_end" column into df_results: another cell
# stores a different threshold under that same name, so the column's meaning
# would depend on which cell ran last.
LAST_TEN_MIN_START_S = 4800
in_last_ten = df_results["time_seconds_overall_a0"] > LAST_TEN_MIN_START_S

# NOTE(review): the section heading mentions home/away, but the split below
# only uses the sign of goalscore_diff - confirm this is the intended grouping.
# Rows keep their df_results order within each group.
negative_rows = df_results[in_last_ten & (df_results["goalscore_diff"] < 0)]
positive_rows = df_results[in_last_ten & (df_results["goalscore_diff"] > 0)]

# First group: negative goal difference.
vec_list = list(negative_rows["vector"])
team_list = [team_name[team_id] for team_id in negative_rows["possession_team_id_a0"]]
img_list = list(negative_rows["img"])
type_list = list(negative_rows["play_pattern_name_a0"])

# Second group: positive goal difference.
vec_list_2 = list(positive_rows["vector"])
team_list_2 = [team_name[team_id] for team_id in positive_rows["possession_team_id_a0"]]
img_list_2 = list(positive_rows["img"])
type_list_2 = list(positive_rows["play_pattern_name_a0"])

In [None]:
# Fit one t-SNE on both groups together so the two plots share a coordinate
# space, then show each group separately. The seed is pinned because t-SNE
# is stochastic.
n_first = len(vec_list)
principals = reduce_dimensions(
    np.array(vec_list + vec_list_2), method="tsne", n_components=2, random_state=42
)
projector(principals[:n_first], team_list, images=img_list, tooltips_info={"PassType": type_list})
projector(principals[n_first:], team_list_2, images=img_list_2, tooltips_info={"PassType": type_list_2})