# Exploratory Data Analysis


### Import necessary packages

In [88]:
import json
from typing import List, Dict, Union, Tuple
from collections import Counter
import numpy as np

### Exploring the data

In [89]:
# Load both matches. The context managers close each file handle as soon as
# its JSON content has been parsed, instead of leaving it open for the whole
# session.
with open("../data/match_1.json") as match_1_file:
    match_1_data = json.load(fp=match_1_file)

with open("../data/match_2.json") as match_2_file:
    match_2_data = json.load(fp=match_2_file)

In [103]:
# Each match is a list of gaits, so its length is the number of gaits recorded.
print(f"Number of gaits in match 1: {len(match_1_data)}")
print(f"Number of gaits in match 2: {len(match_2_data)}")

Number of gaits in match 1: 574
Number of gaits in match 2: 613


## Label

In [104]:
def count_actions(
    match_data: List[Dict[str, Union[str, float, List[float]]]]
) -> Dict[str, int]:
    """Tally how many gaits carry each action label.

    Args:
        match_data : Gaits of one match; each gait is a mapping with a "label" key.

    Returns:
        A ``Counter`` mapping every label found to its number of occurrences.
    """
    label_counts = Counter()
    for gait in match_data:
        label_counts[gait["label"]] += 1
    return label_counts

In [105]:
# Label distribution of match 1 (shown as the cell's output).
count_actions(match_1_data)

Counter({'walk': 245,
         'run': 209,
         'dribble': 42,
         'rest': 25,
         'pass': 24,
         'tackle': 20,
         'shot': 6,
         'cross': 3})

In [106]:
# Label distribution of match 2 (shown as the cell's output).
count_actions(match_2_data)

Counter({'run': 343,
         'walk': 190,
         'dribble': 36,
         'shot': 12,
         'tackle': 11,
         'rest': 10,
         'pass': 8,
         'no action': 2,
         'cross': 1})

In [None]:
def extract_sequences(
    match_data: List[Dict[str, Union[str, List[float]]]]
) -> List[Tuple[str, ...]]:
    """Split the labels of a match into sequences of actions.

    The first label of the match opens the first sequence. A new sequence is
    started every time the player comes back to that first action after having
    done something else. Inside a sequence, immediate repetitions of an action
    are dropped unless the action is "run", "walk" or "dribble".

    Args:
        match_data : Gaits of one match; each gait is a mapping with a "label" key.

    Returns:
        The sequences in chronological order, each one as a tuple of labels.
        An empty ``match_data`` yields an empty list.
    """
    # NOTE(review): the previous implementation could never reach its
    # sequence-splitting branch and always returned []. The rule implemented
    # here is the reading of that branch that makes it reachable -- confirm it
    # matches the intended definition of a "sequence".
    actions = [gait["label"] for gait in match_data]
    if not actions:
        return []

    first_action = actions[0]
    allowed_repetitive_actions = {"run", "walk", "dribble"}
    sequences = []
    sequence = [first_action]
    # Pair every action with the one that actually precedes it.
    for previous_action, action in zip(actions, actions[1:]):
        if action == first_action and action != previous_action:
            # Back to the opening action: close the current sequence and let
            # this action open the next one.
            sequences.append(tuple(sequence))
            sequence = [action]
        elif action in allowed_repetitive_actions or action != previous_action:
            sequence.append(action)
        # Otherwise: immediate repetition of a non-repeatable action, skipped.

    # The sequence still being built when the data ends must not be lost.
    sequences.append(tuple(sequence))
    return sequences

## Norm

In [91]:
# Length of the "norm" list of the first gait of match 1, used as the
# reference dimension in the checks that follow.
accelerometer_dimension = len(match_1_data[0]["norm"])
print(f"Norm dimension: {accelerometer_dimension}")

Norm dimension: 72


In [92]:
# Consistency check: does every gait of match 1 have a "norm" list of the
# same length as the first gait?
all(len(gait["norm"]) == accelerometer_dimension for gait in match_1_data)

False

In [93]:
def find_min_max(
    match_data: List[Dict[str, Union[str, List[float]]]]
) -> Dict[str, int]:
    """Report the shortest and the longest "norm" list among the gaits.

    Args:
        match_data: Gaits of one match; each gait is a mapping with a "norm" list.

    Returns:
        A dict with the smallest length under "MIN" and the largest under "MAX".
    """
    sizes = []
    for gait in match_data:
        sizes.append(len(gait["norm"]))
    smallest, largest = min(sizes), max(sizes)
    return {"MIN": smallest, "MAX": largest}

In [94]:
# Range of "norm" lengths in match 1 (shown as the cell's output).
find_min_max(match_1_data)

{'MIN': 1, 'MAX': 227}

In [95]:
# Range of "norm" lengths in match 2 (shown as the cell's output).
find_min_max(match_2_data)

{'MIN': 3, 'MAX': 723}

In [99]:
# Replace each gait's "norm" list by its mean so that every gait of match 1
# is described by a single value, keeping the label unchanged.
match_1_data_with_averaged_norm = [
    {"label": gait["label"], "norm": np.mean(gait["norm"])} for gait in match_1_data
]
# Persist the reduced data set next to the raw match files.
with open("../data/match_1_data_with_averaged_norm.json", "w") as file:
    json.dump(match_1_data_with_averaged_norm, file)

In [100]:
# Replace each gait's "norm" list by its mean so that every gait of match 2
# is described by a single value, keeping the label unchanged.
match_2_data_with_averaged_norm = [
    {"label": gait["label"], "norm": np.mean(gait["norm"])} for gait in match_2_data
]
# Persist the reduced data set next to the raw match files.
with open("../data/match_2_data_with_averaged_norm.json", "w") as file:
    json.dump(match_2_data_with_averaged_norm, file)

In [102]:
def mean_norm_for_each_action(
    match_data_averaged: List[Dict[str, Union[str, float]]], action: str
) -> float:
    """Average the norm of all gaits labelled with a given action.

    Args:
        match_data_averaged : Gaits of one match, each with a "label" and a
            single "norm" value.
        action : Label of the gaits to average over.

    Returns:
        The mean of the "norm" values of the gaits whose label equals ``action``.
    """
    matching_norms = []
    for gait in match_data_averaged:
        if gait["label"] == action:
            matching_norms.append(gait["norm"])
    return np.mean(matching_norms)

In [114]:
# Mean norm per action for match 1; iterating the Counter yields its labels.
mean_norm_for_each_action_dict = {
    action: mean_norm_for_each_action(match_1_data_with_averaged_norm, action)
    for action in count_actions(match_1_data_with_averaged_norm)
}

mean_norm_for_each_action_dict

{'walk': 28.09486687642306,
 'rest': 21.83356684290428,
 'run': 54.40459298904966,
 'tackle': 70.66858172485196,
 'dribble': 52.88252588755935,
 'pass': 64.13327808012248,
 'cross': 46.708457756829354,
 'shot': 113.27871688343295}

In [115]:
# Mean norm per action for match 2; iterating the Counter yields its labels.
mean_norm_for_each_action_dict = {
    action: mean_norm_for_each_action(match_2_data_with_averaged_norm, action)
    for action in count_actions(match_2_data_with_averaged_norm)
}

mean_norm_for_each_action_dict

{'no action': 38.65222535277646,
 'run': 56.22519161734227,
 'pass': 73.00031950031808,
 'rest': 22.712307380205683,
 'walk': 29.869326616095396,
 'dribble': 65.56369756735364,
 'shot': 98.46478534250633,
 'tackle': 66.86037467805848,
 'cross': 93.89217051873887}