# Explorative Data Analysis


### Import necessary packages

In [2]:
import json
from typing import List, Dict, Union
import numpy as np
from match import Match

### Exploring the data

In [3]:
match_1_instance = Match("../data/match_1.json")
match_2_instance = Match("../data/match_2.json")

In [4]:
match_1_instance.info()
match_2_instance.info()

Number of gaits in this match is: 574
Number of gaits in this match is: 613


## Label

In [5]:
match_1_instance.count_actions()

Counter({'walk': 245,
         'run': 209,
         'dribble': 42,
         'rest': 25,
         'pass': 24,
         'tackle': 20,
         'shot': 6,
         'cross': 3})

In [6]:
match_2_instance.count_actions()

Counter({'run': 343,
         'walk': 190,
         'dribble': 36,
         'shot': 12,
         'tackle': 11,
         'rest': 10,
         'pass': 8,
         'no action': 2,
         'cross': 1})

In [7]:
match_1_instance.extract_sequences()

[['walk', 'walk', 'walk', 'walk', 'rest'],
 ['walk',
  'walk',
  'walk',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run'],
 ['walk',
  'walk',
  'walk',
  'walk',
  'tackle',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run'],
 ['walk',
  'walk',
  'walk',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run'],
 ['walk', 'run', 'run', 'run', 'run', 'run'],
 ['walk', 'run', 'run', 'tackle', 'dribble', 'run', 'dribble', 'pass', 'pass'],
 ['walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'run',
  'run',
  'run'],
 ['walk', 'run'],
 ['walk', 'walk', 'run', 'run', 'rest'],
 ['walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'rest'],
 ['walk',
  'run',
  'run',
  'run',
  'run',
  'dribble',
  'run',
  'pass',
  'run',
  'run',
  'run',
  'run'],
 ['walk', 'run', 'run', 'run

In [8]:
with open("../data/sequences_match_1.json", "w") as file:
    json.dump(extract_sequences(match_1_data), file)

NameError: name 'extract_sequences' is not defined

In [None]:
with open("../data/sequences_match_2.json", "w") as file:
    json.dump(extract_sequences(match_2_data), file)

In [9]:
match_1_instance.extract_sequences()

[['walk', 'walk', 'walk', 'walk', 'rest'],
 ['walk',
  'walk',
  'walk',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run'],
 ['walk',
  'walk',
  'walk',
  'walk',
  'tackle',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run'],
 ['walk',
  'walk',
  'walk',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run',
  'run'],
 ['walk', 'run', 'run', 'run', 'run', 'run'],
 ['walk', 'run', 'run', 'tackle', 'dribble', 'run', 'dribble', 'pass', 'pass'],
 ['walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'walk',
  'run',
  'run',
  'run'],
 ['walk', 'run'],
 ['walk', 'walk', 'run', 'run', 'rest'],
 ['walk', 'walk', 'walk', 'walk', 'walk', 'walk', 'rest'],
 ['walk',
  'run',
  'run',
  'run',
  'run',
  'dribble',
  'run',
  'pass',
  'run',
  'run',
  'run',
  'run'],
 ['walk', 'run', 'run', 'run

## Norm

In [None]:
accelerometer_dimension = len(match_1_data[0]["norm"])
print(f"Norm dimension: {accelerometer_dimension}")

Norm dimension: 72


In [None]:
# Consistency check
all(len(gait["norm"]) == accelerometer_dimension for gait in match_1_data)

False

In [8]:
match_1_instance.find_min_max()

{'MIN': 1, 'MAX': 227}

In [9]:
match_2_instance.find_min_max()

{'MIN': 3, 'MAX': 723}

In [99]:
match_1_data_with_averaged_norm = [
    {"label": gait["label"], "norm": np.mean(gait["norm"])} for gait in match_1_data
]
with open("../data/match_1_data_with_averaged_norm.json", "w") as file:
    json.dump(match_1_data_with_averaged_norm, file)

In [100]:
match_2_data_with_averaged_norm = [
    {"label": gait["label"], "norm": np.mean(gait["norm"])} for gait in match_2_data
]
with open("../data/match_2_data_with_averaged_norm.json", "w") as file:
    json.dump(match_2_data_with_averaged_norm, file)

In [102]:
def mean_norm_for_each_action(
    match_data_averaged: List[Dict[str, Union[str, float]]], action: str
) -> float:
    """Returns the avearge norm of the actions.

    Args:
        match_data_averaged : JSON file containing match information with average norm.
    """

    return np.mean(
        [gait["norm"] for gait in match_data_averaged if gait["label"] == action]
    )

In [114]:
mean_norm_for_each_action_dict = {
    action: mean_norm_for_each_action(match_1_data_with_averaged_norm, action)
    for action in list(count_actions(match_1_data_with_averaged_norm).keys())
}

mean_norm_for_each_action_dict

{'walk': 28.09486687642306,
 'rest': 21.83356684290428,
 'run': 54.40459298904966,
 'tackle': 70.66858172485196,
 'dribble': 52.88252588755935,
 'pass': 64.13327808012248,
 'cross': 46.708457756829354,
 'shot': 113.27871688343295}

In [115]:
mean_norm_for_each_action_dict = {
    action: mean_norm_for_each_action(match_2_data_with_averaged_norm, action)
    for action in list(count_actions(match_2_data_with_averaged_norm).keys())
}

mean_norm_for_each_action_dict

{'no action': 38.65222535277646,
 'run': 56.22519161734227,
 'pass': 73.00031950031808,
 'rest': 22.712307380205683,
 'walk': 29.869326616095396,
 'dribble': 65.56369756735364,
 'shot': 98.46478534250633,
 'tackle': 66.86037467805848,
 'cross': 93.89217051873887}