# Imports

In [1]:
from match import Match
from encoder import LabelEncoder

In [2]:
match_1_instance = Match("../data/match_1.json")
match_2_instance = Match("../data/match_2.json")

In [3]:
match_2_instance.actions

{'cross',
 'dribble',
 'no action',
 'pass',
 'rest',
 'run',
 'shot',
 'tackle',
 'walk'}

In [4]:
match_2_instance.info()

Number of gaits in this match is: 613


In [5]:
# Drop "no action" as it only figures twice in the second match
for index, element in enumerate(match_2_instance.data):
    if element["label"] == "no action":
        match_2_instance.data.pop(index)

In [6]:
match_2_instance.actions

{'cross', 'dribble', 'pass', 'rest', 'run', 'shot', 'tackle', 'walk'}

In [7]:
match_2_instance.info()

Number of gaits in this match is: 611


In [8]:
match_1_sequences = match_1_instance.extract_sequences()
match_2_sequences = match_2_instance.extract_sequences()

In [9]:
match_1_clean_sequences = []
for element in match_1_sequences:
    match_1_clean_sequences.append(
        [nested_element["label"] for nested_element in element]
    )

In [10]:
match_2_clean_sequences = []
for element in match_2_sequences:
    match_2_clean_sequences.append(
        [nested_element["label"] for nested_element in element]
    )

In [11]:
all_data = match_1_clean_sequences + match_2_clean_sequences

In [12]:
print(all_data[10])
print(all_data[700])

['walk', 'walk', 'walk', 'walk', 'rest', 'walk', 'walk', 'walk', 'run', 'run', 'run']
['run', 'pass', 'rest', 'walk', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'run', 'walk', 'walk', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'walk', 'walk', 'dribble', 'dribble', 'dribble', 'shot', 'dribble', 'shot', 'walk', 'walk', 'tackle', 'dribble', 'dribble', 'cross', 'run', 'run', 'run', 'run', 'walk', 'dribble', 'pass', 'dribble', 'pass', 'walk', 'rest', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'run', 'run', 'tackle', 'run', 'tackle', 'run', 'run', 'run', 'run', 'run', 'tackle', 'tackle', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'walk', 'run', 'run', 'run', 'dribble', 'shot', 'shot', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'walk', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'wal

In [13]:
X, y = [], []  # X: data, y: labels or targets
for element in all_data[1:]:
    X.append(element[:-1])
    y.append(element[-1])

for index, element in enumerate(X[:10]):
    print(f"Data point number: {index+1}: data: {X[index]}, target: {y[index]} \n")

Data point number: 1: data: ['walk'], target: walk 

Data point number: 2: data: ['walk', 'walk'], target: walk 

Data point number: 3: data: ['walk', 'walk', 'walk'], target: walk 

Data point number: 4: data: ['walk', 'walk', 'walk', 'walk'], target: rest 

Data point number: 5: data: ['walk', 'walk', 'walk', 'walk', 'rest'], target: walk 

Data point number: 6: data: ['walk', 'walk', 'walk', 'walk', 'rest', 'walk'], target: walk 

Data point number: 7: data: ['walk', 'walk', 'walk', 'walk', 'rest', 'walk', 'walk'], target: walk 

Data point number: 8: data: ['walk', 'walk', 'walk', 'walk', 'rest', 'walk', 'walk', 'walk'], target: run 

Data point number: 9: data: ['walk', 'walk', 'walk', 'walk', 'rest', 'walk', 'walk', 'walk', 'run'], target: run 

Data point number: 10: data: ['walk', 'walk', 'walk', 'walk', 'rest', 'walk', 'walk', 'walk', 'run', 'run'], target: run 



In [14]:
from sklearn.model_selection import train_test_split

X_train, _X, y_train, _y = train_test_split(
    X, y, train_size=0.7, random_state=1234, shuffle=True
)
X_val, X_test, y_val, y_test = train_test_split(
    _X, _y, train_size=0.5, random_state=1234, shuffle=True
)

In [15]:
print(
    f"Size of training set: {len(X_train)} \n"
    f"Size of validation set: {len(X_val)} \n"
    f"Size of test set: {len(X_test)}"
)

Size of training set: 828 
Size of validation set: 178 
Size of test set: 178


In [16]:
classes = set(element for sequence in X_train for element in sequence)
classes

{'cross', 'dribble', 'pass', 'rest', 'run', 'shot', 'tackle', 'walk'}

In [17]:
label_encoder = LabelEncoder()
label_encoder.fit(X_train)

In [18]:
label_encoder.class_to_index

{'walk': 0,
 'dribble': 1,
 'rest': 2,
 'run': 3,
 'pass': 4,
 'shot': 5,
 'tackle': 6,
 'cross': 7}

In [19]:
X_train_transformed = label_encoder.encode(X_train)

In [20]:
print(X_train[0])
print(X_train_transformed[0])

['run', 'pass', 'rest', 'walk', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'run', 'walk', 'walk', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'walk', 'walk', 'dribble', 'dribble', 'dribble', 'shot', 'dribble', 'shot', 'walk', 'walk', 'tackle', 'dribble', 'dribble', 'cross', 'run', 'run', 'run', 'run', 'walk', 'dribble', 'pass', 'dribble', 'pass', 'walk', 'rest', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'run', 'run', 'tackle', 'run', 'tackle', 'run', 'run', 'run', 'run', 'run', 'tackle', 'tackle', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'walk', 'run', 'run', 'run', 'dribble', 'shot', 'shot', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'walk', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'run', 'run', 'run', 'run', 'run', 'walk', 'walk', 'walk', 'tackle', 'walk', 'walk

In [21]:
y_train_transformed = label_encoder.encode(y_train)

In [22]:
y_train_transformed[0]

3