In [1]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Rescaling, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
! ls '/content/drive/'

MyDrive


In [4]:
# Make the project folder on the mounted Drive the current working directory.
os.chdir('/content/drive/MyDrive/Bundesliga')

In [5]:
! pwd

/content/drive/MyDrive/Bundesliga


In [6]:
# ! unzip dfl-bundesliga-data-shootout.zip

In [7]:
# Load the training annotations (one row per timestamped event marker).
df_train = pd.read_csv('/content/drive/MyDrive/Bundesliga/train.csv')

In [8]:
# `time` scaled by 1000 and rounded to the nearest whole number, stored as an
# integer column (milliseconds, going by the column name).
df_train['time_ms'] = df_train['time'].mul(1000).round().astype(int)

In [9]:
# Preview the first rows (now including time_ms) and the table dimensions.
display(df_train.head())
df_train.shape

Unnamed: 0,video_id,time,event,event_attributes,time_ms
0,1606b0e6_0,200.265822,start,,200266
1,1606b0e6_0,201.15,challenge,['ball_action_forced'],201150
2,1606b0e6_0,202.765822,end,,202766
3,1606b0e6_0,210.124111,start,,210124
4,1606b0e6_0,210.87,challenge,['opponent_dispossessed'],210870


(11218, 5)

In [10]:
# Keep only the rows that carry event attributes. The copy makes df_events an
# independent frame, so adding columns to it later leaves df_train untouched.
df_events = df_train.loc[df_train['event_attributes'].notna()].copy()

In [11]:
# Per-event key: the video id immediately followed by the time_ms value
# (plain concatenation, no separator between the two parts).
df_events['frame_id'] = df_events['video_id'].str.cat(df_events['time_ms'].astype(str))

In [12]:
# Reordering columns: identifiers first, then timing, then the labels.
df_events = df_events.loc[:, [
    'video_id',
    'frame_id',
    'time',
    'time_ms',
    'event',
    'event_attributes',
]]

In [13]:
# Preview the attribute-carrying events after the column reorder, plus the table dimensions.
display(df_events.head())
df_events.shape

Unnamed: 0,video_id,frame_id,time,time_ms,event,event_attributes
1,1606b0e6_0,1606b0e6_0201150,201.15,201150,challenge,['ball_action_forced']
4,1606b0e6_0,1606b0e6_0210870,210.87,210870,challenge,['opponent_dispossessed']
7,1606b0e6_0,1606b0e6_0219230,219.23,219230,throwin,['pass']
10,1606b0e6_0,1606b0e6_0224430,224.43,224430,play,"['pass', 'openplay']"
13,1606b0e6_0,1606b0e6_0229390,229.39,229390,play,"['pass', 'openplay']"


(4382, 6)

Building a very basic baseline model:

In [14]:
# Rows whose attribute string begins with "['pass" (any pass variant).
# Rows with missing attributes never match.
df_pass = df_train.loc[df_train['event_attributes'].str.startswith("['pass", na=False)]

In [15]:
# Preview the pass rows and show how many there are.
display(df_pass.head())
df_pass.shape

Unnamed: 0,video_id,time,event,event_attributes,time_ms
7,1606b0e6_0,219.23,throwin,['pass'],219230
10,1606b0e6_0,224.43,play,"['pass', 'openplay']",224430
13,1606b0e6_0,229.39,play,"['pass', 'openplay']",229390
16,1606b0e6_0,236.71,play,"['pass', 'openplay']",236710
17,1606b0e6_0,239.35,play,"['pass', 'openplay']",239350


(3622, 5)

In [16]:
# Frequency of each distinct event_attributes value among the events.
df_events['event_attributes'].value_counts()

['pass', 'openplay']                  3337
['ball_action_forced']                 239
['pass']                               154
['opponent_dispossessed']              138
['pass', 'freekick']                   127
['fouled']                             111
['cross', 'openplay']                   80
['challenge_during_ball_transfer']      53
['possession_retained']                 44
['opponent_rounded']                    39
['cross', 'corner']                     33
['cross']                               18
['cross', 'freekick']                    5
['pass', 'corner']                       4
Name: event_attributes, dtype: int64

In [17]:
# Majority-class baseline: share of events that carry the single most
# frequent event_attributes value.
attr_stats = df_events['event_attributes'].describe()
majority_share = attr_stats['freq'] / attr_stats['count']
print(f"Accuracy if classifying every class as {attr_stats['top']}: {majority_share:.2%}")

Accuracy if classifying every class as ['pass', 'openplay']: 76.15%


In [18]:
# Baseline for the coarser question "is it any kind of pass?":
# number of pass rows divided by the number of events.
pass_share = len(df_pass) / len(df_events)
print(f'Accuracy if classifying every class as a pass (in general) {pass_share:.2%}')

Accuracy if classifying every class as a pass (in general) 82.66%


In [19]:
# Frequency of each value of the `event` column among the events.
df_events['event'].value_counts()

play         3586
challenge     624
throwin       172
Name: event, dtype: int64

In [20]:
# Restrict the events to the single video '1606b0e6_0' and display them.
df_video1 = df_events.loc[df_events['video_id'].eq('1606b0e6_0')]
df_video1

Unnamed: 0,video_id,frame_id,time,time_ms,event,event_attributes
1,1606b0e6_0,1606b0e6_0201150,201.15,201150,challenge,['ball_action_forced']
4,1606b0e6_0,1606b0e6_0210870,210.87,210870,challenge,['opponent_dispossessed']
7,1606b0e6_0,1606b0e6_0219230,219.23,219230,throwin,['pass']
10,1606b0e6_0,1606b0e6_0224430,224.43,224430,play,"['pass', 'openplay']"
13,1606b0e6_0,1606b0e6_0229390,229.39,229390,play,"['pass', 'openplay']"
...,...,...,...,...,...,...
986,1606b0e6_0,1606b0e6_02861430,2861.43,2861430,play,"['pass', 'openplay']"
989,1606b0e6_0,1606b0e6_02900110,2900.11,2900110,challenge,['ball_action_forced']
992,1606b0e6_0,1606b0e6_02903430,2903.43,2903430,challenge,['fouled']
995,1606b0e6_0,1606b0e6_03000870,3000.87,3000870,play,"['pass', 'freekick']"


In [21]:
# Target labels: the `event` value of every row of the selected video,
# shown as a one-column table.
y_train_val = df_video1.loc[:, 'event']
y_train_val.to_frame()

Unnamed: 0,event
1,challenge
4,challenge
7,throwin
10,play
13,play
...,...
986,play
989,challenge
992,challenge
995,play


In [22]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the `event` label of the selected video into y_train_val.
#
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old spelling, so try the new keyword first and fall back to the
# old one on installations that do not know it yet.
try:
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

current_feat = ['event']

# Read the raw labels from df_video1 instead of transforming y_train_val in
# place: the cell can then be re-run without failing on the already-dropped
# 'event' column.
labels_raw = df_video1[current_feat]
print('Shape of y_train_val before transformation: ', labels_raw[current_feat[0]].shape)

# Fit and transform data; keep the original row index on the encoded frame.
encoder.fit(labels_raw)
y_train_val = pd.DataFrame(
    encoder.transform(labels_raw),
    columns=encoder.get_feature_names_out(current_feat),
    index=labels_raw.index,
)

print('Shape of y_train_val after transformation: ', y_train_val.shape)

Shape of y_train_val before transformation:  (396,)
Shape of y_train_val after transformation:  (396, 3)


Files without compression:

In [23]:
# X_train = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/1606b0e6_0.npy')
# X_train2 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/1606b0e6_1.npy')
# X_train3 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/35bd9041_0.npy')
# X_train4 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/35bd9041_1.npy')
# X_train5 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/3c993bd2_0.npy')

In [24]:
# X_train.shape
# X_train2.shape, X_train3.shape, X_train4.shape, X_train5.shape

In [25]:
# X_train.max()
# X_train2.max(), X_train3.max(), X_train4.max(), X_train5.max()

Compressed files:

In [26]:
# Open the compressed .npz archive holding the frames for video 1606b0e6_0.
# The image array itself is pulled out of the archive in a later cell via
# `.f.arr_0`.
X_train_val = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz')
# NOTE(review): the commented-out loads for X_train3 ... X_train12 below all
# point at 1606b0e6_0.npz again (copy-paste); fix the file names before
# re-enabling them.
# X_train2 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_1.npz').f.arr_0
# X_train3 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0
# X_train4 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0
# X_train5 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0
# X_train6 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0
# X_train7 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0
# X_train8 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0
# X_train9 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0
# X_train10 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0
# X_train11 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0
# X_train12 = np.load('/content/drive/MyDrive/Bundesliga/PicsFromVids/CompressedPic/1606b0e6_0.npz').f.arr_0

In [27]:
# Shape of the array stored under the key 'arr_0' in the loaded archive.
X_train_val['arr_0'].shape

(396, 480, 640, 3)

In [28]:
# Replace the loaded .npz archive with the image array stored in it.
# The isinstance guard makes the cell safe to re-run: once X_train_val is
# already an ndarray there is nothing left to extract (and no `.f` attribute).
if not isinstance(X_train_val, np.ndarray):
    # The `with` block closes the archive's underlying file once the array
    # has been read into memory.
    with X_train_val as npz_archive:
        X_train_val = npz_archive['arr_0']

In [29]:
# rows = y_train_val.shape[0]
# TRAIN_SIZE = 0.8
# index_val = round(rows * TRAIN_SIZE)

# X_train = X_train_val[:index_val]
# y_train = y_train_val[:index_val]
# X_val = X_train_val[index_val:]
# y_val = y_train_val[index_val:]

In [30]:
# The manual train/validation split is commented out (model.fit is called with
# validation_split instead), so X_train, X_val, y_train and y_val are never
# defined and asserting on them raised NameError on a fresh run.
# Check the invariant that training actually relies on: one label row per image.
assert X_train_val.shape[0] == y_train_val.shape[0], (
    f'images ({X_train_val.shape[0]}) and labels ({y_train_val.shape[0]}) are misaligned'
)

In [31]:
# Per-image input shape: axes 1-3 of the image array (everything but the
# leading sample axis).
input_shape = X_train_val.shape[1:4]

# Convolutional classifier: pixel rescaling, five convolution layers with
# interleaved max-pooling, then two dense layers and a 3-way softmax output.
model = Sequential([
    # Scale pixel values by 1/255 inside the model.
    Rescaling(scale=1/255, input_shape=input_shape),

    Conv2D(filters=96, kernel_size=11, strides=4, activation='relu'),
    MaxPooling2D(pool_size=3, strides=2),

    Conv2D(filters=256, kernel_size=5, activation='relu'),
    MaxPooling2D(pool_size=3, strides=2),

    Conv2D(filters=384, kernel_size=3, activation='relu'),
    Conv2D(filters=384, kernel_size=3, activation='relu'),
    Conv2D(filters=256, kernel_size=3, activation='relu'),
    MaxPooling2D(),

    Flatten(),
    Dense(units=256, activation='relu'),
    Dense(units=256, activation='relu'),
    # One output unit per one-hot label column.
    Dense(units=3, activation='softmax'),
])

In [32]:
# Check the model's expected input shape and its output shape.
model.input_shape, model.output_shape

((None, 480, 640, 3), (None, 3))

In [33]:
# Shapes of the image array and of the one-hot label table passed to training.
X_train_val.shape, y_train_val.shape

((396, 480, 640, 3), (396, 3))

In [34]:
# Count how often each distinct one-hot label row occurs.
y_train_val.value_counts()

event_challenge  event_play  event_throwin
0.0              1.0         0.0              319
1.0              0.0         0.0               56
0.0              0.0         1.0               21
dtype: int64

In [35]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 rescaling (Rescaling)       (None, 480, 640, 3)       0         
                                                                 
 conv2d (Conv2D)             (None, 118, 158, 96)      34944     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 58, 78, 96)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 54, 74, 256)       614656    
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 26, 36, 256)      0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 24, 34, 384)       8

In [36]:
# Configure training: categorical cross-entropy (matching the one-hot labels
# and softmax output), the Adam optimizer, and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    # `metrics` is documented as a list; newer Keras releases reject a bare string.
    metrics=['accuracy'],
)

In [37]:
# Early stopping on validation loss with a patience of 5 epochs; the weights
# from the best epoch are restored when training stops.
callback = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [38]:
# Upper bound on training epochs; the early-stopping callback may end sooner.
EPOCHS = 100

# Train on the images and one-hot labels, with 20% of the samples set aside
# for validation via validation_split.
model.fit(
    x=X_train_val,
    y=y_train_val,
    batch_size=16,
    epochs=EPOCHS,
    validation_split=0.2,
    callbacks=[callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100


<keras.callbacks.History at 0x7f3315000650>