Loading and Preprocessing AFL Match Data for Modelling

In [None]:
import pandas as pd
import numpy as np

pd.options.display.max_rows = 100
pd.options.display.max_columns = 999

%load_ext autoreload
%autoreload 2

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the merged match data and discard the 'Umpires' column straight away.
afl_data = (
    pd.read_csv("data/merged-data/afl_match_data.csv")
    .drop(columns=['Umpires'])
)
# Show the first two rows as the cell output.
afl_data.head(n=2)

In [None]:
# List the column names of the loaded frame.
afl_data.columns.tolist()

In [None]:
# Flag rows whose 'Date' compares below "2019-01-01"; the flag drives the
# train/test split further down.
# NOTE(review): this compares against a string, so it presumably relies on
# 'Date' holding ISO-formatted (YYYY-MM-DD) text — confirm against the CSV.
afl_data['ModellingFilter'] = afl_data['Date'] < "2019-01-01"
# Keep only rows from 2010 onwards.
afl_data = afl_data.loc[afl_data['Year'] >= 2010]

In [None]:
# Rows flagged by 'ModellingFilter' become the training set; all remaining
# rows become the test set.
training_data = afl_data.loc[afl_data['ModellingFilter']]
test_data = afl_data.loc[~afl_data['ModellingFilter']]

Create Preprocessor

In [None]:
from data_processing import DataPreprocessor

In [None]:
# Column-rename mapping handed to the preprocessor: names containing spaces
# are mapped to underscore-separated equivalents.
afl_rename_dict = {
    'Total Game Score': 'Total_Game_Score',
    'Home Win': 'Home_Win',
}

In [None]:
# Build the project-local preprocessor, handing it the column-rename mapping.
preprocessor = DataPreprocessor(rename_dict=afl_rename_dict)

In [None]:
# Fit on the training split only; the test split is never passed to fit().
# What fit() learns is defined in data_processing and is not visible here.
preprocessor.fit(training_data)

In [None]:
# Apply the fitted preprocessor to the training split.
training_data_preproc = preprocessor.transform(training_data)

In [None]:
# Preview the first rows of the preprocessed training data.
training_data_preproc.head()

In [None]:
# Apply the same preprocessor (fitted on the training split) to the test split.
test_data_preproc = preprocessor.transform(test_data)

In [None]:
# Preview the first rows of the preprocessed test data.
test_data_preproc.head()

Export modelling data to .csv

In [None]:
# Stack the preprocessed splits row-wise, training rows first, then test rows.
modelling_data = pd.concat(
    [training_data_preproc, test_data_preproc],
    axis="index",
)

In [None]:
# Write the combined (training + test) preprocessed data without the index.
# The path is relative to the working directory, matching how the input is
# read from "data/merged-data/..."; the previous leading "/" pointed at the
# filesystem root rather than the project's data folder.
modelling_data.to_csv("data/modelling-data/preprocessed-data/modelling_data.csv", index=False)

In [None]:
# Write the preprocessed training split without the index.
# The path is relative to the working directory, matching how the input is
# read from "data/merged-data/..."; the previous leading "/" pointed at the
# filesystem root rather than the project's data folder.
training_data_preproc.to_csv("data/modelling-data/preprocessed-data/training_data.csv", index=False)

In [None]:
# Write the preprocessed test split without the index.
# The path is relative to the working directory, matching how the input is
# read from "data/merged-data/..."; the previous leading "/" pointed at the
# filesystem root rather than the project's data folder.
test_data_preproc.to_csv("data/modelling-data/preprocessed-data/test_data.csv", index=False)