In [None]:
import pandas as pd
import joblib

# Notebook display limits: show up to 100 rows and 999 columns when a
# DataFrame is rendered as cell output.
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 999)

from expected_disposal_model.data_preparation.preprocessing import convert_chains_to_schema, filter_disposals, create_labels
from expected_disposal_model.config import raw_file_path, preprocessor_file_path, model_v1_file_path, scored_disposal_output

In [None]:
# Read the raw chain records from the CSV location defined in the project config.
chains = pd.read_csv(filepath_or_buffer=raw_file_path)
print("Chain data loaded.")

In [None]:
# Processing
# Restore the previously saved preprocessor from disk. joblib.load unpickles
# the file, so preprocessor_file_path must point at a trusted artifact.
preproc = joblib.load(preprocessor_file_path)
# Apply the loaded preprocessor to the raw chains; the result is the feature
# input later concatenated into schema_chains and passed to predict_proba.
chain_features = preproc.transform(chains)

In [None]:
# Derive the schema view of the chains, the disposal subset of that view, and
# the labels (labels are built from the raw chains, not the schema view).
schema_chains = convert_chains_to_schema(chains)
disposals = filter_disposals(schema_chains)
labels = create_labels(chains)

# Place disposals, model features and labels side by side, then rename the
# 'Disposal' column to 'Disposal_Label'.
# NOTE(review): column-wise concat aligns on the index — this assumes all
# three frames share the same index as `chains`; TODO confirm.
schema_chains = (
    pd.concat([disposals, chain_features, labels], axis="columns")
    .rename(columns={'Disposal': 'Disposal_Label'})
)
print("Preprocessing.. Complete.")

In [None]:
# Restore the saved v1 model from disk (joblib.load unpickles the file, so the
# path must point at a trusted artifact).
exp_disposal_model = joblib.load(model_v1_file_path)

# Score every row of the feature matrix and keep the second probability
# column as 'xDisposal' (presumably the positive/disposal class — verify
# against the model's class ordering).
class_probabilities = exp_disposal_model.predict_proba(chain_features)
schema_chains['xDisposal'] = class_probabilities[:, 1]
print("Scoring.. complete.")

In [None]:
# Preview the first five scored rows.
schema_chains.head(n=5)

In [None]:
# Attach the scored schema rows to the raw chains with a left join, so every
# raw chain row is kept; raw key columns are capitalised, schema keys are
# lower-case.
# NOTE(review): this overwrites `chains`, so re-running the cell merges the
# already-merged frame again — re-run from the load cell instead.
chains = pd.merge(
    chains,
    schema_chains,
    how="left",
    left_on=['Match_ID', 'Chain_Number', 'Order'],
    right_on=['match_id', 'chain_number', 'order'],
)


In [None]:
# Preview the first five rows of the merged chains.
chains.head(n=5)

In [None]:
# Export data
# Write the merged, scored chains to the output location defined in the
# project config instead of a machine-specific absolute path, so the notebook
# runs on any checkout. `scored_disposal_output` is imported from
# expected_disposal_model.config in the imports cell.
# NOTE(review): confirm the config value points at
# data/predictions/disposal_scored_chains.csv, the file previously written here.
# index=False keeps the DataFrame index out of the CSV.
chains.to_csv(scored_disposal_output, index=False)
print("Exporting.. complete.")