![title](../assets/problem.png)

In [None]:
import json
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
import plotly.express as px
from typing import Dict, List, Union, Any
import warnings

# Silence FutureWarning first, then every other warning category, to keep the
# notebook output uncluttered.
warnings.simplefilter("ignore", FutureWarning)
warnings.filterwarnings(action="ignore")

In [None]:
# Raise pandas' display limits (rows, columns, total width, per-cell width)
# so wide frames are shown more fully in the notebook output.
_DISPLAY_OPTIONS = {
    'display.max_rows': 500,
    'display.max_columns': 500,
    'display.width': 5000,
    'max_colwidth': 5000,
}
for _option, _value in _DISPLAY_OPTIONS.items():
    pd.set_option(_option, _value)

In [None]:
# Root directory under which every data and model artifact of this notebook lives.
BASE_PATH = "/Users/seanariel/Desktop/la-maniee/data/mlops"


def _data_path(filename):
    """Return the location of *filename* directly under BASE_PATH."""
    return "/".join((BASE_PATH, filename))


PATH_TO_SYNTHETIC_DATA = _data_path("synthetic_data_contract.csv")
PATH_TO_EXPLODED_FEATURES = _data_path("exploded_features.csv")
PATH_TO_FEATURE_STORE = _data_path("feature_store.csv")
PATH_TO_DEV_TRAINING_DATA = _data_path("dev_training.csv")
PATH_TO_DEV_TESTING_DATA = _data_path("dev_testing.csv")
PATH_TO_AUTOML_TRAINING_DATA = _data_path("automl_training.csv")
PATH_TO_PRECISION_RECALL = _data_path("precision_recall.csv")
PATH_TO_OPTIMAL_MODEL = _data_path("optimal_model.pickle")
PATH_TO_PRODUCTION_MODEL = _data_path("production_model.pickle")
PATH_TO_TRAINING_DATA = _data_path("training.csv")
PATH_TO_EXPERIMENTATION_DATA = _data_path("experimentation.csv")

# Table of Contents:
* [Overview](#first-bullet)
* [Feature Engineering](#second-bullet)
* [Model Development](#third-bullet)
* [Model Training](#fourth-bullet)
* [Model Serving](#fifth-bullet)
* [Model Experimentation](#sixth-bullet)

# Overview  <a class="anchor" id="first-bullet"></a>

### Load the data

In [None]:
# Read the synthetic game data CSV into a DataFrame used by the cells below.
synthetic_game_data = pd.read_csv(filepath_or_buffer=PATH_TO_SYNTHETIC_DATA)

### Get a first feel for the features

In [None]:
# Preview the first five rows.
synthetic_game_data.head(n=5)

In [None]:
# Print the frame's summary (column dtypes, non-null counts, memory usage).
synthetic_game_data.info()

### Check the balance/stability of the target & covariates

In [None]:
# Show per-column summary statistics (by default pandas describes the numeric
# columns when the frame has any).
synthetic_game_data.describe()

In [None]:
# Overview figure with four stacked panels:
#   1. row counts per value of the DIMENSION column,
#   2. histogram of p1_face_value (20 bin edges spanning 0..200),
#   3. histogram of p2_face_value (20 bin edges spanning 0..100),
#   4. histogram of reward (50 bin edges spanning -400..400).
DIMENSION = "contract"
distribution = synthetic_game_data[DIMENSION].value_counts()
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, figsize=(12, 12))
fig.suptitle("Overview of Distributions")
ax1.bar(distribution.index, distribution.values)
ax2.hist(synthetic_game_data["p1_face_value"], bins=np.linspace(0, 200, 20))
ax3.hist(synthetic_game_data["p2_face_value"], bins=np.linspace(0, 100, 20))
ax4.hist(synthetic_game_data["reward"], bins=np.linspace(-400, 400, 50))
# A lone ";" on its own line is a Python syntax error; render the figure
# explicitly instead, which also keeps the last hist() return value from being
# echoed as the cell output.
plt.show()

In [None]:
import plotly.express as px

DIMENSION = "starter"  # last_bidder, contract, starter

# Plot at most 10,000 randomly drawn rows.
sample_size = min(10000, len(synthetic_game_data))
reward_sample = synthetic_game_data.sample(sample_size)

# Reward histogram coloured by DIMENSION, with a marginal box plot; every
# column of the frame is exposed in the hover tooltip.
fig = px.histogram(
    reward_sample,
    x="reward",
    color=DIMENSION,
    marginal="box",
    hover_data=synthetic_game_data.columns,
)
fig.update_layout(title_text=f"Distribution of reward dimensionalized by {DIMENSION}")
fig.show()

In [None]:
# Rows whose reward lies in either band [160, 170] or [-170, -160] (bounds
# inclusive) are split off from the rest of the data.
_reward = synthetic_game_data["reward"]
_in_reward_band = _reward.between(160, 170) | _reward.between(-170, -160)

synthetic_game_data_unbalanced = synthetic_game_data[_in_reward_band]
synthetic_game_data_balanced = synthetic_game_data[~_in_reward_band]

# Keep a random 25% of the band rows (the draw is unseeded).
synthetic_game_data_rebalanced = synthetic_game_data_unbalanced.sample(frac=0.25)

# Rebind the working frame to the untouched rows plus the thinned-out band rows.
synthetic_game_data = pd.concat(
    [synthetic_game_data_balanced, synthetic_game_data_rebalanced]
)

In [None]:
# Report the (rows, columns) shape of each intermediate frame and of the result.
for _label, _frame in (
    ("synthetic_game_data_unbalanced", synthetic_game_data_unbalanced),
    ("synthetic_game_data_balanced", synthetic_game_data_balanced),
    ("synthetic_game_data_rebalanced", synthetic_game_data_rebalanced),
    ("synthetic_game_data_final", synthetic_game_data),
):
    print(_label, _frame.shape)

#### Credit

Note:
This content has been developed by Sean Ariel for educational purposes.
It is a practical training that may not be copied, reproduced, or distributed without the explicit consent of the author. © Sean Ariel