![title](../assets/problem.png)

In [None]:
import json
import pandas as pd
import numpy as np
import requests
import matplotlib.pyplot as plt
import plotly.express as px
from typing import Dict, List, Union, Any
import warnings

# Silence FutureWarning messages emitted while the notebook runs.
warnings.simplefilter(action='ignore', category=FutureWarning)
# NOTE(review): this second filter ignores *every* warning category, which makes
# the FutureWarning-specific filter above redundant and can hide real problems
# (deprecations, numerical warnings). Confirm the blanket ignore is intended.
warnings.filterwarnings('ignore')

In [None]:
# Raise pandas' display limits so that long / wide frames and long cell
# contents are rendered without truncation in the notebook output.
for _option, _value in (
    ('display.max_rows', 500),
    ('display.max_columns', 500),
    ('display.width', 5000),
    ('max_colwidth', 5000),
):
    pd.set_option(_option, _value)
# Do not leave the loop helpers behind in the notebook namespace.
del _option, _value

In [None]:
import os

# Root folder holding every data / model artefact used by this notebook.
# The default is the original author's local checkout; set the MLOPS_DATA_DIR
# environment variable to run the notebook against another location without
# editing this cell.
BASE_PATH = os.environ.get(
    "MLOPS_DATA_DIR",
    "/Users/seanariel/Desktop/la-maniee/data/mlops",
)

# CSV artefacts.
PATH_TO_SYNTHETIC_DATA = f"{BASE_PATH}/synthetic_data_contract.csv"
PATH_TO_EXPLODED_FEATURES = f"{BASE_PATH}/exploded_features.csv"
PATH_TO_FEATURE_STORE = f"{BASE_PATH}/feature_store.csv"
PATH_TO_DEV_TRAINING_DATA = f"{BASE_PATH}/dev_training.csv"
PATH_TO_DEV_TESTING_DATA = f"{BASE_PATH}/dev_testing.csv"
PATH_TO_AUTOML_TRAINING_DATA = f"{BASE_PATH}/automl_training.csv"
PATH_TO_PRECISION_RECALL = f"{BASE_PATH}/precision_recall.csv"
# Pickled models.
PATH_TO_OPTIMAL_MODEL = f"{BASE_PATH}/optimal_model.pickle"
PATH_TO_PRODUCTION_MODEL = f"{BASE_PATH}/production_model.pickle"
# CSV artefacts (training and experimentation).
PATH_TO_TRAINING_DATA = f"{BASE_PATH}/training.csv"
PATH_TO_EXPERIMENTATION_DATA = f"{BASE_PATH}/experimentation.csv"

# Table of Contents:
* [Overview](#first-bullet)
* [Feature Engineering](#second-bullet)
* [Model Development](#third-bullet)
* [Model Training](#fourth-bullet)
* [Model Serving](#fifth-bullet)
* [Model Experimentation](#sixth-bullet)

# Experimentation <a class="anchor" id="sixth-bullet"></a>

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn

In [None]:
# Load the experimentation results from the CSV at PATH_TO_EXPERIMENTATION_DATA.
experimentation_df = pd.read_csv(PATH_TO_EXPERIMENTATION_DATA)

In [None]:
# Preview the first rows of the experimentation data.
experimentation_df.head()

In [None]:
# Print the frame summary (columns, dtypes, non-null counts).
experimentation_df.info()

In [None]:
# Capitalise the column names that the plots below use as axis / legend labels
# ("model" -> "Model", "contract" -> "Contract", "reward" -> "Reward").
experimentation_df = experimentation_df.rename(
    columns={name: name.capitalize() for name in ("model", "contract", "reward")}
)

In [None]:
# Mean reward per model, with bootstrapped confidence-interval error bars.
SEED = 42             # seeds the bootstrap so the error bars are reproducible
CI_LEVEL = 95         # width of the confidence interval, in percent
BOOTSTRAPPING = 5000  # number of bootstrap resamples behind each interval
PALETTE = {"rf_v1": "#FF33F6", "rf_v2": "#33F0FF"}  # one colour per model

fig, ax = plt.subplots(figsize=(12, 6))

# The semicolon after the closing parenthesis suppresses the Axes repr in the
# notebook output. It must sit on the same line as the call: a line holding
# only ";" is a SyntaxError.
seaborn.barplot(
    data=experimentation_df,
    x="Model",
    y="Reward",
    estimator='mean',
    errorbar=('ci', CI_LEVEL),
    n_boot=BOOTSTRAPPING,
    seed=SEED,
    palette=PALETTE,
    capsize=0.1,
    ax=ax,
);

In [None]:
# Mean reward per contract, split by model, with bootstrapped
# confidence-interval error bars.
SEED = 42             # seeds the bootstrap so the error bars are reproducible
CI_LEVEL = 95         # width of the confidence interval, in percent
BOOTSTRAPPING = 5000  # number of bootstrap resamples behind each interval
PALETTE = {"rf_v1": "#FF33F6", "rf_v2": "#33F0FF"}  # one colour per model
# Left-to-right order of the contracts on the x axis.
ORDER = ["clubs", "diamonds", "hearts", "spades", "sans_atouts", "tout_atouts"]


fig, ax = plt.subplots(figsize=(12, 6))

# The semicolon after the closing parenthesis suppresses the Axes repr in the
# notebook output. It must sit on the same line as the call: a line holding
# only ";" is a SyntaxError.
seaborn.barplot(
    data=experimentation_df,
    x="Contract",
    y="Reward",
    hue="Model",
    order=ORDER,
    estimator='mean',
    errorbar=('ci', CI_LEVEL),
    n_boot=BOOTSTRAPPING,
    seed=SEED,
    palette=PALETTE,
    capsize=0.1,
    ax=ax,
);

In [None]:
import numpy as np


class Environment:
    """Toy environment for the bandit demo.

    It only prints the action it is asked to apply and hands back a random
    reward; no real deployment or measurement happens here.
    """

    def initialize_state(self) -> None:
        """Placeholder hook; currently does nothing."""
        pass

    @staticmethod
    def apply_step(action) -> None:
        """Announce on stdout that ``action`` is being rolled out."""
        print(f"Rolling out new policy: {action}")

    @staticmethod
    def compute_reward() -> int:
        """Return a simulated reward drawn at random from 1, 2 or 3."""
        # np.random.choice returns a NumPy scalar; convert it so callers
        # accumulate (and print) plain Python ints.
        return int(np.random.choice([1, 2, 3]))


class Agent:
    """Epsilon-greedy bandit that picks which model version to deploy.

    The agent alternates ``take_action()`` / ``observe_reward()`` calls and
    keeps, per action, how many times it was applied and the sum of the
    rewards it earned.

    Args:
        env: Object exposing ``apply_step(action)`` and ``compute_reward()``.
        epsilon: Probability of exploring (picking a random action). A falsy
            value (``None`` or ``0``) selects the decaying schedule
            ``1 / (1 + total_actions_taken)`` instead.
    """

    def __init__(self, env, epsilon=0.5):
        self.env = env
        self.actions = ["deploy_rf_v1", "deploy_rf_v2"]
        self.epsilon = epsilon
        # Per-action statistics: times applied and cumulated reward.
        self.rewards = {
            action: {"applied": 0, "rewards": 0} for action in self.actions
        }
        self.total_actions_taken = 0
        self.total_reward_accumulated = 0
        # Action waiting for its reward; None when nothing is pending.
        self.current_action = None

    def _take_random_action(self) -> str:
        """Explore: return one of the actions, picked at random."""
        # Convert the NumPy string scalar to a plain str.
        return str(np.random.choice(self.actions))

    def _take_optimal_action(self) -> str:
        """Exploit: return the action with the highest mean reward so far.

        Actions never applied are estimated at 0; ties go to the action
        listed first in ``self.actions``.
        """
        estimates = []
        for action in self.actions:
            record = self.rewards[action]
            if record["applied"]:
                estimates.append(record["rewards"] / record["applied"])
            else:
                estimates.append(0)

        return self.actions[int(np.argmax(estimates))]

    def take_action(self) -> None:
        """Choose an action (explore or exploit) and apply it to the environment."""
        if self.epsilon:
            epsilon = self.epsilon
        else:
            # Decaying exploration rate: 1, 1/2, 1/3, ... as actions pile up.
            epsilon = 1 / (1 + self.total_actions_taken)

        # Scalar draw in [0, 1): avoids comparing a size-1 array in the test below.
        p = np.random.uniform(0, 1)
        if p < epsilon:
            action = self._take_random_action()
        else:
            action = self._take_optimal_action()

        self.env.apply_step(action)
        self.current_action = action
        # Count the action only once it has been applied; the decaying
        # schedule above depends on this counter moving.
        self.total_actions_taken += 1

    def observe_reward(self) -> None:
        """Collect the reward of the pending action and update the statistics.

        Raises:
            AssertionError: If no action is pending (``take_action`` was not
                called since the last observation).
        """
        # Raised explicitly rather than through `assert` so that the check
        # still runs when Python is started with -O.
        if not self.current_action:
            raise AssertionError(
                "Can't observe a reward before having executed an action."
            )

        reward = self.env.compute_reward()
        record = self.rewards[self.current_action]
        record["rewards"] += reward
        record["applied"] += 1
        self.total_reward_accumulated += reward
        self.current_action = None


if __name__ == "__main__":
    # Demo run: wire an agent to a fresh environment ...
    environment = Environment()
    agent = Agent(environment)

    # ... play ten act / observe rounds ...
    for _ in range(10):
        agent.take_action()
        agent.observe_reward()

    # ... and show the per-action statistics it gathered.
    print(agent.rewards)

### [OPTIONAL] Assignment 6 - Airflow

Let's schedule a pipeline that continuously monitors the drift of our production model. For this, we need a workflow scheduling system with built-in ETL capabilities that allows us to:
- Retrieve the latest real-life data (coming from people actually playing with the production model)
- Compute the ROC metric to check that the performance of the model is still adequate

To do this, <a href="https://airflow.apache.org/">Airflow</a> is a fantastic tool that's going to make our lives much easier!


#### Credit

Note:
This content has been developed by Sean Ariel for educational purposes. 
It is a practical training that cannot be copied, reproduced, distributed without the explicit consent from the author. © Sean Ariel