# Model Training Notebook

In [19]:
import pickle
from pathlib import Path
from enum import Enum

import numpy as np
import pandas as pd
from numpy.random import default_rng
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import PassiveAggressiveRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from simulator.objects.policies.architectures import ModelTask
from simulator.objects.policies.architectures.perceptron import MultiLayerPerceptron
from simulator.objects.stock import Stock

np.random.seed(0)
rng = default_rng()

In [20]:
def generate_example_stock_features(n_stocks: int) -> np.ndarray:
    output = []
    for _ in range(n_stocks):
        cash = rng.uniform(-10000, 100000, size=1)[0]
        earning_value_of_assets = rng.uniform(10000, 30000, size=1)[0]
        latest_quarterly_earnings = rng.uniform(10000, 30000, size=1)[0]
        start_price = rng.uniform(10.0, 1010.0, size=1)[0]
        price_slope = rng.uniform(-0.005, 0.005, size=1)[0]
        growth_component = start_price + price_slope * np.arange(0, 1825)
        noise_component = np.random.normal(loc=0, scale=0.001, size=(1825,))
        price_history = growth_component + noise_component
        quality_of_leadership = np.random.uniform(0.0, 1.0, size=1)[0]
        stock = Stock(
            cash=cash,
            earning_value_of_assets=earning_value_of_assets,
            latest_quarterly_earnings=latest_quarterly_earnings,
            price_history=price_history,
            quality_of_leadership=quality_of_leadership,
            stock_volatility=0.5,
        )

        output.append(np.append(stock.get_stock_features(), 0))

    return np.array(output)

In [21]:
N_SAMPLES = 5000
TEST_RATIO = 0.2

# NOTE: SWITCH TO PERCENT ERROR LOSS OR SOME VARIANT

stock_features = generate_example_stock_features(N_SAMPLES)
stock_labels = stock_features[:, 0]

stock_dataset = pd.DataFrame(stock_features)
X = stock_dataset
y = stock_dataset[0]

test_length = int(N_SAMPLES * TEST_RATIO)
train_length = N_SAMPLES - test_length

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_RATIO, random_state=0
)

In [22]:
class ModelType(Enum):
    RANDOM_FOREST = "random_forest"
    PASSIVE_AGGRESSIVE = "passive_aggressive"

In [None]:
model_type = ModelType.PASSIVE_AGGRESSIVE

if model_type == ModelType.RANDOM_FOREST:
    model = RandomForestRegressor(
        n_estimators=100,
        random_state=0,
    )
elif model_type == ModelType.PASSIVE_AGGRESSIVE:
    model = PassiveAggressiveRegressor()

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print(mean_squared_error(y_test, y_pred))
# for feature, importance in zip(X.columns, model.feature_importances_):
#     print(f"{feature}: {importance}")

0.004427363636332812


AttributeError: 'PassiveAggressiveRegressor' object has no attribute 'feature_importances_'

In [None]:
outfile = Path("passive_aggressive.pkl")
with outfile.open("wb") as model_file:
    pickle.dump(model, model_file)