In [None]:
from fastbook import *
from pandas.api.types import is_string_dtype, is_numeric_dtype, is_categorical_dtype
from fastai.tabular.all import *
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score, make_scorer
from sklearn.model_selection import cross_val_score
from IPython.display import Image, display_svg, SVG
import matplotlib.pyplot as plt
import joblib
import numpy as np

# Experiment 1 vs. legacy
# 1.0.0 (loss = 95)
# 1.0.1 (loss = 71)
# 1.0.2 (loss = 68)
# 1.0.3 (loss = 90) (range: -10,000 - 10,000)

# Experiment 2 vs. legacy-1 (accurate item price readings)
# 1.0.4 (loss = 142000, loss / mean_pred = 2.76) (range: -1000 - 1500) (dep_var = weighted_profit)

# Experiment 3 - (1.0.5 vs 1.1.0) (weighted_profit vs gst)
# 1.0.5 - (dep_var = weighted_profit) (range: [-400, 700]) (loss = 92000, loss / mean_pred = 2.06)
# 1.1.0 - (dep_var = gst) (range: [-400, 700]) (loss = 54, loss / mean_pred = 1.74)
# 1.1.0 wins

# Experiment 4 - (1.1.1 vs 1.2.0) (gst RF vs. gst NN)
# 1.1.1 - (dep_var = gst) (range: [-400, 700]) (loss = 70, loss / mean_pred = 1.43)
# 1.2.0 - (dep_var = gst) (range: [-400, 700]) (loss = 77, loss / mean_pred = 1.56)
# 1.2.0 wins

# Experiment 5 - (1.2.1 vs 1.2.2) (NN 49 day lookback vs. NN 140 day lookback)
# 1.2.1 - (dep_var = gst) (range: [-400, 700]) (loss = 67, loss / mean_pred = 1.53)
# 1.2.2 - (dep_var = gst) (range: [-400, 700]) (loss = 63, loss / mean_pred = 1.45)

# Experiment 6 - (legacy-2 vs 1.1.2 vs 1.2.3) (baseline vs RF GST vs NN GST)
# 63 day lookback and 14 day test set
# 1.1.2 - (dep_var = gst) (range: [-180, 400]) (loss = 53, loss / mean_pred = 1.31)
# 1.2.3 - (dep_var = gst) (range: [-180, 400]) (loss = 57, loss / mean_pred = 1.40)

# ---- Run configuration: every knob a reader might tune lives here ----

# Identification / export
model_version = '1.2.3'       # becomes the file name of the exported learner
model_can_be_dumped = True    # set to False to skip the export step

# Target and training hyper-parameters
dep_var = 'gst'               # dependent variable (column the model predicts)
num_epochs = 10               # epochs passed to fit_one_cycle
y_range_mod = 0.025           # fraction by which the observed target range is padded
layer_sizes = [200, 100]      # hidden layer widths given to tabular_learner

# Time windows, in days, counted back from the most recent day in the data
validation_lookback = 14      # the last N days form the validation set
train_start_days_past = 63    # training rows must be newer than this many days

# Columns kept for modelling (features plus the dependent variable)
wanted_columns = [
    # 1-hour / 5-minute price and volume readings, high side
    "high_price_1h", "high_price_5m",
    "high_volume_1h", "high_volume_5m",
    # 1-hour / 5-minute price and volume readings, low side
    "low_price_1h", "low_price_5m",
    "low_volume_1h", "low_volume_5m",
    # context features
    "player_count",
    "timestampElapsed",
    # dependent variable
    "gst",
]

In [None]:
# Anchor everything on the directory the notebook is run from.
path = Path.cwd()
# NOTE(review): BASE_PATH is presumably read by fastcore to display paths
# relative to this directory — confirm against the fastcore docs.
Path.BASE_PATH = path
# Datasets are expected two directory levels up, under fastai/datasets.
dsets_path = path.parent.parent / 'fastai' / 'datasets'
# Show the working directory's contents as the cell output.
path.ls()

In [None]:
# Read the raw trade log from the datasets folder.
df = pd.read_csv(dsets_path / 'osrs_trades.csv', low_memory=False)
# Make sure 'timestamp' is a datetime column, then expand it into date-part
# feature columns (later cells rely on timestampDayofyear and timestampElapsed).
make_date(df, date_field='timestamp')
df = add_datepart(df, field_name='timestamp')

In [None]:
def r_mse(pred, y):
    """Return the root-mean-squared error between `pred` and `y`, rounded to 6 decimals.

    `pred - y` is computed element-wise, so both arguments must support
    subtraction, `** 2` and `.mean()` (e.g. arrays, Series or tensors).
    """
    squared_error = (pred - y) ** 2
    return round(math.sqrt(squared_error.mean()), 6)
def drop_unwanted_columns(df, required_columns):
    """Return `df` restricted to `required_columns`, in the order given.

    The input frame itself is not modified; selecting a column that does not
    exist raises the usual pandas KeyError.
    """
    return df[required_columns]

In [None]:
procs = [Categorify, FillMissing, Normalize]


def _calendar_day_number(year, day_of_year):
    """Return a day count that keeps increasing across year boundaries.

    This is the proleptic Gregorian ordinal (the same rule as
    `datetime.date.toordinal`): days in all earlier years plus the day of the
    current year. Works element-wise on pandas Series; NaN inputs stay NaN.
    """
    prior_years = year - 1
    days_before_year = (365 * prior_years + prior_years // 4
                        - prior_years // 100 + prior_years // 400)
    return days_before_year + day_of_year


# The windows are measured on a year-aware day number rather than the bare
# day-of-year: day-of-year restarts at 1 every January, which would mis-split
# data that crosses a year boundary or spans several years. For data inside a
# single year the resulting splits are the same as with day-of-year.
day_number = _calendar_day_number(df["timestampYear"], df["timestampDayofyear"])
max_day = day_number.max()  # most recent day present in the data
minCond = (day_number > (max_day - train_start_days_past)) # day 117 is when I fixed date recording on completed_trades
maxCond = (day_number <= (max_day - validation_lookback))
# Training rows: inside the look-back window but older than the validation window.
train_idx = np.where((maxCond) & (minCond))[0]
# Validation rows: everything not at or before the validation cut-off.
valid_idx = np.where(~maxCond)[0]

# NOTE: `df` is overwritten with only `wanted_columns` here, so the date-part
# columns used above are gone afterwards; re-run the load cell before
# re-running this one.
df = drop_unwanted_columns(df, wanted_columns)
df = df.sort_index(axis=1)

splits = (list(train_idx),list(valid_idx))
cont,cat = cont_cat_split(df, max_card=9000, dep_var=dep_var)

In [None]:
# Build the preprocessed tabular object from the column split and row split
# computed in the previous cell.
to = TabularPandas(
    df,
    procs=procs,
    cat_names=cat,
    cont_names=cont,
    y_names=dep_var,
    splits=splits,
)
# Persist the preprocessed data next to the notebook, then read it back so the
# rest of the notebook works from the saved artefact.
save_pickle(path / 'preprocessed_train_valid.pkl', to)
to = load_pickle(path / 'preprocessed_train_valid.pkl')

In [None]:
# Imported under aliases so that the suggestion *functions* are never
# overwritten by the float results below; otherwise this cell cannot be re-run.
from fastai.callback.schedule import valley as suggest_valley, steep as suggest_steep

dls = to.dataloaders(1024)  # batch size 1024
xs,y = to.train.xs,to.train.y

# Widen the observed training-target range by `y_range_mod` on both sides.
# abs() is essential: without it a negative minimum is moved *towards* zero
# (e.g. -180 -> -175.5), which narrows the range and makes the lowest targets
# fall outside y_range.
min_y = y.min() - (y_range_mod * abs(y.min()))
max_y = y.max() + (y_range_mod * abs(y.max()))

learn = tabular_learner(dls, y_range=(min_y, max_y), layers=layer_sizes,
                        n_out=1, loss_func=F.mse_loss)

# Learning-rate suggestions; `steep` is the value the training cell uses.
valley, steep = learn.lr_find(suggest_funcs=(suggest_valley, suggest_steep))

In [None]:
# Train with the one-cycle schedule, using the `steep` learning-rate suggestion
# from the LR finder as the peak learning rate.
learn.fit_one_cycle(n_epoch=num_epochs, lr_max=steep)

In [None]:
# Validation inputs and targets as stored in the TabularPandas object.
valid_xs = to.valid.xs
valid_y = to.valid.y

# Predictions and matching targets from the learner.
# NOTE(review): get_preds() is called without arguments — presumably this
# evaluates the validation split; confirm against the fastai docs.
preds, targs = learn.get_preds()
rmse = r_mse(preds, targs)

# Cell output: the RMSE and the RMSE relative to the mean validation target
# (note: the denominator is the mean target, not the mean prediction).
rmse, rmse / valid_y.mean()

In [None]:
# Export the trained learner as <model_version>.pkl when dumping is enabled.
if model_can_be_dumped:
    import os

    # The destination directory can be overridden with the GE_BOT_MODEL_DIR
    # environment variable so the notebook also runs on other machines; the
    # default is the original location.
    export_dir = os.environ.get(
        'GE_BOT_MODEL_DIR',
        '/home/tristan/Documents/dev/ge_bot/prediction_api',
    )
    learn.export(os.path.join(export_dir, f'{model_version}.pkl'))
    print("dumped new model")
else:
    print("did not dump new model")