# Libraries + Torch check

In [1]:
import pandas as pd
import requests
import json
from pathlib import Path
import re
from datetime import date
import time
import os
from fastai.tabular import *
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
import sklearn.metrics
from sklearn.model_selection import train_test_split
import math

In [2]:
import torch

In [3]:
torch.cuda.device(0)

<torch.cuda.device at 0x7fba1279b790>

In [4]:
torch.cuda.get_device_name(0)

'GeForce RTX 2080'

# Data Retrieval

In [5]:
cleaned_item_names = [name[:-10] for name in list(os.walk(r'data/wiki_prices'))[0][2]]

In [6]:
cleaned_item_names[0:5]

['Potato%20seed', 'Tin%20ore', 'Gout%20tuber', 'Bronze%20bolts', 'Gold%20seal']

In [7]:
#pd.Series(cleaned_item_names).to_csv('./data/cleaned_item_names', index=False, header=False)

In [8]:
len(cleaned_item_names)

3147

In [9]:
# cleaned_item_names

In [10]:
def get_item_df(item_name, sleppy_time=1):
    base_url = r"https://oldschool.runescape.wiki/api.php?action=query&prop=revisions&rvprop=content&format=json&titles=Module%3AExchange%2F"
    postfix_url = r"%2FData"
    full_url = f'{base_url}{item_name}{postfix_url}'
    resp = requests.get(full_url)
    jsond = resp.json()
    try:
        regex_text = jsond['query']['pages'][next(iter(jsond['query']['pages'].keys()))]['revisions'][0]['*']
        #get all of the ones that have volume as well as price
        m = re.findall(r'[\d]+:[\d]+:[\d]+', regex_text)
        item_df = pd.DataFrame()
        item_df['dt'] = [int(string.split(':')[0]) for string in m]
        item_df[f'{item_name} price'] = [int(string.split(':')[1]) for string in m]
        item_df[f'{item_name} volume'] = [int(string.split(':')[2]) for string in m]
        item_df['date_obj'] = item_df['dt'].apply(lambda x: date.fromtimestamp(x))
        #take the ones that just have price so we can get the most recent price as well
        n = re.findall(r'[\d]+:[\d]+', regex_text)
        last_time = date.fromtimestamp(int(n[-1].split(':')[0]))
        last_price = int(n[-1].split(':')[1])
        item_df.loc[-1, 'date_obj'] = last_time
        item_df.loc[-1, f'{item_name} price'] = last_price
        item_df = item_df.fillna(method='ffill')
        item_df.set_index('date_obj', inplace=True)
        item_df = item_df.drop('dt', axis=1)
        return item_df
    except:
        pass



In [11]:
def retrieve_dfs(xslice = True):
    if xslice:
        item_dfs = [get_item_df(item_name,1) for item_name in cleaned_item_names[40:45]]
    else:
        item_dfs = [get_item_df(item_name,1) for item_name in cleaned_item_names]
    return item_dfs

In [12]:
item_dfs = retrieve_dfs(False)

In [13]:
item_dfs

[            Potato%20seed price  Potato%20seed volume
 date_obj                                             
 2018-12-07                  1.0              836492.0
 2018-12-09                  1.0              686407.0
 2018-12-10                  1.0              401364.0
 2018-12-11                  1.0              593491.0
 2018-12-12                  1.0              674040.0
 ...                         ...                   ...
 2020-08-02                  1.0             1062819.0
 2020-08-03                  1.0              973729.0
 2020-08-04                  1.0              931344.0
 2020-08-05                  1.0              675435.0
 2020-08-06                  1.0              675435.0
 
 [606 rows x 2 columns],
             Tin%20ore price  Tin%20ore volume
 date_obj                                     
 2018-12-09             52.0          776948.0
 2018-12-10             51.0          330517.0
 2018-12-11             50.0          604209.0
 2018-12-12            

In [14]:
def save_dfs(item_dfs):
    for df in item_dfs:
        try:
            df.to_csv(f'./data/wiki_prices/{df.columns[0]}.csv')
        except:
            pass

In [15]:
save_dfs(item_dfs)

In [16]:
def load_dfs(drop_blank=True):
    item_dfs = []
    for name in list(os.walk('./data/wiki_prices'))[0][2]:
        df = pd.read_csv(f'./data/wiki_prices/{name}')
        df = df.set_index(df.columns[0])
#         if drop_blank:
#             if df.empty:
#                 item_dfs.append(df)
#         else:
#             item_dfs.append(df)
        item_dfs.append(df)
    return item_dfs

In [17]:
item_dfs = load_dfs()

In [18]:
len(item_dfs)

3147

In [19]:
type(item_dfs)

list

In [20]:
cleaned_item_names[0]

'Potato%20seed'

In [21]:
for item_df in item_dfs:
    try:
        col = item_df.columns[0]
    except:
        invalid_dfs += 1 
        print(item_df)

In [22]:
retrieved_names = [item_df.columns[0][:-6] for item_df in item_dfs]

In [23]:
len(retrieved_names)

3147

In [24]:
len(item_dfs)

3147

In [25]:
df = pd.DataFrame({'name': retrieved_names, 'price_df': item_dfs})

# Feature creation

In [26]:
#item_df_training = tl_df['price_df'][300]

In [27]:
days_back = 90

rolling_averages = [3,7,14,21,30,60,90]

days_ahead_to_predict = 4

In [28]:
def create_predictive_features(price_df):
    item_df_training = price_df
    item_name = item_df_training.columns[0]
    for i in range(days_back):
        item_df_training[f'{item_name} {i} days ago'] = item_df_training[item_name].shift(i)
    for i in range(days_back):
        item_df_training[f'{item_name} {i} delta days ago'] = item_df_training[item_name].shift(i) - item_df_training[item_name]
    for rolling_average in rolling_averages:
        item_df_training[f'{item_name} {rolling_average} day rolling average'] = item_df_training[item_name].rolling(rolling_average).mean()
    for rolling_average in rolling_averages:
        item_df_training[f'{item_name} delta {rolling_average} day rolling average'] = item_df_training[item_name] - item_df_training[f'{item_name} {rolling_average} day rolling average']
    item_df_training['Y'] = item_df_training[item_name].shift(-1*days_ahead_to_predict)
    return item_df_training

        

In [29]:
#price_df = tl_df['price_df'][300]

In [30]:
def trim_df(item_df_training):
    trimmed_df = item_df_training.dropna(how='any')
    return trimmed_df
    

# SGD LR

In [31]:
def create_model(tl_df_row):
    trimmed_df = tl_df_row['trimmed_df']
    try:
        X = trimmed_df.drop(['Y'], axis=1)
        y = trimmed_df['Y']
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle=False)
        regr = linear_model.LinearRegression()
        regr.fit(X_train, y_train)
        y_pred = regr.predict(X_test)
        mean_squared_error = sklearn.metrics.mean_squared_error(y_test, y_pred)
        r2_score = sklearn.metrics.r2_score(y_test, y_pred)
        return (regr, y_pred, y_test, mean_squared_error, r2_score)
    except:
        return (0, 0, 0, 0, 0)

In [32]:
#plt.scatter(y_test, y_pred)

# "Backtest"

In [33]:
starting_cash = 10**7

In [34]:
slippage = 0
thresh_buy = 0.00
thresh_sell = 0
max_volume_buy = 0.01

In [35]:
def should_buy(row):
    if row['perc predicted increase'] > thresh_buy:
        return True

def should_sell(row):
    if row['perc predicted increase'] < thresh_sell:
        return True
    
def buy_price(row):
    return row['current_price']

def sell_price(row):
    return row['current_price']

def max_buy(row):
    d_stock = max(
        min(
        (row['starting_cash']//buy_price(row)),
        math.floor(row['volume']*max_volume_buy)
        ),
        0
    )
    d_cash = -1 * d_stock * buy_price(row)
    return (d_stock, d_cash)

def max_sell(row):
    d_stock = -1 * max(
        0,
        min(row['starting_stock'],
        math.floor(row['volume'] * max_volume_buy)
        )
    )
    d_cash = d_stock *-1 * sell_price(row)
    return (d_stock, d_cash)

In [36]:
def get_ending_cash(record_df):
    last_row = record_df.iloc[-1,:]
    cash = last_row['ending_cash']
    cash += sell_price(last_row) * last_row['ending_stock']
    return cash

In [37]:
def create_backtest_df(tl_df_row):
    try:
        y_pred = tl_df_row['y_pred']
        y_test = tl_df_row['y_test']
        item_df_training = tl_df_row['item_df_training']
        item_name = tl_df_row['name']

        bt_df = pd.concat([pd.Series(y_pred, index= y_test.index, name='Prediction'), y_test.rename(index='Y')], axis=1)
        bt_df['Y + 1'] = bt_df['Y'].shift(-1)
        bt_df['Y - 1'] = bt_df['Y'].shift(1)


        bt_df = bt_df.fillna(method='ffill').fillna(method='bfill')

        volume_column = f'{item_name} volume'
        volume = item_df_training[volume_column]

        bt_df = bt_df.join(item_df_training[volume_column].rename(index='volume'))
        bt_df = bt_df.join(item_df_training[f'{item_name} price'].rename(index='current_price'))
        bt_df['perc predicted increase'] = (bt_df['Prediction'] - bt_df['current_price'])/bt_df['current_price']

        bt_df['starting_stock'] = 0
        bt_df['starting_cash'] = 0
        bt_df['ending_stock'] = 0
        bt_df['ending_cash'] = 0

        bt_df.loc[bt_df.index[0], 'starting_cash'] = starting_cash
        bt_df.loc[bt_df.index[0], 'ending_cash'] = starting_cash

        bt_df['action'] = ''

        copy_df = bt_df.copy()
        record_df = bt_df.copy()

        first_row = True
        for i, row in copy_df.iterrows():
            current_row = row
            if first_row:
                prev_row = current_row
                first_row = False
                continue
            current_row['starting_cash'] = prev_row['ending_cash']
            current_row['starting_stock'] = prev_row['ending_stock']
            cash = current_row['starting_cash']
            stock = current_row['starting_stock']
            if should_buy(current_row):
                d_stock, d_cash = max_buy(current_row)
                cash += d_cash
                stock += d_stock
                current_row['action'] = f'Buying + {d_stock} stock, - {d_cash} cash'
            elif should_sell(row):
                d_stock, d_cash = max_sell(current_row)
                cash += d_cash
                stock += d_stock
                current_row['action'] = f'Selling + {d_stock} stock, + {d_cash} cash'
            else:
                current_row['action'] = 'hold'
            current_row['ending_cash'] = cash
            current_row['ending_stock'] = stock
            record_df.loc[i, :] = current_row
            prev_row = current_row
        roi = get_ending_cash(record_df)/starting_cash
        last_perc_prediction = bt_df
        return (roi, record_df)
    except:
        return (0, 0)


# Predict for right now

In [38]:
def make_predictions(tl_df_row):
    try:
        item_name = tl_df_row['name']
        regr = tl_df_row['regr']
        item_df_training = tl_df_row['item_df_training']
        current_price = item_df_training.loc[item_df_training.index[-1], f'{item_name} price']
        pred_price = regr.predict(np.array(item_df_training.drop('Y', axis=1).loc[item_df_training.index[-1],:]).reshape(1, -1))[0]
        perc_pred_increase = (pred_price - current_price)/(current_price)
        last_day = item_df_training.index[-1]
        return (perc_pred_increase, last_day, current_price)
    except:
        return(0, 0, 0)

# Filter and find trades

In [39]:
r2_score_thresh = .85
roi_thresh = 1.05
perc_pred_increase_thresh = 0.03

In [40]:
batch_size = 100


In [41]:
df_list = []

In [42]:
bought = pd.read_csv('data/bought')


In [59]:
bought['name'].values

array(['Black%20shield%20(h1)', 'Avantoe%20seed', 'Iron%20ore', 'Black%20wizard%20hat%20(g)',
       'Bottomless%20compost%20bucket', 'Mole%20slippers', 'Wooden%20shield%20(g)', "Ahrim's%20robeskirt"],
      dtype=object)

In [None]:
tl_df['item_df_training']

In [None]:
for i in range(0,df.shape[0],batch_size):
    tl_df = df.iloc[i:i+100,:]
    
    tl_df['item_df_training'] = tl_df['price_df'].apply(create_predictive_features)

    tl_df['trimmed_df'] = tl_core'] = zip(*tl_df.apply(create_model, axis=1))
df['item_df_training'].apply(trim_df)

    tl_df['regr'], tl_df['y_pred'], tl_df['y_test'], tl_df['mean_squared_error'], tl_df['r2_score'] = zip(*tl_df.apply(create_model, axis=1))


    tl_df['roi'], tl_df['record_df'] = zip(*tl_df.apply(create_backtest_df, axis=1))


    tl_df.sort_values(by='roi', ascending = False)

    tl_df['perc_pred_increase'], tl_df['last_day'], tl_df['current_price'] = zip(*tl_df.apply(make_predictions, axis=1))


    filter_tl_df = tl_df.copy()

    filter_tl_df = filter_tl_df.dropna(how='any')

    filter_tl_df =filter_tl_df.loc[filter_tl_df['r2_score'].apply(lambda x: type(x)) != list]

    filter_tl_df = filter_tl_df.loc[filter_tl_df['r2_score'] > r2_score_thresh]

    print(filter_tl_df.shape)

    filter_tl_df = filter_tl_df.loc[filter_tl_df['roi'] > roi_thresh]

    print(filter_tl_df.shape)

    filter_tl_df = filter_tl_df.loc[
            (filter_tl_df['perc_pred_increase'] > perc_pred_increase_thresh) \
        | (filter_tl_df['name'].isin(bought['name']))
                                   ]

    print(filter_tl_df.shape)

    print(filter_tl_df)
    df_list.append(filter_tl_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(24, 14)
(3, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(26, 14)
(2, 14)
(1, 14)
                           name  \
162  Black%20wizard%20hat%20(g)   

                                              price_df  \
162              Black%20wizard%20hat%20(g) price  ...   

                                      item_df_training  \
162              Black%20wizard%20hat%20(g) price  ...   

                                            trimmed_df                regr  \
162              Black%20wizard%20hat%20(g) price  ...  LinearRegression()   

                                                y_pred  \
162  [379372.0437362031, 374438.4890487031, 355055....   

                                                y_test  mean_squared_error  \
162  date_obj
2020-04-10    364179.0
2020-04-11    ...        1.045895e+08   

     r2_score      roi                                          record_df  \
162  0.956981  1.22286                 Prediction         Y     Y + 1 ...   

     perc_pred_increase    last_day  current_price  
162            0.045882  2020-0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(27, 14)
(1, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(25, 14)
(0, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(16, 14)
(1, 14)
(1, 14)
                      name                                           price_df  \
403  Black%20shield%20(h1)              Black%20shield%20(h1) price  Black...   

                                      item_df_training  \
403              Black%20shield%20(h1) price  Black...   

                                            trimmed_df                regr  \
403              Black%20shield%20(h1) price  Black...  LinearRegression()   

                                                y_pred  \
403  [2406.5847933266073, 2445.6079866859823, 2518....   

                                                y_test  mean_squared_error  \
403  date_obj
2020-04-10    2522.0
2020-04-11    25...       193299.285328   

     r2_score       roi                                          record_df  \
403  0.934113  1.086539               Prediction       Y   Y + 1   Y - ...   

     perc_pred_increase    last_day  current_price  
403            0.018047  2020-08-06         9198.0  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(25, 14)
(5, 14)
(2, 14)
               name                                           price_df  \
537  Avantoe%20seed              Avantoe%20seed price  Avantoe%20se...   
566  Molten%20glass              Molten%20glass price  Molten%20gla...   

                                      item_df_training  \
537              Avantoe%20seed price  Avantoe%20se...   
566              Molten%20glass price  Molten%20gla...   

                                            trimmed_df                regr  \
537              Avantoe%20seed price  Avantoe%20se...  LinearRegression()   
566              Molten%20glass price  Molten%20gla...  LinearRegression()   

                                                y_pred  \
537  [903.9546602037217, 995.6339051988389, 1060.42...   
566  [151.23334600354846, 151.58675614458878, 149.2...   

                                                y_test  mean_squared_error  \
537  date_obj
2020-04-10    1017.0
2020-04-11    10...        17219.925381   
566  date_o

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(27, 14)
(1, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(27, 14)
(3, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(27, 14)
(4, 14)
(1, 14)
                      name                                           price_df  \
813  Black%20dragon%20mask              Black%20dragon%20mask price  Black...   

                                      item_df_training  \
813              Black%20dragon%20mask price  Black...   

                                            trimmed_df                regr  \
813              Black%20dragon%20mask price  Black...  LinearRegression()   

                                                y_pred  \
813  [14779.960978550194, 14565.765177768944, 14224...   

                                                y_test  mean_squared_error  \
813  date_obj
2020-04-10    13752.0
2020-04-11    1...       835969.557629   

     r2_score       roi                                          record_df  \
813  0.931644  1.079196                Prediction        Y    Y + 1    ...   

     perc_pred_increase    last_day  current_price  
813            0.050376  2020-08-06        28100.0  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(28, 14)
(3, 14)
(1, 14)
                    name                                           price_df  \
916  Ahrim's%20robeskirt              Ahrim's%20robeskirt price  Ahrim's...   

                                      item_df_training  \
916              Ahrim's%20robeskirt price  Ahrim's...   

                                            trimmed_df                regr  \
916              Ahrim's%20robeskirt price  Ahrim's...  LinearRegression()   

                                                y_pred  \
916  [2209740.230924187, 2214525.230924187, 2302695...   

                                                y_test  mean_squared_error  \
916  date_obj
2020-04-10    2296793.0
2020-04-11   ...        1.280292e+10   

     r2_score       roi                                          record_df  \
916   0.85545  1.213372                Prediction          Y      Y + 1...   

     perc_pred_increase    last_day  current_price  
916            0.013726  2020-08-06      1707268.0  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(25, 14)
(2, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(21, 14)
(1, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(25, 14)
(0, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(34, 14)
(2, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(28, 14)
(0, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(34, 14)
(4, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(26, 14)
(7, 14)
(2, 14)
                       name  \
1627  Wooden%20shield%20(g)   
1679                   Logs   

                                               price_df  \
1627              Wooden%20shield%20(g) price  Woode...   
1679              Logs price  Logs volume  Logs pric...   

                                       item_df_training  \
1627              Wooden%20shield%20(g) price  Woode...   
1679              Logs price  Logs volume  Logs pric...   

                                             trimmed_df                regr  \
1627              Wooden%20shield%20(g) price  Woode...  LinearRegression()   
1679              Logs price  Logs volume  Logs pric...  LinearRegression()   

                                                 y_pred  \
1627  [76851.75533658994, 73501.19283658994, 76714.3...   
1679  [34.0231566269284, 34.6637318204956, 33.649976...   

                                                 y_test  mean_squared_error  \
1627  date_obj
2020-04-07    8

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(27, 14)
(1, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(25, 14)
(4, 14)
(1, 14)
            name                                           price_df  \
1868  Iron%20ore              Iron%20ore price  Iron%20ore volum...   

                                       item_df_training  \
1868              Iron%20ore price  Iron%20ore volum...   

                                             trimmed_df                regr  \
1868              Iron%20ore price  Iron%20ore volum...  LinearRegression()   

                                                 y_pred  \
1868  [43.61558479355354, 45.38732281106253, 41.8277...   

                                                 y_test  mean_squared_error  \
1868  date_obj
2020-02-25    43.0
2020-02-26    43.0...           119.40746   

      r2_score       roi                                          record_df  \
1868  0.930788  3.374128              Prediction     Y  Y + 1  Y - 1    ...   

      perc_pred_increase    last_day  current_price  
1868            0.026681  2020-08-06           78.0  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(31, 14)
(8, 14)
(2, 14)
                 name                                           price_df  \
1903  Team%20cape%20x              Team%20cape%20x price  Team%20cape...   
1974    Raw%20lobster              Raw%20lobster price  Raw%20lobster...   

                                       item_df_training  \
1903              Team%20cape%20x price  Team%20cape...   
1974              Raw%20lobster price  Raw%20lobster...   

                                             trimmed_df                regr  \
1903              Team%20cape%20x price  Team%20cape...  LinearRegression()   
1974              Raw%20lobster price  Raw%20lobster...  LinearRegression()   

                                                 y_pred  \
1903  [1709621.8247604927, 1659467.3247604927, 16482...   
1974  [116.07391029436896, 113.52144485828204, 109.3...   

                                                 y_test  mean_squared_error  \
1903  date_obj
2020-04-21    1641159.0
2020-04-22   ...        3.910958e+

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(27, 14)
(2, 14)
(0, 14)
Empty DataFrame
Columns: [name, price_df, item_df_training, trimmed_df, regr, y_pred, y_test, mean_squared_error, r2_score, roi, record_df, perc_pred_increase, last_day, current_price]
Index: []


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value 

(27, 14)
(4, 14)
(1, 14)
            name                                           price_df  \
2158  Red%20bead              Red%20bead price  Red%20bead volum...   

                                       item_df_training  \
2158              Red%20bead price  Red%20bead volum...   

                                             trimmed_df                regr  \
2158              Red%20bead price  Red%20bead volum...  LinearRegression()   

                                                 y_pred  \
2158  [782.3452955083033, 777.6791578129908, 804.422...   

                                                 y_test  mean_squared_error  \
2158  date_obj
2020-04-24     810.0
2020-04-25     8...         19451.37449   

      r2_score       roi                                          record_df  \
2158  0.856609  1.346445               Prediction       Y   Y + 1   Y - ...   

      perc_pred_increase    last_day  current_price  
2158            0.040103  2020-08-06         2567.0  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  return array(a, dtype, copy=False, order=order)


In [None]:
filter_tl_df

In [None]:
df_list

In [None]:
trading_df = pd.concat(df_list, axis=0).sort_values(by='perc_pred_increase', ascending=False)
trading_df.columns

In [None]:
trading_df['pred_price'] = (trading_df['perc_pred_increase'] + 1) * trading_df['current_price']

In [49]:
#trading_df.loc[:,['name', 'mean_squared_error', 'r2_score', 'roi', 'perc_pred_increase','last_day', 'current_price']]

In [50]:
to_buy = trading_df.loc[~trading_df['name'].isin(bought['name'])].loc[:,['name', 'mean_squared_error', 'r2_score', 'roi', 'perc_pred_increase','last_day', 'current_price', 'pred_price']]
to_buy

Unnamed: 0,name,mean_squared_error,r2_score,roi,perc_pred_increase,last_day,current_price,pred_price
813,Black%20dragon%20mask,835969.6,0.931644,1.079196,0.050376,2020-08-06,28100.0,29515.56
2616,Frog%20slippers,22214610.0,0.960047,1.078733,0.043556,2020-08-06,130210.0,135881.4
566,Molten%20glass,39.31459,0.875348,1.072711,0.043016,2020-08-06,116.0,120.9899
2895,Dragon%20harpoon,658199300.0,0.856366,1.332873,0.040301,2020-08-06,474534.0,493658.2
2158,Red%20bead,19451.37,0.856609,1.346445,0.040103,2020-08-06,2567.0,2669.945
1974,Raw%20lobster,186.9886,0.870432,1.66684,0.03713,2020-08-06,215.0,222.9829
2373,Dragon%20boots%20ornament%20kit,13402570.0,0.880747,1.060218,0.036207,2020-08-06,51870.0,53748.04
1903,Team%20cape%20x,3910958000.0,0.91063,1.364283,0.031985,2020-08-06,2070510.0,2136735.0
1679,Logs,134.8888,0.88786,1.792911,0.030923,2020-08-06,54.0,55.66985


In [51]:
to_sell = trading_df.loc[trading_df['name'].isin(bought['name'])].loc[:,['name', 'mean_squared_error', 'r2_score', 'roi', 'perc_pred_increase','last_day', 'current_price', 'pred_price']]
to_sell

Unnamed: 0,name,mean_squared_error,r2_score,roi,perc_pred_increase,last_day,current_price,pred_price
537,Avantoe%20seed,17219.93,0.888172,1.077821,0.089764,2020-08-05,482.0,525.26629
162,Black%20wizard%20hat%20(g),104589500.0,0.956981,1.22286,0.045882,2020-08-06,278412.0,291186.168736
1868,Iron%20ore,119.4075,0.930788,3.374128,0.026681,2020-08-06,78.0,80.081084
3020,Mole%20slippers,336539700.0,0.968196,2.013312,0.026402,2020-08-06,493025.0,506042.075941
403,Black%20shield%20(h1),193299.3,0.934113,1.086539,0.018047,2020-08-06,9198.0,9363.994217
1627,Wooden%20shield%20(g),8101713.0,0.909752,1.090331,0.016458,2020-08-06,85373.0,86778.067837
2469,Bottomless%20compost%20bucket,1370366000.0,0.938613,1.206505,-0.045255,2020-08-06,308859.0,294881.514264



# Update with actual trades

In [55]:
bought.loc[11] = "Ahrim's%20robeskirt"

In [56]:
bought

Unnamed: 0,name
0,Black%20shield%20(h1)
1,Avantoe%20seed
2,Iron%20ore
3,Black%20wizard%20hat%20(g)
4,Bottomless%20compost%20bucket
5,Mole%20slippers
6,Wooden%20shield%20(g)
10,Ahrim's%20robeskirt


In [289]:
currently_buying = to_buy['name'][0:2]
currently_buying


2499           Rune%20arrow
916     Ahrim's%20robeskirt
Name: name, dtype: object

In [173]:
bought.merge(currently_buying, how='outer')

Unnamed: 0,name
0,Black%20shield%20(h1)
1,Avantoe%20seed
2,Iron%20ore
3,Black%20wizard%20hat%20(g)
4,Bottomless%20compost%20bucket
5,Mole%20slippers
6,Wooden%20shield%20(g)


In [174]:
#bought = bought.merge(currently_buying, how='outer')

In [154]:
#bought = bought.drop([0,2,3])

In [161]:
#bought.loc[-1] = 'Bottomless%20compost%20bucket'

In [175]:
bought

Unnamed: 0,name
0,Black%20shield%20(h1)
1,Avantoe%20seed
2,Iron%20ore
3,Black%20wizard%20hat%20(g)
4,Bottomless%20compost%20bucket
5,Mole%20slippers
6,Wooden%20shield%20(g)


In [57]:
#bought.merge(currently_buying)
bought.to_csv('data/bought', index=False)

In [58]:
pd.read_csv('data/bought')

Unnamed: 0,name
0,Black%20shield%20(h1)
1,Avantoe%20seed
2,Iron%20ore
3,Black%20wizard%20hat%20(g)
4,Bottomless%20compost%20bucket
5,Mole%20slippers
6,Wooden%20shield%20(g)
7,Ahrim's%20robeskirt


In [None]:
trading_

In [None]:
trading_df.loc[:,['name', 'mean_squared_error', 'r2_score', 'roi', 'perc_pred_increase','last_day']].to_csv('data/trading_8.2.csv')

# fastai tabular data

In [None]:
procs = [Normalize]

In [None]:
valid_idx = range(int(len(df)*.8), len(df)-60)

In [None]:
dep_var = 'Y'

In [None]:
path = Path(f'./data/wiki_urls/{item_name}')

In [None]:
trimmed_df['date_obj'] = trimmed_df.index

In [None]:
trimmed_df['date_obj'] = trimmed_df['date_obj'].apply(lambda x: pd.to_datetime(x))

In [None]:
time_trimmed_df = trimmed_df.loc[trimmed_df['date_obj'] > pd.to_datetime('2018-05-01')]

In [None]:
time_trimmed_df[item_name].plot()

In [None]:
time_trimmed_df = time_trimmed_df.drop(columns=['date_obj'], axis=1)

In [None]:
trimmed_df = time_trimmed_df

In [None]:
df = trimmed_df

In [None]:
df

In [None]:
data = TabularDataBunch.from_df(path, df, dep_var, valid_idx=valid_idx, procs=procs, cat_names=[])

In [None]:
learn = tabular_learner(data, layers=[500,100], emb_szs={'native-country': 10}, metrics=mean_squared_error)

In [None]:
data.show_batch()

In [None]:
learn.save('stage-0')

In [None]:
learn.lr_find(end_lr=1e3)

In [None]:
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(5, max_lr=1e-2)

In [None]:
learn.fit_one_cycle(5, max_lr=1e-2)

In [None]:
learn.show_results(ds_type=DatasetType.Train)

In [None]:
learn.save('stage-1')

In [None]:
learn = learn.load('stage-1')

In [None]:
learn.lr_find()

In [None]:
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(5, 3e-5)

In [None]:
learn.show_results(ds_type=DatasetType.Train)

In [None]:
learn.save('stage-2')

In [None]:
learn.lr_find()

In [None]:
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(5, 3e-6)

In [None]:
learn.save('stage-3')


In [None]:
learn.show_results(ds_type=DatasetType.Train)

In [None]:
learn.lr_find()
learn.recorder.plot()

In [None]:
learn.fit_one_cycle(5, 3e-4)

In [None]:
learn.save('stage-4')

In [None]:
learn.lr_find(end_lr=1e2)
learn.recorder.plot()

In [None]:
learn.show_results(ds_type=DatasetType.Train)

In [None]:
learn.fit_one_cycle(10, 1e-4)

In [None]:
learn = learn.load('stage-4')

In [None]:
learn.show_results(ds_type=DatasetType.Train)

# Plot results

In [None]:
int(learn.predict(df.iloc[0])[1])

In [None]:
learn.predict(df.iloc[0])

In [None]:
plot_x = [float(learn.predict(df.iloc[i])[1]) for i in range(len(df))]

In [None]:
plot_y = df['Y']

In [None]:
plot_y

In [None]:
plot_x

In [None]:
plot_df = pd.DataFrame(plot_y)

In [None]:
plot_df['x'] = plot_x

In [None]:
plot_df.head()

In [None]:
plot_df.plot()

In [None]:
plt.plot(plot_x)

In [None]:
plt.plot(plot_y)

In [None]:
plt.plot(range(len(plot_x)),np.array([plot_x,plot_y]).T)

In [None]:
len(plot_x)

In [None]:
plt.plot(range(len(plot_x[len(plot_x)-60:])), np.array([plot_x[len(plot_x)-60:], plot_y[len(plot_y)-60:]]).T)
plt.legend()

In [None]:
plt.plot(plot_x[len(plot_x)-60:])

In [None]:
len(plot_x)

In [None]:
plt.plot(plot_y[len(plot_y)-60:])

In [None]:
x_series = pd.Series(plot_x, name='Predict')

In [None]:
type(x_series)

In [None]:
type(plot_y)

In [None]:
y_series = plot_y.reindex(x_series.index)

In [None]:
y_series

In [None]:
x_series

In [None]:
pd.concat([y_series, x_series],  axis=1).iloc[1705-60:].plot()

In [None]:
learn.drop

In [None]:
learn.save('stage-final')
learn.export()

# Load in the full data (recent rows will have been dropped because Ys were N/A)

In [None]:
prediction_df = item_df_training

In [None]:
prediction_df['constant'] = time_trimmed_df['Y'].mean() *scaling_factor

In [None]:
prediction_df.columns

In [None]:
prediction_df = prediction_df.fillna(method='ffill').fillna(method='bfill').fillna(0)

In [None]:
predict_data = TabularDataBunch.from_df(path, prediction_df, dep_var, valid_idx=valid_idx, procs=procs, cat_names=[])

In [None]:
predict_learn = tabular_learner(data, layers=[500,100], emb_szs={'native-country': 10}, metrics=mean_squared_error)

In [None]:
predict_learn =predict_learn.load('stage-final')

In [None]:
predict_learn.show_results(ds_type=DatasetType.Train)

In [None]:
predict_learn.predict(prediction_df.iloc[0])

In [None]:
predict_learn.predict(prediction_df.iloc[100])

In [None]:
prediction_df

In [None]:
predictions = [float(predict_learn.predict(prediction_df.iloc[i])[1]) for i in range(len(prediction_df))]


In [None]:
predictions

In [None]:
prediction_df.tail()

In [None]:
prediction_df['predictions'] = predictions

In [None]:
prediction_df['predictions'].tail(30)

In [None]:
prediction_df

#### 