# Huge Stock Market Dataset
Kaggle dataset: https://www.kaggle.com/borismarjanovic/price-volume-data-for-all-us-stocks-etfs  
* XGBoost for stock trend & prices prediction  
https://www.kaggle.com/mtszkw/xgboost-for-stock-trend-prices-prediction

# Modules

In [56]:
import os
import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV

# Time series decomposition
!pip install stldecompose
from stldecompose import decompose

# Chart drawing
import plotly as py
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Silence FutureWarning / DeprecationWarning noise (mostly raised via sklearn)
simplefilter('ignore', FutureWarning)
simplefilter('ignore', DeprecationWarning)

# Let plotly figures render inline while the kernel is running
init_notebook_mode(connected=True)

# Build a template that carries the custom background colors and register it
# as the default for every figure created afterwards
layout = go.Layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(250,250,250,0.8)',
)
fig = go.Figure(layout=layout)
templated_fig = pio.to_templated(fig)
pio.templates['my_template'] = templated_fig.layout.template
pio.templates.default = 'my_template'

Collecting stldecompose
  Downloading stldecompose-0.0.5-py2.py3-none-any.whl (6.9 kB)
Installing collected packages: stldecompose
Successfully installed stldecompose-0.0.5


ImportError: cannot import name '_maybe_get_pandas_wrapper_freq' from 'statsmodels.tsa.filters._utils' (C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\filters\_utils.py)

Chainer: 일본에서 만들어진 딥러닝 프레임워크  
https://tensorflow.blog/tag/chainer/

# Load data and set the date index

In [43]:
# Read the goog.us price/volume file into a DataFrame
data = pd.read_csv(filepath_or_buffer='./data/data/Stocks/goog.us.txt')

In [44]:
# Summarize column dtypes and non-null counts to sanity-check the load
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 916 entries, 0 to 915
Data columns (total 7 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Date     916 non-null    object 
 1   Open     916 non-null    float64
 2   High     916 non-null    float64
 3   Low      916 non-null    float64
 4   Close    916 non-null    float64
 5   Volume   916 non-null    int64  
 6   OpenInt  916 non-null    int64  
dtypes: float64(4), int64(2), object(1)
memory usage: 50.2+ KB


In [45]:
# Parse the "Date" strings into timestamps and use them as a sorted index.
# The guard makes the cell safe to re-run: once "Date" has become the index it
# is no longer a column, so repeating the conversion would raise.
if "Date" in data.columns:
    data = (
        data
        .assign(Date=pd.to_datetime(data["Date"]))
        .set_index("Date")
        .sort_index()  # slicing by a date absent from the index needs a monotonic index
    )

In [46]:
# Print the first and last dates covered by the index, then preview the top rows
print(f"{data.index.min()} {data.index.max()}")
data.head()

2014-03-27 00:00:00 2017-11-10 00:00:00


Unnamed: 0_level_0,Open,High,Low,Close,Volume,OpenInt
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-03-27,568.0,568.0,552.92,558.46,13052,0
2014-03-28,561.2,566.43,558.67,559.99,41003,0
2014-03-31,566.89,567.0,556.93,556.97,10772,0
2014-04-01,558.71,568.45,558.71,567.16,7932,0
2014-04-02,599.99,604.83,562.19,567.0,146697,0


# Train set & Test set 생성

In [52]:
# Split chronologically: rows strictly before `date_split` form the train set,
# rows on or after it form the test set. A boolean mask is used instead of two
# label slices because label slices include both endpoints, which would put a
# row dated exactly `date_split` into both sets.
date_split = "2016-01-01"
is_train = data.index < pd.Timestamp(date_split)
train = data.loc[is_train]
test = data.loc[~is_train]

# `.1%` scales the fraction by 100 before appending the percent sign
print(f"number of rows in train set : {len(train)}[{len(train)/len(data):.1%}]")
print(f"number of rows in test set : {len(test)}[{len(test)/len(data):.1%}]")

number of rows in train set : 446[0.487%]
number of rows in test set : 470[0.513%]


## trainset과 testset 시각화

In [48]:
def plot_train_test(train, test, date_split):
    """Render the train and test price series as candlesticks in one figure.

    A vertical line is drawn at ``date_split`` and labelled with
    "train data" on its left and "test data" on its right.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Frames exposing ``Open``, ``High``, ``Low`` and ``Close`` columns;
        each frame's index supplies the x-axis values.
    date_split : str or datetime-like
        x position of the separator line and its two annotations.

    Returns
    -------
    None
        The figure is displayed through ``iplot``.
    """
    def _candles(frame, label):
        # One candlestick trace per data split
        return go.Candlestick(x=frame.index,
                              open=frame.Open,
                              high=frame.High,
                              low=frame.Low,
                              close=frame.Close,
                              name=label)

    # `Candlestick` / `Figure` are only reachable through the `go` namespace
    # (plotly.graph_objects); the bare names are not imported in this notebook.
    traces = [_candles(train, 'train'), _candles(test, 'test')]

    layout = {
        'shapes': [
            {
                # Vertical separator spanning the full plot height (paper coordinates on y)
                'type': 'line',
                'x0': date_split, 'x1': date_split, 'y0': 0, 'y1': 1, 'xref': 'x', 'yref': 'paper',
                'line': {'color': 'rgb(0,0,0)', 'width': 1}
            }
        ],
        'annotations': [
            {'x': date_split, 'y': 1.0, 'xref': 'x', 'yref': 'paper', 'showarrow': False, 'xanchor': 'left', 'text': ' test data'},
            {'x': date_split, 'y': 1.0, 'xref': 'x', 'yref': 'paper', 'showarrow': False, 'xanchor': 'right', 'text': 'train data '}
        ]
    }
    figure = go.Figure(data=traces, layout=layout)
    iplot(figure)

Plotly layout shapes reference:  
https://plotly.com/python/reference/layout/shapes/#layout-shapes-items-shape-type

In [49]:
# Visualize both splits with the separator drawn at the split date
plot_train_test(train, test, date_split)

In [53]:
class Environment1:
    """Minimal single-asset trading environment driven by a price table.

    The environment walks through ``data`` one row at a time. On each step the
    caller chooses an action: 0 = stay, 1 = buy one unit at the current
    ``Close``, 2 = sell every open position at the current ``Close``.

    Parameters
    ----------
    data : pandas.DataFrame
        Price history with a ``Close`` column; rows are visited in positional
        order.
    history_t : int, default 90
        Number of most recent close-to-close changes kept in ``history``.

    Attributes
    ----------
    t : int
        Positional index of the current row.
    done : bool
        True once the last row has been consumed (or ``data`` is empty).
    profits : number
        Sum of the profits realized by all sells so far.
    positions : list
        Entry prices of the currently open positions.
    position_value : number
        Unrealized profit of the open positions at the current ``Close``.
    history : list
        The last ``history_t`` close-to-close changes, oldest first.
    """

    def __init__(self, data, history_t=90):
        self.data = data
        self.history_t = history_t
        self.reset()

    def reset(self):
        """Rewind to the first row, clear all trading state, return the observation."""
        self.t = 0
        # With no rows there is nothing to step through.
        self.done = len(self.data) == 0
        self.profits = 0
        self.positions = []
        self.position_value = 0
        self.history = [0] * self.history_t
        return self._observation()

    def _observation(self):
        """Return ``[position_value] + history`` as a new list."""
        return [self.position_value] + self.history

    def _close(self, t):
        """Return the ``Close`` price of the row at position ``t``."""
        return self.data["Close"].iloc[t]

    def _unrealized(self, price):
        """Return the summed profit of all open positions valued at ``price``."""
        return sum(price - entry for entry in self.positions)

    def step(self, act):
        """Apply one action at the current row and move to the next row.

        Parameters
        ----------
        act : int
            0 = stay, 1 = buy, 2 = sell. Any other value is treated as stay.

        Returns
        -------
        tuple
            ``(observation, reward, done)``. ``reward`` is the profit realized
            by a sell, ``-1`` for a sell with no open position, otherwise 0.
            Once ``done`` is True further calls change nothing and return a
            reward of 0.
        """
        if self.done:
            return self._observation(), 0, self.done

        reward = 0
        price = self._close(self.t)

        # act = 0 : stay, 1 : buy, 2 : sell
        if act == 1:
            self.positions.append(price)
        elif act == 2:
            if not self.positions:
                # Penalize selling when nothing is held.
                reward = -1
            else:
                realized = self._unrealized(price)
                reward += realized
                self.profits += realized
                self.positions = []

        if self.t + 1 >= len(self.data):
            # Last row consumed: finish the episode without reading past the data.
            self.done = True
            self.position_value = self._unrealized(price)
            return self._observation(), reward, self.done

        # Advance one row and refresh the derived state at the new price.
        self.t += 1
        next_price = self._close(self.t)
        self.position_value = self._unrealized(next_price)
        if self.history_t > 0:
            self.history.pop(0)
            self.history.append(next_price - price)

        return self._observation(), reward, self.done

In [55]:
# Display the training split (pandas elides the middle rows of a long frame)
train

Unnamed: 0_level_0,Open,High,Low,Close,Volume,OpenInt
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-03-27,568.00,568.00,552.92,558.46,13052,0
2014-03-28,561.20,566.43,558.67,559.99,41003,0
2014-03-31,566.89,567.00,556.93,556.97,10772,0
2014-04-01,558.71,568.45,558.71,567.16,7932,0
2014-04-02,599.99,604.83,562.19,567.00,146697,0
...,...,...,...,...,...,...
2015-12-24,749.55,751.35,746.62,748.40,527223,0
2015-12-28,752.92,762.99,749.52,762.51,1515716,0
2015-12-29,766.69,779.98,766.43,776.60,1765012,0
2015-12-30,776.60,777.60,766.90,771.00,868174,0
