# Stock NeurIPS2018 Part 1: Data

This is the first part of the NeurIPS2018 series. It introduces how to use FinRL to fetch and process the data needed for ML/RL trading.

In [1]:
# TODO: find a way to make this work from the root directory without changing the working directory
import os
import sys

# Add the directory two levels above the current working directory to the
# module search path (presumably the repository root, so the local `finrl`
# package can be imported -- confirm against the repo layout).
# The parent hops are joined as separate os.pardir components instead of a
# hard-coded "..\.." string: a backslash is only a path separator on Windows,
# and "\." is an invalid escape sequence in a non-raw string literal.
repo_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir))

# Guard the append so re-running this cell does not pile up duplicate entries.
if repo_root not in sys.path:
    sys.path.append(repo_root)

In [2]:
# Sanity check: show the module search path, including the entry added in the previous cell.
print(sys.path)

['C:\\Users\\gupta\\AppData\\Local\\Programs\\Python\\Python310\\python310.zip', 'C:\\Users\\gupta\\AppData\\Local\\Programs\\Python\\Python310\\DLLs', 'C:\\Users\\gupta\\AppData\\Local\\Programs\\Python\\Python310\\lib', 'C:\\Users\\gupta\\AppData\\Local\\Programs\\Python\\Python310', 'c:\\Users\\gupta\\Desktop\\Folders\\Others\\Learning\\Finance\\FinRL_clone\\venv', '', 'c:\\Users\\gupta\\Desktop\\Folders\\Others\\Learning\\Finance\\FinRL_clone\\venv\\lib\\site-packages', 'c:\\Users\\gupta\\Desktop\\Folders\\Others\\Learning\\Finance\\FinRL_clone\\venv\\lib\\site-packages\\win32', 'c:\\Users\\gupta\\Desktop\\Folders\\Others\\Learning\\Finance\\FinRL_clone\\venv\\lib\\site-packages\\win32\\lib', 'c:\\Users\\gupta\\Desktop\\Folders\\Others\\Learning\\Finance\\FinRL_clone\\venv\\lib\\site-packages\\Pythonwin', 'c:\\Users\\gupta\\Desktop\\Folders\\Others\\Learning\\Finance\\FinRL_clone']


## Part 1. Import Packages

In [3]:
import datetime
import itertools
import numpy as np
import pandas as pd
import yfinance as yf

from finrl import index_tickers
from finrl.config import INDICATORS 
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split

## Part 2. Fetch Data

### Data for chosen tickers

In [4]:
# Show the contents of index_tickers.NIFTY_50_TICKER, the ticker list passed to the downloader below.
index_tickers.NIFTY_50_TICKER

['ADANIPORTS.NS',
 'ASIANPAINT.NS',
 'AXISBANK.NS',
 'BAJAJ-AUTO.NS',
 'BAJFINANCE.NS',
 'BAJAJFINSV.NS',
 'BPCL.NS',
 'BHARTIARTL.NS',
 'BRITANNIA.NS',
 'CIPLA.NS',
 'COALINDIA.NS',
 'DIVISLAB.NS',
 'DRREDDY.NS']

In [5]:
# Date windows, written as 'YYYY-MM-DD' strings.
# Training window.
TRAIN_START_DATE, TRAIN_END_DATE = "2009-01-01", "2020-07-01"
# Trading window starts on the date the training window ends.
# NOTE(review): whether that shared boundary date falls into one split or both
# depends on how data_split treats its end date -- confirm.
TRADE_START_DATE, TRADE_END_DATE = TRAIN_END_DATE, "2021-10-29"

In [6]:
# Download price history for every ticker in the list. A single download spans
# TRAIN_START_DATE through TRADE_END_DATE; the train/trade split is done later.
df_raw = YahooDownloader(
    ticker_list=index_tickers.NIFTY_50_TICKER,
    start_date=TRAIN_START_DATE,
    end_date=TRADE_END_DATE,
).fetch_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of the Dataframe:  (40626, 8)


In [7]:
# Preview the first rows of the downloaded frame.
df_raw.head()

Unnamed: 0,date,open,high,low,close,volume,tick,day
0,2009-01-02,67.300003,68.940002,65.800003,60.439636,845745,ADANIPORTS.NS,4
1,2009-01-02,89.910004,90.5,88.510002,78.483749,45390,ASIANPAINT.NS,4
2,2009-01-02,104.400002,109.5,103.459999,97.175278,14261265,AXISBANK.NS,4
3,2009-01-02,204.699997,210.0,199.0,139.212692,228864,BAJAJ-AUTO.NS,4
4,2009-01-02,14.884314,15.522915,14.34396,15.228212,255480,BAJAJFINSV.NS,4


## Part 3. Preprocess Data

In [8]:
# Configure the feature engineer: technical indicators taken from
# finrl.config.INDICATORS, VIX and turbulence enabled, no user-defined features.
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)

# Run the configured preprocessing over the raw download.
processed = fe.preprocess_data(df_raw)

Successfully added technical indicators


[*********************100%***********************]  1 of 1 completed


Shape of the Dataframe:  (3228, 8)
Successfully added vix
Successfully added turbulence index


In [9]:
# Every ticker seen in the processed data, and every calendar day (as a string)
# between the first and last processed date.
list_ticker = processed["tick"].unique().tolist()
list_date = (
    pd.date_range(processed["date"].min(), processed["date"].max())
    .astype(str)
    .tolist()
)

# Full (date, ticker) grid.
combination = list(itertools.product(list_date, list_ticker))

# Left-join the processed rows onto the grid, keep only dates that actually
# occur in `processed`, order by date then ticker, and zero-fill the gaps.
# NOTE: a (date, ticker) pair with no processed row ends up with 0 in every
# feature column, price columns included.
processed_full = (
    pd.DataFrame(combination, columns=["date", "tick"])
    .merge(processed, on=["date", "tick"], how="left")
    .loc[lambda grid: grid["date"].isin(processed["date"])]
    .sort_values(["date", "tick"])
    .fillna(0)
)

In [10]:
# Preview the first rows of the completed date x ticker frame.
processed_full.head()

Unnamed: 0,date,tick,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2009-01-02,ADANIPORTS.NS,67.300003,68.940002,65.800003,60.439636,845745.0,4.0,0.0,81.098763,50.572997,100.0,66.666667,100.0,60.439636,60.439636,39.189999,0.0
1,2009-01-02,ASIANPAINT.NS,89.910004,90.5,88.510002,78.483749,45390.0,4.0,0.0,81.098763,50.572997,100.0,66.666667,100.0,78.483749,78.483749,39.189999,0.0
2,2009-01-02,AXISBANK.NS,104.400002,109.5,103.459999,97.175278,14261265.0,4.0,0.0,81.098763,50.572997,100.0,66.666667,100.0,97.175278,97.175278,39.189999,0.0
3,2009-01-02,BAJAJ-AUTO.NS,204.699997,210.0,199.0,139.212692,228864.0,4.0,0.0,81.098763,50.572997,100.0,66.666667,100.0,139.212692,139.212692,39.189999,0.0
4,2009-01-02,BAJAJFINSV.NS,14.884314,15.522915,14.34396,15.228212,255480.0,4.0,0.0,81.098763,50.572997,100.0,66.666667,100.0,15.228212,15.228212,39.189999,0.0


## Part 4. Save Processed Data

In [11]:
# Cut the full frame into the training and trading date windows.
train = data_split(processed_full, TRAIN_START_DATE, TRAIN_END_DATE)
trade = data_split(processed_full, TRADE_START_DATE, TRADE_END_DATE)

# Row count of each split, one per line (train first, then trade).
print(len(train), len(trade), sep="\n")

32952
3864


In [12]:
# Write both splits to CSV files in the current working directory.
# index=True (the pandas default) is spelled out: the row index is written as
# the first, unnamed column of each file.
train.to_csv("train_data.csv", index=True)
trade.to_csv("trade_data.csv", index=True)