# Data Preparation

This notebook walks through the data-preparation process: downloading price data from the Yahoo Finance API and generating data with indicators using C++ compiled executables.

This notebook was run on a Windows system.

In [None]:
import numpy as np
import pandas as pd
import shutil
pd.core.common.is_list_like = pd.api.types.is_list_like
from pandas_datareader import data as pdr
import datetime
## yahoo finance
import yfinance as yf
yf.pdr_override()
import talib as ta
import time
## used to run the executable command
import subprocess
import os

In [None]:
## time window of the data to download
start_date = datetime.datetime(year=2010, month=1, day=9)
end_date = datetime.datetime(year=2022, month=1, day=9)

In [None]:

## tickers to process; index symbols carry the leading ^ that Yahoo Finance expects
stock_list = ["^OEX", "^NDX", "^RUT", "^DJI"]

## all paths are relative to ./notebook/, where this notebook lives
exe_dir = '../exe/'                 ## the executables sit one directory up
vs_file = './VS.txt'                ## VS.txt is loaded from this same directory
outdir = './data_with_indicator/'   ## generated files are written here

In [None]:
## create the output directory if it does not exist yet
## (exist_ok=True replaces the separate existence check, which could race
## with another process creating the directory in between)
os.makedirs(outdir, exist_ok=True)

In [None]:
def get_yahoo_data(stock, outfile, start_date, end_date):
    """Download price data for ``stock`` and write it to a flat text file.

    The data is fetched with ``pdr.get_data_yahoo`` for the window given by
    ``start_date`` and ``end_date``. The ``Adj Close`` column is dropped, the
    index is turned into a ``Date`` column formatted as ``YYYYMMDD``, and the
    frame is written to ``outfile`` space-separated, without header or index.

    Args:
        stock: Ticker symbol passed to Yahoo Finance (e.g. ``"^NDX"``).
        outfile: Path of the text file to write.
        start_date: Value passed as ``start`` to the download call.
        end_date: Value passed as ``end`` to the download call.

    Returns:
        The DataFrame that was written, with ``Date`` as a ``YYYYMMDD``
        string column.
    """
    ## get yahoo finance data from start date to end date
    df = pdr.get_data_yahoo(stock, start=start_date, end=end_date)

    ## drop adjusted close; errors='ignore' keeps this from raising KeyError
    ## when the download does not contain that column
    df = df.drop(columns=['Adj Close'], errors='ignore')
    print(stock)
    print(f'received data from yf for {stock} with {df.shape}')

    ## move the date index into a column and format it as YYYYMMDD
    df = df.reset_index()
    df['Date'] = pd.to_datetime(df['Date']).dt.strftime('%Y%m%d')

    ## write it down to a flat file because the C++ single exe only takes in
    ## flat files with a specific format.
    ## mode='w' overwrites: appending would duplicate every row each time the
    ## notebook is re-run for the same ticker.
    df.to_csv(outfile, header=None, index=None, sep=' ', mode='w')

    return df

In [None]:


## For every ticker: download prices, run single.exe on them, merge the
## indicators it produces back onto the prices, add two TA-Lib RSI columns,
## and write the combined table to <ticker>_full.csv.
for stock in stock_list:
    ## stock indexes need a ^ in front for Yahoo Finance; strip it so the
    ## ticker can be used in file names, otherwise leave the name unchanged
    stock_name = stock[1:] if stock.startswith('^') else stock

    file_location = outdir + stock_name + '.txt'

    ## download the data, write it to file_location, and keep the df for the
    ## merge below. get_yahoo_data requires the date window; calling it
    ## without start_date/end_date raises a TypeError.
    df = get_yahoo_data(stock, file_location, start_date, end_date)

    ## use Popen to execute the single exe:
    ## the program requires a key input to end the process, so we give it a
    ## few seconds to write its output and then kill it manually
    p = subprocess.Popen([exe_dir + 'single.exe', file_location, vs_file])
    time.sleep(4)
    p.kill()
    ## wait until the process has really exited so that its output file is
    ## no longer held open when we move it
    p.wait()
    print('enter is input, executable finishing')

    ## single.exe creates the file .\OUTVARS.TXT in the working directory;
    ## move it to <ticker_name>_signal.txt.
    ## Raw string: '\O' is not a valid escape sequence in a normal literal.
    signal_location = outdir + stock_name + '_signal.txt'
    shutil.move(r'.\OUTVARS.TXT', signal_location)

    ## read the signal file just created; sep=r'\s+' is the non-deprecated
    ## equivalent of delim_whitespace=True
    indicator = pd.read_csv(signal_location, sep=r'\s+')
    ## parse and re-format the dates so the merge key is a YYYYMMDD string,
    ## the same format get_yahoo_data gives df['Date']
    indicator['Date'] = pd.to_datetime(indicator['Date'], format='%Y%m%d')
    indicator['Date'] = indicator['Date'].dt.strftime('%Y%m%d')

    print('indicator file has shape', indicator.shape)

    ## left merge: keep every price row even if it has no indicator row
    df_result = df.merge(indicator, on='Date', how='left')

    ## add two talib columns
    ## NOTE(review): RSI is computed on 'Open', not 'Close' - confirm intended
    open_prices = np.array(df_result['Open'])
    df_result['RSI_20_ta'] = ta.RSI(open_prices, timeperiod=20)
    df_result['RSI_25_ta'] = ta.RSI(open_prices, timeperiod=25)

    df_result.to_csv(outdir + stock_name + '_full.csv', index=False)
