In [1]:
import os
import sys
sys.path.append('/home/souravc83/trading_ideas')
%load_ext autoreload
%autoreload 2

In [2]:
from src.stock import Stock, Holding, Universe
from src.backtest import BackTest
from src.linreg_strategy import LinRegStrategy
from src.factor import (
    LinRegFactor, 
    linreg_stock, 
    MovingAverageFactor,
    PercReturnFactor
)
from src.read_write import ReadData, check_valid_symbol

  from pandas.util.testing import assert_frame_equal


In [3]:
import numpy as np
import pandas as pd
from pandas_datareader import data as pdr
import yfinance as yf


In [4]:
# read sp500
# Load the S&P 500 company list; the preview shows symbol, name and sector columns.
# NOTE: `df` is consumed by the later merge cell and is rebound to VOO price data
# in the "test out date" cell, so re-run this cell before re-running the merge.
valid_sp_500_filename = '/home/souravc83/trading_ideas/src/data/sp500_valid.csv'
df = pd.read_csv(valid_sp_500_filename)
# Quick look at the first rows to confirm the file loaded as expected.
df.head()

Unnamed: 0,symbol,name,sector
0,MMM,3M Company,Industrials
1,AOS,A.O. Smith Corp,Industrials
2,ABT,Abbott Laboratories,Health Care
3,ABBV,AbbVie Inc.,Health Care
4,ABMD,ABIOMED Inc,Health Care


In [11]:
# read Vanguard and munge
# Load the raw Vanguard ETF list; per the summary below it has `symbol` and
# `sector` columns but no `name` column (one is added before the merge).
vanguard_raw_filename = '/home/souravc83/trading_ideas/src/data/vanguard_list_raw.csv'
v_df = pd.read_csv(vanguard_raw_filename)
# Summary statistics; the saved output reports 74 unique symbols in 18 sectors.
v_df.describe()

Unnamed: 0,symbol,sector
count,74,74
unique,74,18
top,VXF,International
freq,1,15


In [16]:
# Checked: no rows in v_df contain nulls (the check is left commented out below)
#v_df[v_df.isnull().any(axis=1)]

In [19]:
#join together with sp500 list and make one list
# The Vanguard list has no company name, so give every ETF the same placeholder;
# this also gives v_df the same three columns the merge keys on.
v_df['name'] = 'Vanguard ETF'

# Outer merge keyed on all three shared columns: rows from both lists are kept.
all_df = pd.merge(df, v_df, how='outer', on=['symbol', 'name', 'sector'])
all_fname = '/home/souravc83/trading_ideas/src/data/all_company_list.csv'
# index=False keeps the DataFrame index out of the CSV.
all_df.to_csv(path_or_buf=all_fname, index=False, header=True)

In [18]:
all_df.describe()

Unnamed: 0,symbol,name,sector
count,571,571,571
unique,571,498,29
top,CME,Vanguard ETF,Information Technology
freq,1,74,71


In [20]:
# Now add the historical data of the Vanguard companies to the file

In [8]:
def prep_df_join(df: pd.DataFrame, symbol: str) -> pd.DataFrame:
    """Tag a single-symbol frame so it can be stacked with other symbols.

    Adds two columns: ``symbol`` (the ticker, repeated on every row) and
    ``date`` (a copy of the frame's index), so both survive a later
    ``pd.concat(..., ignore_index=True)``.

    Args:
        df: Data for one symbol. It is NOT modified.
        symbol: Ticker to store in the ``symbol`` column.

    Returns:
        A new DataFrame holding the original columns plus ``symbol`` and
        ``date``.
    """
    # assign() builds a new frame, so the caller's object (which may be
    # cached or reused elsewhere) is left untouched.
    return df.assign(symbol=symbol, date=df.index)

def make_big_dataframe(symbol_list, start_date='2019-12-02', end_date='2019-12-06'):
    """Collect data for many symbols into one long DataFrame.

    For each symbol, ``ReadData(symbol).get_data(...)`` is called for the
    given date range, the result is tagged with ``symbol`` and ``date``
    columns via ``prep_df_join``, and all per-symbol frames are concatenated
    with a fresh integer index.

    A symbol whose fetch or tagging fails is skipped with a warning instead
    of aborting the whole run.

    Args:
        symbol_list: Iterable of ticker symbols.
        start_date: Range start, passed through to ``ReadData.get_data``.
        end_date: Range end, passed through to ``ReadData.get_data``.

    Returns:
        One DataFrame with the rows of every symbol that loaded. An empty
        DataFrame is returned when no symbol produced data (including an
        empty ``symbol_list``).
    """
    import warnings  # local import so the function does not rely on other cells

    df_list = []
    for symbol in symbol_list:
        try:
            reader = ReadData(symbol)
            symbol_df = reader.get_data(start_date=start_date, end_date=end_date)
            df_list.append(prep_df_join(symbol_df, symbol))
        except Exception as exc:
            # Best-effort per symbol, but report what was dropped. Catching
            # Exception (not a bare except) lets KeyboardInterrupt through,
            # so a long download loop can still be interrupted.
            warnings.warn("Skipping {!r}: {!r}".format(symbol, exc))

    if not df_list:
        # pd.concat raises ValueError on an empty list; return an empty frame.
        return pd.DataFrame()
    return pd.concat(df_list, ignore_index=True)

def store_all_data(
    start_date: str = '2015-01-02',
    end_date: str = '2020-06-21',
    symbol_filename: str = '/home/souravc83/trading_ideas/src/data/vanguard_list_raw.csv',
    offline_filename: str = '/home/souravc83/trading_ideas/src/data/offline_price_data.csv',
):
    """Add data for the Vanguard symbol list to the offline price file.

    Reads the ``symbol`` column of ``symbol_filename``, builds one frame for
    those symbols with ``make_big_dataframe``, places it in front of the rows
    already in ``offline_filename`` and rewrites that file.

    Nothing is de-duplicated: running this again for an overlapping date
    range writes the same rows a second time.

    Args:
        start_date: Range start, forwarded to ``make_big_dataframe``.
        end_date: Range end, forwarded to ``make_big_dataframe``.
        symbol_filename: CSV with a ``symbol`` column listing what to fetch.
        offline_filename: Existing CSV to extend; it must already exist
            because it is read before being overwritten.
    """
    symbols = list(pd.read_csv(symbol_filename)['symbol'].values)

    fetched_df = make_big_dataframe(symbols, start_date, end_date)
    existing_df = pd.read_csv(offline_filename)

    # New rows first, then the previous file contents; index is not written.
    combined_df = pd.concat([fetched_df, existing_df])
    combined_df.to_csv(path_or_buf=offline_filename, index=False, header=True)
    
def append_new_data(
    start_date: str,
    end_date: str,
    symbol_filename: str = '/home/souravc83/trading_ideas/src/data/all_company_list.csv',
    offline_filename: str = '/home/souravc83/trading_ideas/src/data/offline_price_data.csv',
):
    """Add data for a date range, for every listed symbol, to the offline file.

    Reads the ``symbol`` column of ``symbol_filename``, builds one frame for
    those symbols with ``make_big_dataframe``, places it in front of the rows
    already in ``offline_filename`` and rewrites that file.

    Nothing is de-duplicated: calling this twice with the same (or an
    overlapping) date range writes the same rows twice.

    Args:
        start_date: Range start, forwarded to ``make_big_dataframe``.
        end_date: Range end, forwarded to ``make_big_dataframe``.
        symbol_filename: CSV with a ``symbol`` column listing what to fetch.
        offline_filename: Existing CSV to extend; it must already exist
            because it is read before being overwritten.
    """
    symbols = list(pd.read_csv(symbol_filename)['symbol'].values)

    fetched_df = make_big_dataframe(symbols, start_date, end_date)
    existing_df = pd.read_csv(offline_filename)

    # New rows first, then the previous file contents; index is not written.
    combined_df = pd.concat([fetched_df, existing_df])
    combined_df.to_csv(path_or_buf=offline_filename, index=False, header=True)
    
    

In [23]:
# One-off backfill of the Vanguard symbols using the function's default
# date range (2015-01-02 to 2020-06-21). This rewrites the offline price CSV.
store_all_data()

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [4]:
#test out date
# Fetch a few days for a single symbol to see which dates the range returns
# before appending the same range for every symbol.
# NOTE: this rebinds `df`, which earlier held the S&P 500 company list.
# NOTE(review): the saved output below has rows for 06-22..06-24 only, so
# end_date looks exclusive (or 06-25 was not yet available) - confirm.
A = ReadData('VOO')
df = A.get_data(start_date='2020-06-22', end_date='2020-06-25')

[*********************100%***********************]  1 of 1 completed


In [5]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-06-22,284.350006,287.190002,283.26001,286.859985,286.859985,2518000
2020-06-23,289.51001,290.429993,287.76001,288.160004,288.160004,2503600
2020-06-24,286.079987,286.700012,278.970001,280.790009,280.790009,4521700


In [9]:
# Append the range spot-checked on VOO above for every symbol in the combined
# company list. Re-running this cell appends the same rows again.
append_new_data(start_date='2020-06-22', end_date='2020-06-25')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********