In [69]:
from alpaca_trade_api.rest import REST, TimeFrame
from config import  STOCKS, SHARE_TABLE_NAME
from creds import ALPACA_API_KEY, ALPACA_SECRET_KEY, HOST, DB, USER, PW, SCHEMA
from db_connector import engine
import pandas as pd
import json
from forex_python.converter import CurrencyRates
import psycopg2
api = REST(key_id=ALPACA_API_KEY, secret_key=ALPACA_SECRET_KEY)
import time

In [2]:
conn = psycopg2.connect(host=f"{HOST}",
                        database=f"{DB}", 
                        user=f"{USER}", 
                        password=f"{PW}",
                        options=f"-c search_path={SCHEMA}")
conn

<connection object at 0x000001E5ADFCDE10; dsn: 'user=Crypto-Analytics password=xxx dbname=postgres host=35.228.211.69 options='-c search_path=cryptolytics_staging'', closed: 0>

In [11]:
df_exist = pd.read_sql(f"select * from \"{SHARE_TABLE_NAME}\"", engine.connect())
df_exist.sort_values(by="starttime", ascending=False)

Unnamed: 0,starttime,symbol,open,close,high,low,basevolume,tradecount,market_cap
97561,2021-12-15 21:00:00,MSFT,295.446105,295.446105,295.446105,295.446105,100,1,2.179635e+12
97558,2021-12-15 20:59:00,GOOG,2607.834468,2606.861791,2607.958263,2604.854541,1643,60,1.701763e+12
97566,2021-12-15 20:59:00,BABA,108.232381,108.303121,108.320806,108.152799,10407,90,3.034282e+11
97565,2021-12-15 20:59:00,MSFT,295.954549,295.472632,296.392254,295.472632,28824,322,2.179830e+12
97564,2021-12-15 20:59:00,TSLA,862.459103,862.781855,863.462729,862.277832,8164,181,8.511745e+11
...,...,...,...,...,...,...,...,...,...
39029,2021-10-01 08:00:00,TSLA,668.460804,667.497179,668.460804,667.497179,3567,173,6.744255e+11
39030,2021-10-01 08:00:00,MSFT,243.345777,243.163469,243.936106,243.120063,6209,129,1.821078e+12
39031,2021-10-01 08:00:00,AAPL,122.163382,122.198107,122.666898,122.102613,8999,235,1.994977e+12
39032,2021-10-01 08:00:00,BABA,127.580519,127.415574,127.580519,126.946784,1430,44,3.579289e+11


In [4]:
def get_historical_data_from(symbol, from_date, to_date):
    df = api.get_bars(symbol=[symbol], timeframe=TimeFrame.Minute, start=from_date, end=to_date, adjustment='raw').df
    df.reset_index(inplace=True)
    df = df.loc[:, ['timestamp', 'symbol', 'open', 'close', 'high', 'low', 'volume', 'trade_count',]]
    df.columns = ['starttime', 'symbol', 'open', 'close', 'high', 'low', 'basevolume', 'tradecount']
    
    symbol = df.loc[0, "symbol"]
    df["market_cap"] = 0
    # Data Preperation
    df.open = df.open.astype(float)
    df.close = df.close.astype(float)
    df.high = df.high.astype(float)
    df.low = df.low.astype(float)
    df.market_cap = df.market_cap.astype(float)
    df.starttime = pd.to_datetime(df.starttime).dt.tz_localize(None)

    # Convert to Euros
    conversion_rate = CurrencyRates().get_rate('USD', 'EUR')
    df.loc[:, ["open", "high", "low", "close", "market_cap"]] *= conversion_rate
    
    return df.sort_values(by="starttime", ascending=False)

In [22]:
df_market_cap = df_exist.copy()
df_market_cap = df_market_cap.loc[(df_market_cap.starttime >= "2021-12-08 14:00:00") & (df_market_cap.starttime <= "2021-12-08 14:53:00"), :]
df_market_cap = df_market_cap.sort_values(by="starttime", ascending=False).drop_duplicates(subset="symbol", keep="first")
df_market_cap

Unnamed: 0,starttime,symbol,open,close,high,low,basevolume,tradecount,market_cap
92086,2021-12-08 14:52:00,TSLA,929.855188,930.17946,930.17946,929.326581,2158,46,936698000000.0
92084,2021-12-08 14:52:00,TSM,107.818053,107.675906,107.818053,107.644812,505,9,563204400000.0
92085,2021-12-08 14:52:00,PYPL,172.157072,172.103767,172.157072,171.926084,1338,20,196017400000.0
92096,2021-12-08 14:51:00,AAPL,154.086709,153.820185,154.086709,153.784648,4523,53,2432554000000.0
92080,2021-12-08 14:51:00,BABA,110.38113,110.412225,110.501066,110.38113,5703,88,306475100000.0
92081,2021-12-08 14:51:00,MSFT,297.148188,297.148188,297.148188,297.148188,273,6,2172765000000.0
92082,2021-12-08 14:51:00,GOOG,2638.646055,2638.646055,2638.646055,2638.646055,139,3,1747348000000.0
92091,2021-12-08 14:50:00,NFLX,557.418266,556.991827,557.418266,556.991827,588,12,244523900000.0
92092,2021-12-08 14:50:00,AMZN,3133.728678,3136.398365,3136.398365,3133.728678,801,19,1591293000000.0


In [23]:
def add_market_cap(df, symbol):
    # sort that first value in db (time wise) is first value in df
    df = df.sort_values(by="starttime", ascending=False)
    # calculate pct_change of close price
    df["pct_change"] = df["close"].pct_change(periods=1)
    df["pct_change"] += 1
    df.iloc[0, 9] = 1
    # for easier interpolation pre calculate cumulative product of pct_change
    df["base_change"] = df.loc[:,'pct_change'].cumprod()
    # get market cap to interpoilate from
    market_cap = df_market_cap.loc[df_market_cap.symbol == symbol, "market_cap"].values[0]
    # interpolate market cap
    df.market_cap = market_cap * df.base_change
    # drop not needed columns and rows
    df = df.drop(["pct_change", "base_change"], axis=1)
    df = df.iloc[1:]

    return df

In [29]:
def get_historical_data_all(): 
    first = True
    for stock in STOCKS:
        symbol = stock
        from_date = "2021-12-08"
        to_date = "2021-12-14"
        df = get_historical_data_from(symbol=symbol, from_date=from_date, to_date=to_date)
        df = add_market_cap(df, symbol)
        if first:
            df_all = df
            first = False
        else:
            df_all = df_all.append(df)
    
    return df_all.sort_values(by="starttime", ascending=False)

In [30]:
df = get_historical_data_all()
df

Unnamed: 0,starttime,symbol,open,close,high,low,basevolume,tradecount,market_cap
3449,2021-12-15 00:58:00,TSLA,846.208489,846.208489,846.208489,845.942106,939,34,9.363640e+11
4295,2021-12-15 00:58:00,AAPL,154.865921,154.865921,154.883680,154.865921,651,14,2.432136e+12
3273,2021-12-15 00:58:00,BABA,112.102646,112.102646,112.102646,112.102646,596,9,3.051458e+11
3448,2021-12-15 00:57:00,TSLA,846.652460,846.164092,846.652460,845.764518,4452,82,9.363149e+11
4294,2021-12-15 00:57:00,AAPL,154.848162,154.857041,154.857041,154.848162,1607,16,2.431997e+12
...,...,...,...,...,...,...,...,...,...
0,2021-12-08 09:00:00,BABA,109.740721,109.492097,109.767359,109.305630,3083,107,2.980398e+11
0,2021-12-08 09:00:00,AAPL,153.489611,153.498490,153.498490,153.152193,4702,138,2.410661e+12
0,2021-12-08 09:00:00,TSLA,931.894868,923.459421,931.894868,923.459421,7680,381,1.021845e+12
0,2021-12-08 09:00:00,MSFT,298.792399,298.357308,298.792399,298.357308,737,26,2.219128e+12


In [32]:
df = df.loc[(df.starttime >= "2021-12-08 14:53:00") & (df.starttime < "2021-12-14 13:00:00"), :]
df

Unnamed: 0,starttime,symbol,open,close,high,low,basevolume,tradecount,market_cap
2846,2021-12-14 12:59:00,TSLA,843.100693,843.145090,844.343811,843.100693,4571,125,9.329742e+11
3613,2021-12-14 12:59:00,AAPL,157.147931,157.121293,157.156811,157.094655,14724,117,2.467556e+12
2176,2021-12-14 12:59:00,PYPL,163.514473,163.514473,163.514473,163.514473,309,9,1.927723e+11
2440,2021-12-14 12:59:00,MSFT,299.147576,299.183094,299.183094,299.147576,807,27,2.225270e+12
2845,2021-12-14 12:58:00,TSLA,842.834310,842.257148,842.834310,841.875333,3384,264,9.319917e+11
...,...,...,...,...,...,...,...,...,...
60,2021-12-08 14:53:00,NFLX,556.490854,555.016871,556.774996,554.075653,32185,772,2.547878e+11
311,2021-12-08 14:53:00,AAPL,153.738235,153.418487,153.818150,153.391937,547920,4193,2.409404e+12
232,2021-12-08 14:53:00,BABA,110.393358,109.998224,110.406677,109.971586,74573,766,2.994175e+11
46,2021-12-08 14:53:00,AMZN,3130.585242,3125.794708,3133.204582,3125.794708,6819,598,1.656352e+12


In [34]:
df.groupby("symbol").agg({
    "starttime": ["min", "max", "count"],
    "market_cap": ["first", "last"]
})

Unnamed: 0_level_0,starttime,starttime,starttime,market_cap,market_cap
Unnamed: 0_level_1,min,max,count,first,last
symbol,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
AAPL,2021-12-08 14:53:00,2021-12-14 12:59:00,3303,2467556000000.0,2409404000000.0
AMZN,2021-12-08 14:53:00,2021-12-14 12:31:00,1675,1588480000000.0,1656352000000.0
BABA,2021-12-08 14:53:00,2021-12-14 12:57:00,2479,290643800000.0,299417500000.0
GOOG,2021-12-08 14:53:00,2021-12-14 10:14:00,1479,1751578000000.0,1784167000000.0
MSFT,2021-12-08 14:53:00,2021-12-14 12:59:00,2255,2225270000000.0,2199050000000.0
NFLX,2021-12-08 14:53:00,2021-12-14 12:19:00,1610,245184300000.0,254787800000.0
PYPL,2021-12-08 14:53:00,2021-12-14 12:59:00,2058,192772300000.0,202638600000.0
TSLA,2021-12-08 14:53:00,2021-12-14 12:59:00,2628,932974200000.0,1030182000000.0
TSM,2021-12-08 14:53:00,2021-12-14 12:57:00,1770,561224100000.0,584697000000.0


In [35]:
# df.to_sql(SHARE_TABLE_NAME, engine, if_exists='append', index=False)

In [140]:
sleep_amount = 3600 - time.time() % 3600
sleep_amount

3583.2059450149536