In [1]:
# Load IPython's autoreload extension so the %autoreload magic is available.
%load_ext autoreload

In [2]:
# Mode 2: reload imported modules before each cell runs, so edits to the local
# packages are picked up without restarting the kernel.
%autoreload 2

# An example of a Low Liquidity Trade

In [48]:
import requests
import s3fs
import pandas as pd
import re
import humps
import sqlalchemy
from datetime import timedelta, datetime
import pendulum

import yfinance as yf

from prefect import Client, Task, Flow, task, Parameter
from prefect.schedules import IntervalSchedule
from prefect.environments.storage import Docker

# Homemade
from extractMOCData import CONFIG as cfg
from extractMOCData.moc_data import TsxMocData

from normalize.ticker_symbols import TsxToYhoo
from addFeatures.daily import DailyData 

from mocIO.read_moc import get_moc_file_lst
#from extractMOCData import read_moc 

import etl_moc

In [4]:
# tsxMocData = TsxMocData()

# moc_df = tsxMocData.scrape_moc_data()

## Load Raw Data

In [5]:
# List the available MOC files by calling get_moc_file_lst's .run() directly.
# NOTE(review): "tsx-moc" is presumably the S3 bucket name (the entries are later
# prefixed with "s3://") — confirm against mocIO.read_moc.
moc_file_lst = get_moc_file_lst.run("tsx-moc")

In [6]:
# Build the S3 URL of the last entry in the file listing and display it.
latest_moc_file = moc_file_lst[-1]
data_flpth = f"s3://{latest_moc_file}"
data_flpth

's3://tsx-moc/moc_tsx_20200409.csv'

In [7]:
# Read the selected MOC file. na_filter=False disables pandas' missing-value
# detection, so a Symbol whose text is literally "NA" stays a string.
moc_df = pd.read_csv(data_flpth, parse_dates=["moc_date"], na_filter=False)
# Safety net for any remaining missing Symbol. Assign the result back instead of
# calling fillna(inplace=True) on the column selection: that is chained assignment,
# which warns on recent pandas and does not modify moc_df under Copy-on-Write.
moc_df["Symbol"] = moc_df["Symbol"].fillna("NA")

In [8]:
# Sanity check: show any rows whose Symbol is still missing (expected: none).
missing_symbol_mask = moc_df["Symbol"].isna()
moc_df[missing_symbol_mask]

Unnamed: 0,Symbol,Imbalance Side,Imbalance Size,Imbalance Reference Price,moc_date


## Create a key table
Map TSX symbols to Yahoo Symbols

In [9]:
# Run the TsxToYhoo mapper over the raw MOC frame. In the displayed result the
# frame gains a `yahoo_symbol` column (e.g. ACO.X -> ACO-X.TO).
yhooMap =  TsxToYhoo()
moc_key_df = yhooMap.run(moc_df)
moc_key_df

Unnamed: 0,Symbol,Imbalance Side,Imbalance Size,Imbalance Reference Price,moc_date,yahoo_symbol
0,ABX,BUY,133560,31.210,2020-04-09,ABX.TO
1,ACO.X,BUY,5257,39.680,2020-04-09,ACO-X.TO
2,ADW.A,BUY,200,8.435,2020-04-09,ADW-A.TO
3,AEM,BUY,2033,68.045,2020-04-09,AEM.TO
4,AGI,BUY,67360,8.725,2020-04-09,AGI.TO
...,...,...,...,...,...,...
367,WPK,SELL,98,42.970,2020-04-09,WPK.TO
368,WPRT,SELL,14415,1.160,2020-04-09,WPRT.TO
369,WSP,SELL,7726,84.295,2020-04-09,WSP.TO
370,WTE,SELL,4170,14.580,2020-04-09,WTE.TO


## Add daily features
(from yahoo)

In [10]:
# Fetch the EOD features for a small sample (first 6 rows) of the key table.
moc_key_sample = moc_key_df.head(6)
eod_df = etl_moc.get_eod_features.run(moc_key_sample)

## Add intraday features
(from yahoo)

In [46]:
# Fetch 1-minute OHLC bars for the first 6 rows of the key table via
# etl_moc.get_1min_ohlc (called directly through .run()).
intraday_df = etl_moc.get_1min_ohlc.run(moc_key_df.head(6))

[*********************100%***********************]  6 of 6 completed


In [47]:
# Display the intraday frame; in the output it is indexed by (moc_date, yahoo_symbol).
intraday_df

Unnamed: 0_level_0,Unnamed: 1_level_0,adj_close,close,high,low,open,volume
moc_date,yahoo_symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-04-09 09:30:00-04:00,ABX.TO,29.06,29.06,29.33,28.90,29.33,0.0
2020-04-09 09:30:00-04:00,ACO-X.TO,39.46,39.46,39.70,39.46,39.70,0.0
2020-04-09 09:30:00-04:00,ADW-A.TO,,,,,,
2020-04-09 09:30:00-04:00,AEM.TO,65.81,65.81,66.36,65.41,65.80,0.0
2020-04-09 09:30:00-04:00,AGI.TO,8.71,8.71,8.71,8.54,8.54,0.0
...,...,...,...,...,...,...,...
2020-04-09 15:59:00-04:00,ACO-X.TO,39.75,39.75,39.75,39.68,39.68,4127.0
2020-04-09 15:59:00-04:00,ADW-A.TO,8.44,8.44,8.44,8.44,8.44,400.0
2020-04-09 15:59:00-04:00,AEM.TO,68.60,68.60,68.71,68.49,68.66,15611.0
2020-04-09 15:59:00-04:00,AGI.TO,8.84,8.84,8.89,8.84,8.85,23741.0


In [39]:
# Reshape the wide download (one column group per ticker) into long form:
# stack the ticker level into the index, flatten the index into columns, and
# name the ticker column `yahoo_symbol`.
# NOTE(review): `df` is overwritten with its own transform, so this cell is not
# safe to re-run without re-running the download cell first.
stacked_bars = df.stack(dropna=False)
flat_bars = stacked_bars.reset_index()
df = flat_bars.rename(columns={'level_1': 'yahoo_symbol'})
df

Unnamed: 0,Datetime,yahoo_symbol,Adj Close,Close,High,Low,Open,Volume
0,2020-04-09 09:30:00-04:00,ABX.TO,29.06,29.06,29.33,28.90,29.33,0.0
1,2020-04-09 09:30:00-04:00,ACO-X.TO,39.46,39.46,39.70,39.46,39.70,0.0
2,2020-04-09 09:30:00-04:00,ADW-A.TO,,,,,,
3,2020-04-09 09:30:00-04:00,AEM.TO,65.81,65.81,66.36,65.41,65.80,0.0
4,2020-04-09 09:30:00-04:00,AGI.TO,8.71,8.71,8.71,8.54,8.54,0.0
...,...,...,...,...,...,...,...,...
2335,2020-04-09 15:59:00-04:00,ACO-X.TO,39.75,39.75,39.75,39.68,39.68,4127.0
2336,2020-04-09 15:59:00-04:00,ADW-A.TO,8.44,8.44,8.44,8.44,8.44,400.0
2337,2020-04-09 15:59:00-04:00,AEM.TO,68.60,68.60,68.71,68.49,68.66,15611.0
2338,2020-04-09 15:59:00-04:00,AGI.TO,8.84,8.84,8.89,8.84,8.85,23741.0


In [20]:
# Download 1-minute bars for the first 6 Yahoo symbols of the key table.
sample_symbols = moc_key_df.head(6)["yahoo_symbol"].tolist()
df = yf.download(
    sample_symbols,
    start="2020-04-09",
    end="2020-04-10",
    interval="1m",
)

[*********************100%***********************]  6 of 6 completed


## Prepare DataFrames for the database

In [12]:
# Inspect the column labels of the EOD feature frame (137 columns in the output below).
eod_df.columns

Index(['symbol', 'imbalance_side', 'imbalance_size',
       'imbalance_reference_price', 'moc_date', 'yahoo_symbol', 'date', 'open',
       'high', 'low',
       ...
       'peg_ratio', 'last_cap_gain', 'short_percent_of_float',
       'shares_short_prior_month', 'category', 'five_year_average_return',
       'regular_market_price', 'logo_url', 'full_time_employees', 'fax'],
      dtype='object', length=137)

In [None]:
# Use (moc_date, symbol) as the index; verify_integrity=True raises if that
# pair is duplicated anywhere in the frame.
db_index_cols = ["moc_date", "symbol"]
eod_df = eod_df.set_index(db_index_cols, verify_integrity=True)

In [None]:
# NOTE(review): `mormalize_col_names` is not defined or imported in any visible cell
# (possibly a typo for "normalize_col_names"); this cell raises NameError on a fresh kernel.
mormalize_col_names("Imbalance Size")

In [None]:
import humps

In [None]:
# NOTE(review): `stringcase` is never imported in the visible notebook, so this
# cell raises NameError on a fresh kernel.
stringcase.snakecase("Imbalance Size")

In [None]:
# Probe the normalisation on a label that is already snake_case:
# decamelize first, then strip any spaces.
decamelized_label = humps.decamelize("yahoo_symbol")
decamelized_label.replace(" ", "")

In [None]:
# Normalise every column label: decamelize it with humps, then strip spaces.
# The result is rebound rather than using inplace=True, so the frame is not
# mutated behind earlier cells that already displayed it.
eod_df = eod_df.rename(columns=lambda col_nm: humps.decamelize(col_nm).replace(" ", ""))

In [None]:
# Inspect the column labels of eod_df.
eod_df.columns

In [None]:
# Create a yfinance Ticker handle for a single symbol to explore its metadata.
sym = yf.Ticker("AAV.TO")

In [None]:


# DataFrame.assign accepts keyword arguments only (column_name=value), so passing
# a positional list raised TypeError. Instead, broadcast the scalar entries of the
# ticker's info dict as constant columns; non-scalar entries (lists/dicts) are
# skipped because they cannot be broadcast to the frame's length.
# NOTE(review): any info key that matches an existing column name overwrites it.
eod_df.assign(**{key: val for key, val in sym.info.items() if pd.api.types.is_scalar(val)})

In [None]:
# Wrap the ticker's info dict in a one-row DataFrame and join it onto eod_df.
# NOTE(review): DataFrame.join aligns on the index; info_df has a default integer
# index, so confirm this lines up with eod_df's index as intended.
info_df = pd.DataFrame([sym.info])
eod_df.join(info_df)

In [None]:
# Display the symbol key table.
moc_key_df

In [None]:
# NOTE(review): `ohlc_df` is only assigned in a later cell of the visible notebook,
# so this fails on a top-to-bottom run from a fresh kernel.
ohlc_df

In [None]:
# Experiment: stack the outermost column level of `df` into the index (result is only displayed).
df.stack(level=0)

In [None]:
 # Download bars for five TSX tickers between st_dt and end_dt.
 # The ticker list needs commas: adjacent string literals are concatenated by
 # Python, so the original list held the single bogus symbol
 # 'ABX.TOAC.TOACB.TOAD.TOADN.TO'.
 # NOTE(review): `interval` is not defined in any visible cell, and `st_dt` is
 # assigned as an already-formatted string elsewhere in the notebook — confirm
 # both are date-like objects / defined before running.
 df = yf.download(
     ['ABX.TO', 'AC.TO', 'ACB.TO', 'AD.TO', 'ADN.TO'],
     start=st_dt.strftime('%Y-%m-%d'),
     end=end_dt.strftime('%Y-%m-%d'),
     interval=interval,
 )

In [None]:
# Print the yahoo_symbol column of the second element of every item in grpd_eod_dfs.
for grp_item in grpd_eod_dfs:
    grp_frame = grp_item[1]
    print(grp_frame["yahoo_symbol"])

In [None]:
# Download daily bars for SPY and AAPL.
# The original end date ("2017-04-30") was earlier than the start date, so the
# requested window was empty; the end date now falls after the start.
# NOTE(review): the end date is assumed to be 2020-04-30 — confirm the intended window.
data = yf.download(
    "SPY AAPL",
    start="2020-04-06",
    end="2020-04-30",
)

In [None]:
# Build the daily MOC table: left-join the OHLC rows onto the key table by
# (date, yahoo_symbol). validate="one_to_one" raises if the keys are duplicated
# on either side.
moc_merge_keys = ["moc_date", "yahoo_symbol"]
ohlc_merge_keys = ["Date", "yahoo_symbol"]
daily_moc_df = pd.merge(
    moc_key_df,
    ohlc_df,
    how="left",
    left_on=moc_merge_keys,
    right_on=ohlc_merge_keys,
    validate="one_to_one",
)

In [None]:
# Drop the "Date" column (the merge key from the OHLC side) and display the result.
daily_moc_df = daily_moc_df.drop(columns="Date")
daily_moc_df

In [None]:
# Inspect the column labels of the merged daily frame.
daily_moc_df.columns

In [None]:
# Scratch check: split a raw column label on spaces and lower-case its first word.
clmn_name = "Imbalance Side"
tmp_col = clmn_name.split(" ")
first_word = tmp_col[0]
first_word.lower()

In [None]:
def norm_clm_names(clm_name):
    """Normalise a column label to lower-case snake_case.

    The original cell was an unfinished stub (a dangling assignment, which is a
    SyntaxError). This completes it so that "Imbalance Side" becomes
    "imbalance_side", matching the normalised labels shown for eod_df.

    Args:
        clm_name: Column label as a string, words separated by whitespace.

    Returns:
        The label lower-cased with its words joined by single underscores.
    """
    # split() without arguments collapses repeated/leading/trailing whitespace.
    clm_name_lst = clm_name.split()
    return "_".join(word.lower() for word in clm_name_lst)

In [None]:
# Normalise the column labels to lower-case snake_case ("Imbalance Side" -> "imbalance_side").
# The original passed `str.split(" ")` to map(); that expression evaluates to a
# list rather than a callable, so assigning the map object to .columns failed
# with TypeError.
# NOTE(review): snake_case is assumed to be the intended target, matching the
# normalised labels of eod_df — confirm.
daily_moc_df.columns = ["_".join(str(col).lower().split()) for col in daily_moc_df.columns]

In [None]:
# NOTE(review): `som` is not defined in any visible cell — looks like leftover
# scratch input; raises NameError on a fresh kernel.
som

In [None]:
# NOTE(review): `dailyData` (lower-case d) is never assigned in the visible notebook;
# only the class `DailyData` is imported. Presumably an instance must be created first — confirm.
ohlc_df_lst = dailyData.add_ohlc(moc_key_df)

In [None]:
# Materialise the per-symbol OHLC frames into a plain list.
df_lst = list(ohlc_df_lst)

In [None]:
# Stack the per-symbol frames row-wise, then move the index into ordinary columns.
stacked_ohlc = pd.concat(df_lst, axis=0)
ohlc_df = stacked_ohlc.reset_index()

In [None]:
# Preview the first rows of the combined OHLC frame.
ohlc_df.head()

In [14]:
# Take the MOC date of the first row and format it as YYYY-MM-DD.
first_moc_date = moc_key_df.iloc[0]["moc_date"]
st_dt = first_moc_date.strftime('%Y-%m-%d')


In [None]:
# Create a yfinance Ticker handle for AEM.TO for the exploration cells that follow.
sym = yf.Ticker("AEM.TO")

In [None]:
# Look up the "sector" entry of the ticker's info dict.
sym.info["sector"]

In [None]:
# Request auto-adjusted 1-minute history for the window 2020-04-02 to 2020-04-03.
history_opts = {
    "start": "2020-04-02",
    "end": "2020-04-03",
    "interval": "1m",
    "auto_adjust": True,
}
sym.history(**history_opts)
    

In [None]:
# Show the `ticker` attribute of the current Ticker handle.
sym.ticker

In [None]:
def get_ohlc(row):
    """Fetch the first auto-adjusted history bar for a row's symbol and MOC date.

    Args:
        row: Mapping/Series providing "yahoo_symbol" (passed to yf.Ticker) and
            "moc_date" (a date-like value supporting `+ timedelta` and `.strftime`).

    Returns:
        A DataFrame holding at most one row: the first bar of
        `Ticker.history` for the window [moc_date, moc_date + 1 day), with a
        `yahoo_symbol` column added.
    """
    sym = yf.Ticker(row["yahoo_symbol"])
    st_dt = row["moc_date"]
    end_dt = st_dt + timedelta(days=1)

    # The two leftover `print(sym.info)` debug calls were removed: they flooded
    # the cell output for every row.
    df = sym.history(
        start=st_dt.strftime('%Y-%m-%d'),
        end=end_dt.strftime('%Y-%m-%d'),
        auto_adjust=True,
    ).head(1)

    # assign() returns a new frame, so the symbol column is not written into a
    # slice of the history frame (avoids SettingWithCopyWarning).
    #df["sector"] = sym.info["sector"]
    return df.assign(yahoo_symbol=row["yahoo_symbol"])

# Try get_ohlc row-wise (axis=1) on just the first two rows of the key table.
ohlc_df_lst = moc_key_df.head(2).apply(get_ohlc, axis=1)

In [None]:
# NOTE(review): `.rea` looks like an unfinished attribute name (tab-completion
# leftover); as written this most likely raises AttributeError.
ohlc_df_lst[0].rea

In [None]:
# Derive the query window from the first row's MOC date: st_dt is the formatted
# string, end_dt stays the raw date value (the +1 day offset is disabled).
first_moc_date = moc_key_df.iloc[0]["moc_date"]
st_dt = first_moc_date.strftime('%Y-%m-%d')
end_dt = first_moc_date # + timedelta(days=1)
st_dt

In [None]:
# NOTE(review): `sym_to_get_lst` is not defined in any visible cell; shows its first two items.
sym_to_get_lst[:2]

In [None]:
# Display the frame returned by the SPY/AAPL download cell.
data

In [None]:
# Show the first history bar on or after st_dt. The original line began with
# `return`, which is a SyntaxError outside a function; as a bare last-line
# expression the notebook displays the result instead.
sym.history(start=st_dt).head(1)

In [None]:
def get_eod_data(row):
    """Add the ticker's trading currency to a row.

    Args:
        row: Mapping/Series with a "yahoo_tsx_symbol" entry, passed to yf.Ticker.
            NOTE(review): the key table shown in this notebook names the column
            "yahoo_symbol" — confirm which key the caller provides.

    Returns:
        The same row with a "currency" entry taken from the ticker's info dict.
        The original had no return statement, so a row-wise apply produced only None.
    """
    sym = yf.Ticker(row["yahoo_tsx_symbol"])
    row["currency"] = sym.info["currency"]
    return row
    
# Apply get_eod_data and append the result to the key table column-wise.
# NOTE(review): `df[""]` selects a column labelled with the empty string, which
# looks like a placeholder — confirm which frame/columns should be passed to apply.
appiled_df = df[""].apply(get_eod_data, axis=1)
df = pd.concat([moc_key_df, appiled_df], axis='columns')

In [None]:
# Take the TSX symbol of the first key-table row and display it.
# NOTE(review): this rebinds `sym`, which other cells use for a yf.Ticker object.
first_key_row = moc_key_df.iloc[0]
sym = first_key_row["Symbol"]
sym

In [None]:
# Name of the Prefect project used by the cells below, and the API client.
project_nm = "MOC"
pre_client = Client()

In [None]:
# Create the Prefect project; failure is reported but not re-raised, so the
# notebook keeps running.
# NOTE(review): `proj_id` stays undefined when creation fails.
try:
    proj_id = pre_client.create_project(project_name=project_nm)
except Exception as err:
    # The original printed `err.with_traceback` — the bound method's repr, not
    # the error. Print the exception itself instead.
    print(repr(err))

In [None]:
# Attach Docker storage built from a Dockerfile to the flow, then register the
# flow under the project named by project_nm.
# NOTE(review): `scrape_tsxmoc_fl` is not defined in any visible cell, and the
# Dockerfile path is an absolute, machine-specific location — consider making it configurable.
scrape_tsxmoc_fl.storage = Docker(dockerfile="/home/ilivni/MOC/Dockerfile")
scrape_tsxmoc_fl.register(project_name=project_nm)

In [None]:
# Schedule that fires every minute, starting one second from now.
# The original bound the object to `Mschedule` (stray leading "M") but then
# displayed `schedule`, which raised NameError.
schedule = IntervalSchedule(
    start_date=datetime.utcnow() + timedelta(seconds=1),
    interval=timedelta(minutes=1),
)
schedule

In [None]:
# Replace any missing Symbol with the literal string "NA" and write the column back.
symbol_filled = moc_df["Symbol"].fillna("NA")
moc_df["Symbol"] = symbol_filled

In [None]:
# Confirm no row is left with a missing Symbol (expected: empty frame).
null_symbol_mask = moc_df["Symbol"].isna()
moc_df[null_symbol_mask]

In [None]:
# Serialise the frame as a JSON string with one object per row (orient="records").
moc_df.to_json(orient="records")