# Bar data

In [6]:
from ib_insync import *
# Let ib_insync's blocking calls run inside the notebook's already-running event loop.
util.startLoop()  

# `ib` is the shared connection object used by the data-download functions in later cells.
ib = IB()
# NOTE(review): '+PACEAPI' presumably asks TWS/Gateway to pace requests itself
# rather than reject bursts as pacing violations — confirm against the IB API docs.
ib.client.setConnectOptions('+PACEAPI')
# Connects to a locally running TWS/Gateway.
# NOTE(review): 7497 is presumably the paper-trading port — confirm before running.
# Kept as the last expression so the cell displays the connection status.
ib.connect('127.0.0.1', 7497, clientId=17)

<IB connected to 127.0.0.1:7497 clientId=17>

In [7]:
import numpy as np
import yahoo_fin.stock_info as si
import requests
import pandas as pd
import datetime as dt
import os
import os.path
from dateutil.relativedelta import relativedelta

In [10]:
def GetInputs(inputs_path):
    """Load the two-column, header-less inputs CSV into a dictionary.

    Column 0 supplies the keys and column 1 the values. The value stored
    under ``'symbols'`` is a ``|``-separated string and is returned as a list.

    Args:
        inputs_path: Path of the CSV file to read.

    Returns:
        dict mapping each key to its value, with ``'symbols'`` as a list of str.

    Raises:
        KeyError: If the file has no ``symbols`` row.
    """
    table = pd.read_csv(inputs_path, header=None)
    settings = {key: value for key, value in zip(table[0], table[1])}
    symbols_field = settings['symbols']
    settings['symbols'] = symbols_field.split("|")
    return settings


def CreateFolderIfFolderDoesntExist(symbol, data_path, symbol_kind):
    """Ensure the folder ``<data_path>/<symbol_kind>/<symbol>`` exists.

    Missing parent folders (for example a brand-new ``symbol_kind`` folder)
    are created as well, so the first run against an empty data directory
    works.

    Args:
        symbol: Ticker symbol; used as the innermost folder name.
        data_path: Root folder of the stored data.
        symbol_kind: Sub-folder grouping symbols of one kind (e.g. "stocks").

    Returns:
        None. Prints a message only when a folder was actually created.
    """
    folder_path = data_path + "/" + symbol_kind + "/" + symbol
    if not os.path.exists(folder_path):
        # makedirs creates intermediate folders; exist_ok avoids a crash if
        # something else creates the folder between the check and this call.
        os.makedirs(folder_path, exist_ok=True)
        print("created folder for ", folder_path)


def GetDurationOfDataNeeded(file_path):
    """Return how long ago the newest row of a stored CSV file is dated.

    Args:
        file_path: Path of a CSV file that has a ``date`` column.

    Returns:
        The difference between the current local time and the latest value
        in the ``date`` column, parsed with ``pd.to_datetime``.
    """
    stored = pd.read_csv(file_path)
    latest_stored_time = pd.to_datetime(stored['date']).max()
    current_time = dt.datetime.now()
    return current_time - latest_stored_time

def GetTimeIncrement(interval_time):
    """Translate a bar-size label into the length of one bar.

    Args:
        interval_time: One of "10 mins", "1 hour" or "1 day".

    Returns:
        ``datetime.timedelta`` for a recognised label, otherwise ``None``.
    """
    known_increments = (
        ("10 mins", dt.timedelta(minutes=10)),
        ("1 hour", dt.timedelta(hours=1)),
        ("1 day", dt.timedelta(days=1)),
    )
    for label, increment in known_increments:
        if interval_time == label:
            return increment
    return None


def _RequestBarsAsDataFrame(contract, duration_time, interval_time):
    """Request regular-trading-hours TRADES bars ending now; return ``util.df`` of them."""
    bars = ib.reqHistoricalData(
            contract,
            endDateTime='',
            durationStr=duration_time,
            barSizeSetting=interval_time,
            whatToShow='TRADES',
            useRTH=True,
            formatDate=1)
    return util.df(bars)


def _ReadBackAsStoredCsv(frame):
    """Return ``frame`` exactly as it would look after being written to and read from CSV."""
    import io
    return pd.read_csv(io.StringIO(frame.to_csv(index=False)))


def UpdatePriceFiles(symbol, data_path, symbol_kind, interval_time_list):
    """Create or extend the stored price-bar CSV files of one symbol.

    For every bar size in ``interval_time_list`` the file
    ``<data_path>/<symbol_kind>/<symbol>/<bar size with "_" for " ">.csv``
    is handled as follows:

    * If the file does not exist, 365 days of bars are requested and stored.
    * If it exists and its newest row is older than one bar, the missing
      days are requested and merged in; for a date present in both, the
      freshly downloaded row wins.
    * Otherwise the file is left untouched.

    Args:
        symbol: Ticker symbol; requested as a USD stock routed via SMART.
        data_path: Root folder of the stored data.
        symbol_kind: Sub-folder grouping symbols of one kind (e.g. "stocks").
        interval_time_list: Bar-size labels such as "1 day" or "10 mins".

    Returns:
        None. Files are written as a side effect and progress is printed.
    """
    contract = Stock(symbol, 'SMART', 'USD')
    ib.qualifyContracts(contract)
    for interval_time in interval_time_list:
        time_increment = GetTimeIncrement(interval_time)
        interval_time_string = interval_time.replace(" ", "_")
        file_path = data_path + "/" + symbol_kind + "/" + symbol + "/" + interval_time_string + ".csv"

        if not os.path.exists(file_path):
            df = _RequestBarsAsDataFrame(contract, '365 D', interval_time)
            # util.df gives None when no bars came back; nothing to store then.
            if df is None or df.empty:
                print("No data returned for ", symbol, interval_time)
                continue
            df.to_csv(file_path)
            print("Created new file for ", symbol, interval_time)
            continue

        time_since_last_data_stored = GetDurationOfDataNeeded(file_path)
        if not time_since_last_data_stored > time_increment:
            # Newest stored bar is less than one bar old: already up to date.
            continue

        days_needed = time_since_last_data_stored.days + 1
        if days_needed > 365:
            # IB rejects day-based durations longer than 365 days; ask in
            # whole years instead (rounded up so the gap is fully covered).
            duration_time = str(-(-days_needed // 365)) + " Y"
        else:
            duration_time = str(days_needed) + " D"

        new_data_df = _RequestBarsAsDataFrame(contract, duration_time, interval_time)
        if new_data_df is None or new_data_df.empty:
            print("No new data returned for ", symbol, interval_time)
            continue

        existing_data_df = pd.read_csv(file_path)
        # Drop the unnamed index column(s) that to_csv wrote on earlier runs.
        existing_data_df = existing_data_df.drop(existing_data_df.filter(regex='Unnamed').columns, axis=1)
        # Stored dates are plain strings while fresh bars carry date/datetime
        # objects, which never compare equal. Bring the new rows into the
        # stored representation so overlapping bars are really de-duplicated.
        new_data_df = _ReadBackAsStoredCsv(new_data_df)

        merged_df = pd.concat([existing_data_df, new_data_df]).drop_duplicates()
        merged_df = merged_df.drop_duplicates(subset='date', keep="last")
        merged_df = merged_df.dropna().reset_index(drop=True)
        merged_df.to_csv(file_path)
        print("Added data to ", symbol, interval_time)
    return

def UpdateEarningsFile(symbol, data_path, symbol_kind):
    """Refresh ``earnings.csv`` with the past earnings dates of a symbol.

    The file lives at ``<data_path>/<symbol_kind>/<symbol>/earnings.csv``.
    It is rewritten unless it already exists and was modified today, so a
    file left over from an earlier day is refreshed rather than kept forever.

    Args:
        symbol: Ticker symbol passed to ``si.get_earnings_history``.
        data_path: Root folder of the stored data.
        symbol_kind: Sub-folder grouping symbols of one kind (e.g. "stocks").

    Returns:
        None. The file is written as a side effect.
    """
    file_path = data_path + "/" + symbol_kind + "/" + symbol + "/earnings.csv"
    if os.path.exists(file_path):
        filetime = dt.datetime.fromtimestamp(os.path.getmtime(file_path))
        if filetime.date() == dt.date.today():
            # Already refreshed today; skip the download.
            return

    earnings_dict = si.get_earnings_history(symbol)
    df_earnings = pd.DataFrame(earnings_dict)
    if 'startdatetime' in df_earnings.columns:
        today = dt.date.today()
        all_dates = pd.to_datetime(df_earnings.startdatetime).dt.date.to_list()
        # Keep only announcements that have already happened.
        earnings_dates_list = [earning_date for earning_date in all_dates if earning_date <= today]
    else:
        # No earnings history came back; store an empty list.
        earnings_dates_list = []
    df = pd.DataFrame({"date": earnings_dates_list})
    df.to_csv(file_path)
    print("Updated file for earnings for", symbol)
    return

def UpdateOccFiles(symbol, data_path, symbol_kind):
    """Refresh ``occ.csv`` with option expiries and strikes from the OCC series search.

    The file lives at ``<data_path>/<symbol_kind>/<symbol>/occ.csv``. It is
    rewritten unless it already exists and was modified today, so a file left
    over from an earlier day is refreshed rather than kept forever.

    A row is kept when its expiry lies between today and four months from
    today and its strike is a multiple of 0.5. Expiries with eight or fewer
    such strikes are then dropped.

    Args:
        symbol: Underlying ticker symbol appended to the OCC search URL.
        data_path: Root folder of the stored data.
        symbol_kind: Sub-folder grouping symbols of one kind (e.g. "stocks").

    Returns:
        None. The file is written as a side effect.

    Raises:
        requests.HTTPError: If the OCC service answers with an error status.
    """
    file_path = data_path + "/" + symbol_kind + "/" + symbol + "/occ.csv"
    if os.path.exists(file_path):
        filetime = dt.datetime.fromtimestamp(os.path.getmtime(file_path))
        if filetime.date() == dt.date.today():
            # Already refreshed today; skip the download.
            return

    tod = dt.date.today()
    latest_expiry = tod + relativedelta(months=4)
    url = 'https://marketdata.theocc.com/series-search?symbolType=U&symbol='+symbol
    # A timeout keeps a hung connection from blocking the whole update run.
    r = requests.get(url, timeout=30)
    # Fail loudly instead of parsing an error page as option data.
    r.raise_for_status()
    li = r.text.splitlines()
    list_opt = []
    # The first seven lines and the last line are skipped as non-data lines.
    for lin in li[7:-1]:
        myli = lin.split()
        if len(myli) < 6:
            # Blank or truncated line: not a series row.
            continue
        # Fields 1-3 are expiry year/month/day; fields 4 and 5 are the
        # integer and decimal parts of the strike.
        k = float(myli[4] + "." + myli[5])
        exp = dt.date(int(myli[1]),int(myli[2]),int(myli[3]))
        if exp>=tod and exp <= latest_expiry and (k*10)%5==0:
            list_opt.append( {'exp': exp.strftime('%Y%m%d'), 'k': str(k)} )
    # Explicit columns keep groupby('exp') working when nothing matched.
    df_opt = pd.DataFrame(list_opt, columns=['exp', 'k'])
    df_opt = df_opt.groupby('exp').filter(lambda x: len(x)>8)
    df_opt.to_csv(file_path)
    print("Updated file for occ for", symbol)
    return

def UpdateData(symbol, data_path, symbol_kind, interval_time_list):
    """Run every per-symbol update step in order.

    Ensures the symbol's folder exists, then updates its price files,
    its earnings file and its OCC file.

    Args:
        symbol: Ticker symbol to update.
        data_path: Root folder of the stored data.
        symbol_kind: Sub-folder grouping symbols of one kind (e.g. "stocks").
        interval_time_list: Bar-size labels forwarded to ``UpdatePriceFiles``.

    Returns:
        None.
    """
    location = (symbol, data_path, symbol_kind)
    CreateFolderIfFolderDoesntExist(*location)
    UpdatePriceFiles(*location, interval_time_list)
    UpdateEarningsFile(*location)
    UpdateOccFiles(*location)






In [11]:
# Locations are relative to the notebook's working directory.
inputs_path = "./../../Inputs/historical_data_inputs.csv"
data_path = "./../../HistoricalData"
# Bar sizes to store for each symbol; each must be a label GetTimeIncrement recognises.
interval_time_list = ['1 day', '1 hour', '10 mins']
inputs_dict = GetInputs(inputs_path)
# Update every configured symbol in turn; all are stored under the "stocks" kind.
for symbol in inputs_dict['symbols']:
    UpdateData(symbol, data_path, "stocks", interval_time_list)

Added data to  AAPL 1 hour
Added data to  AAPL 10 mins
Updated file for occ for AAPL
Added data to  MSFT 1 day
Added data to  MSFT 1 hour
Added data to  MSFT 10 mins
Updated file for occ for MSFT
Added data to  AMZN 1 day
Added data to  AMZN 1 hour
Added data to  AMZN 10 mins
Updated file for occ for AMZN
Added data to  META 1 day
Added data to  META 1 hour
Added data to  META 10 mins
Updated file for occ for META
Added data to  TSLA 1 day
Added data to  TSLA 1 hour
Added data to  TSLA 10 mins
Updated file for occ for TSLA
Added data to  OXY 1 day
Added data to  OXY 1 hour
Added data to  OXY 10 mins
Updated file for occ for OXY


In [12]:
# Close the connection opened in the first cell once all updates are done.
ib.disconnect()