In [None]:
from datetime import datetime, date, timedelta
import json
import numpy as np
import pandas as pd
import simfin as sf
from simfin.names import *
from simfin.datasets import *
import matplotlib.pyplot as plt
import os

# The API key is read from the environment so it never lands in the notebook.
sf.set_api_key(os.environ['SIMFIN_API_KEY'])
# Directory handed to SimFin for its on-disk data (presumably downloaded
# datasets and cache files -- confirm against the SimFin docs).
sf.set_data_dir('~/simfin_data/')

# pandas display settings: show every row, keep wide frames on one line,
# never truncate cell contents, and print floats with three decimals.
# Options are spelled with their full 'display.' prefix: abbreviated names are
# resolved by pattern matching and can break if pandas adds a similar option.
pd.set_option('display.max_rows', None)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [None]:
class Ticker:
    """Cleaned, all-float feature table for a single US-market ticker.

    On construction the share prices and the financial, growth and valuation
    signals are loaded through ``sf.StockHub``, joined column-wise and cleaned
    into ``self.df``.

    Attributes set by ``__init__``:
        ticker: the symbol passed to the constructor.
        hub: the ``sf.StockHub`` instance used for loading.
        df_prices, df_fin_signals, df_growth_signals, df_val_signals:
            the frames exactly as returned by the hub.
        df: the combined frame after cleaning; its ``Date`` column holds
            POSIX timestamps (floats) and remaining NaNs are replaced by 0.
    """

    def __init__(self, ticker, variant='daily', refresh_days=1, refresh_days_shareprices=1):
        """Load, combine and clean the data for ``ticker``.

        Args:
            ticker: ticker symbol, forwarded to the hub as ``tickers=[ticker]``.
            variant: forwarded unchanged to every hub loader call.
            refresh_days: forwarded to ``sf.StockHub``.
            refresh_days_shareprices: forwarded to ``sf.StockHub``.

        Raises:
            KeyError: if the combined frame has no 'Ticker', 'SimFinId' or
                'Date' column after the index is reset.
        """
        super().__init__()
        self.ticker = ticker
        self.hub = sf.StockHub(market='us',
                               tickers=[ticker],
                               refresh_days=refresh_days,
                               refresh_days_shareprices=refresh_days_shareprices)

        # Keep the raw frames around for inspection.
        self.df_fin_signals = self.hub.fin_signals(variant=variant)
        self.df_growth_signals = self.hub.growth_signals(variant=variant)
        self.df_val_signals = self.hub.val_signals(variant=variant)
        self.df_prices = self.hub.load_shareprices(variant=variant)

        combined = pd.concat(
            [self.df_prices, self.df_fin_signals, self.df_growth_signals, self.df_val_signals],
            axis=1,
        )
        # Turn the index into ordinary columns (presumably this is where
        # 'Ticker' and 'Date' come from -- confirm against the hub output).
        combined = combined.reset_index()

        # Identifier columns carry no information for a single-ticker table.
        combined = combined.drop(columns=['Ticker', 'SimFinId'])

        # Dates become POSIX timestamps so the whole frame can be cast to float.
        # datetime.timestamp() reads naive values as local time, which matches
        # the local-time conversion used by the date filters below.
        combined['Date'] = pd.to_datetime(combined['Date']).apply(datetime.timestamp)

        self.df = combined.astype('float')

        self.drop_columns_if_nans_more_than()
        self.drop_rows_if_nans_more_than()
        # fillna returns a new frame; the result must be assigned back or the
        # NaNs that survived the two drops above stay in self.df.
        self.df = self.df.fillna(0)

    def drop_columns_if_nans_more_than(self, thresh=0.5):
        """Drop, in place, columns that have too few non-NaN values.

        Args:
            thresh: fraction of the row count; a column is kept only if it
                has at least ``thresh * number_of_rows`` non-NaN values.

        Returns:
            pandas Index with the names of the dropped columns (also printed).
        """
        columns_before = self.df.columns
        self.df.dropna(axis=1, thresh=thresh * self.df.shape[0], inplace=True)
        diff = columns_before.difference(self.df.columns)

        print("Columns Dropped:\n", diff)
        print()
        return diff

    def drop_rows_if_nans_more_than(self, thresh=0.5):
        """Drop, in place, rows that have too few non-NaN values.

        Args:
            thresh: fraction of the column count; a row is kept only if it
                has at least ``thresh * number_of_columns`` non-NaN values.

        Returns:
            DataFrame of the rows that were dropped (also printed). It carries
            the extra ``_merge`` indicator column produced by the left merge.
        """
        rows_before = self.df.copy()
        self.df.dropna(axis=0, thresh=thresh * self.df.shape[1], inplace=True)
        # Left-merge the snapshot against the surviving rows on all shared
        # columns; rows that are not marked 'both' are the ones just removed.
        diff = rows_before.merge(self.df, indicator=True, how='left').loc[lambda x: x['_merge'] != 'both']

        print("Rows Dropped:\n", diff)
        print()
        return diff

    def percentage_columns_null(self):
        """Print the fraction of null values per column, largest first."""
        print((self.df.isnull().sum() / len(self.df)).sort_values(ascending=False))

    def get_data_up_to(self, date):
        """Return the rows of ``self.df`` dated on or before ``date``.

        Args:
            date: ISO-format date string (parsed with ``datetime.fromisoformat``).
        """
        cutoff = datetime.fromisoformat(date).timestamp()
        return self.df[self.df['Date'] <= cutoff]

    def get_open_price_on(self, date):
        """Return the 'Open' value of the last row dated on or before ``date``.

        Args:
            date: ISO-format date string (parsed with ``datetime.fromisoformat``).

        Raises:
            IndexError: if no row is dated on or before ``date``.
        """
        history = self.get_data_up_to(date)
        if history.empty:
            raise IndexError(f"no price data on or before {date} for {self.ticker}")
        return history.tail(1)['Open'].values[0]

# Build the cleaned table for TSLA; the constructor prints which columns and
# rows it dropped.
tsla = Ticker('TSLA')

# Optional inspection calls (disabled):
# tsla.df.info()
# tsla.percentage_columns_null()
# tsla.df.head()

Dataset "us-income-ttm" on disk (0 days old).
- Loading from disk ... Done!
Dataset "us-balance-ttm" on disk (0 days old).
- Loading from disk ... Done!
Dataset "us-cashflow-ttm" on disk (0 days old).
- Loading from disk ... Done!
Dataset "us-shareprices-daily" on disk (0 days old).
- Loading from disk ... Done!
Cache-file 'fin_signals-b1050b9e.pickle' on disk (0 days old).
- Loading from disk ... Done!
Dataset "us-income-quarterly" on disk (0 days old).
- Loading from disk ... Done!
Dataset "us-balance-quarterly" on disk (0 days old).
- Loading from disk ... Done!
Dataset "us-cashflow-quarterly" on disk (0 days old).
- Loading from disk ... Done!
Cache-file 'growth_signals-b1050b9e.pickle' on disk (0 days old).
- Loading from disk ... Done!
Cache-file 'val_signals-67c6b2dc.pickle' on disk (0 days old).
- Loading from disk ... Done!
Columns Dropped:
 Index(['Dividend', 'Dividend Yield'], dtype='object')

Rows Dropped:
              Date   Open    Low   High  Close  Adj. Close        Vo

In [None]:
def daterange(start_date, end_date):
    """Yield consecutive days starting at ``start_date`` and stopping before ``end_date``.

    The upper bound is exclusive, mirroring ``range``. Nothing is yielded when
    ``end_date`` is less than one whole day after ``start_date``.
    """
    span = int((end_date - start_date).days)
    offset = 0
    while offset < span:
        yield start_date + timedelta(offset)
        offset += 1

# Print, for each day in the window, the 'Open' value of the last row dated on
# or before that day. end_date itself is not visited because daterange stops
# before its upper bound.
start_date = date(2020, 1, 1)
end_date = date(2020, 1, 10)
for single_date in daterange(start_date, end_date):
    print(single_date, tsla.get_open_price_on(single_date.strftime("%Y-%m-%d")))

2020-01-01 27.0
2020-01-02 28.3
2020-01-03 29.37
2020-01-04 29.37
2020-01-05 29.37
2020-01-06 29.36
2020-01-07 30.76
2020-01-08 31.58
2020-01-09 33.14


In [None]:
# 'Date' holds POSIX timestamps, so convert the first row's value back into a
# calendar date (local time zone).
start = date.fromtimestamp(tsla.df.iloc[0]['Date'])
start

datetime.date(2017, 3, 31)

In [None]:
# Same conversion for the last row of the table.
end = date.fromtimestamp(tsla.df.iloc[-1]['Date'])
end

datetime.date(2021, 12, 31)

In [None]:
# Number of calendar days between the first and last rows of the table.
(end - start).days

1736