In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# pip install -r requirements.txt
# !pip install -e ./

import sys
!{sys.executable} -m pip install -e ./


Obtaining file:///home/ivelin/canswim
  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: canswim
  Attempting uninstall: canswim
    Found existing installation: canswim 0.0.1
    Uninstalling canswim-0.0.1:
      Successfully uninstalled canswim-0.0.1
  Running setup.py develop for canswim
Successfully installed canswim-0.0.1


## Setup third party service provider connections

In [3]:
import yfinance as yf
import pandas as pd
from pathlib import Path
import numpy as np

In [4]:
from dotenv import load_dotenv
import os

load_dotenv(override=True)

FMP_API_KEY=os.getenv("FMP_API_KEY")

print(f'FMP_API_KEY={FMP_API_KEY!= None}')

FMP_API_KEY=True


## Load list of IBD Growth Stocks

In [5]:
all_stocks_file = 'all_stocks.csv'

In [6]:
all_stock_set = set()
stock_files = [
    'IBD50.csv',
    'IBD250.csv',
    'ibdlive_picks.csv',
    'russell2000_iwm_holdings.csv',
    'sp500_ivv_holdings.csv',
    'nasdaq100_cndx_holdings.csv',
    all_stocks_file
    ]
for f in stock_files:
    fp = f'data/data-3rd-party/{f}'
    if Path(fp).is_file():
        stocks = pd.read_csv(fp)
        print(f'loaded {len(stocks)} symbols from {fp}')
        stock_set = set(stocks['Symbol'])
        print(f'{len(stock_set)} symbols in stock set')
        all_stock_set |= stock_set
        print(f'total symbols loaded: {len(all_stock_set)}')
    else:
        print(f'{fp} not found.')


loaded 50 symbols from data/data-3rd-party/IBD50.csv
50 symbols in stock set
total symbols loaded: 50
loaded 300 symbols from data/data-3rd-party/IBD250.csv
300 symbols in stock set
total symbols loaded: 309
loaded 434 symbols from data/data-3rd-party/ibdlive_picks.csv
292 symbols in stock set
total symbols loaded: 437
loaded 1974 symbols from data/data-3rd-party/russell2000_iwm_holdings.csv
1971 symbols in stock set
total symbols loaded: 2262
loaded 509 symbols from data/data-3rd-party/sp500_ivv_holdings.csv
509 symbols in stock set
total symbols loaded: 2649
loaded 110 symbols from data/data-3rd-party/nasdaq100_cndx_holdings.csv
109 symbols in stock set
total symbols loaded: 2661
loaded 2650 symbols from data/data-3rd-party/all_stocks.csv
2650 symbols in stock set
total symbols loaded: 2673


In [7]:
len(all_stock_set), all_stock_set

(2673,
 {'ASB',
  'CIM',
  'SEER',
  'JANX',
  'DOW',
  'GDYN',
  'LCNB',
  'ARTNA',
  'MNTK',
  'SITC',
  'AWR',
  'WERN',
  'IAS',
  'GEVO',
  'PKBK',
  'PAGS',
  'BTBT',
  'FSLR',
  'EYEN',
  'GTLB',
  'GEFB',
  'WRK',
  'TDY',
  'SONO',
  'MLP',
  'TARS',
  'SPT',
  'ATLC',
  'CXM',
  'CHRS',
  'IPI',
  'FLYW',
  'SBGI',
  'GSHD',
  'CCSI',
  'BITO',
  'IOT',
  'LNN',
  'TILE',
  'MTRN',
  'CLMB',
  'ECL',
  'OTTR',
  'DAL',
  'OKE',
  'AMT',
  'CUBI',
  'IMGN',
  'DORM',
  'MET',
  'PRAA',
  'MRK',
  'COFS',
  'VCEL',
  'WSR',
  'CVBF',
  'HQI',
  'DCPH',
  'DGICA',
  'COOK',
  'SKIN',
  'KALU',
  'REG',
  'ONL',
  'CDZI',
  'GDDY',
  'BKU',
  'AMN',
  'BBUC',
  'P5N994',
  'AVAH',
  'DZSI',
  'UNTY',
  'PEP',
  'TDUP',
  'FCEL',
  'ZURA',
  'JNJ',
  'ACRE',
  'LNTH',
  'SRRK',
  'MMYT',
  'VTOL',
  'DO',
  'DLTH',
  'NBN',
  'HRT',
  'NOG',
  'SUPN',
  'GLT',
  'CPSS',
  'MRTN',
  'ARWR',
  'MGNI',
  'TGH',
  'SAMG',
  'PETS',
  'TSE',
  'KMX',
  'BLK',
  'ANET',
  'POWW',
  'ULT

In [8]:
stocks_ticker_set = all_stock_set

In [9]:
growth_stocks_df = pd.DataFrame()
growth_stocks_df['Symbol'] = list(stocks_ticker_set)
growth_stocks_df = growth_stocks_df.set_index(['Symbol'])
growth_stocks_df.index = growth_stocks_df.index.drop_duplicates()
# drop known junk symbols from the data feed
junk = ['MSFUT', 'GEFB', 'METCV', 'SGAFT', 'NQH4', 'XTSLA', '-', 'PDLI', 'ADRO', 'ICSUAGD', 'BFB', 'GTXI', 'P5N994', 'LGFB', 'MLIFT', 'ESH4', 'LGFA', 'MOGA', 'PBRA', 'BRKB', 'RTYH4', '\xa0', 'CRDA']
growth_stocks_df.index = growth_stocks_df.index.drop(junk)
growth_stocks_df

ASB
CIM
SEER
JANX
DOW
...
MTW
SPCE
AMRX
QNST
GEO


In [10]:
growth_stocks_df.to_csv(f'data/data-3rd-party/{all_stocks_file}')

## Prepare broad market indicies

In [10]:
# Capture S&P500, NASDAQ100 and Russell 200 indexes and their equal weighted counter parts
# As well as VIX volatility index, DYX US Dollar index, TNX US 12 Weeks Treasury Yield, 5 Years Treasury Yield and 10 Year Treasuries Yield
broad_market_indicies = '^SPX ^SPXEW ^NDX ^NDXE ^RUT ^R2ESC ^VIX DX-Y.NYB ^IRX ^FVX ^TNX'

In [11]:
broad_market = yf.download(broad_market_indicies, period='max', group_by='tickers')
broad_market

[*********************100%%**********************]  11 of 11 completed


Unnamed: 0_level_0,^R2ESC,^R2ESC,^R2ESC,^R2ESC,^R2ESC,^R2ESC,^NDXE,^NDXE,^NDXE,^NDXE,...,^TNX,^TNX,^TNX,^TNX,^SPX,^SPX,^SPX,^SPX,^SPX,^SPX
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,...,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1927-12-30,,,,,,,,,,,...,,,,,17.660000,17.660000,17.660000,17.660000,17.660000,0.000000e+00
1928-01-03,,,,,,,,,,,...,,,,,17.760000,17.760000,17.760000,17.760000,17.760000,0.000000e+00
1928-01-04,,,,,,,,,,,...,,,,,17.719999,17.719999,17.719999,17.719999,17.719999,0.000000e+00
1928-01-05,,,,,,,,,,,...,,,,,17.549999,17.549999,17.549999,17.549999,17.549999,0.000000e+00
1928-01-06,,,,,,,,,,,...,,,,,17.660000,17.660000,17.660000,17.660000,17.660000,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-29,,,,,,,7265.220215,7339.299805,7262.799805,7339.000000,...,4.089,4.091,4.091,0.0,4892.950195,4929.310059,4887.399902,4927.930176,4927.930176,3.525160e+09
2024-01-30,,,,,,,7300.240234,7312.680176,7285.680176,7294.640137,...,4.041,4.059,4.059,0.0,4925.890137,4931.089844,4916.270020,4924.970215,4924.970215,3.836130e+09
2024-01-31,,,,,,,7242.250000,7289.140137,7183.790039,7190.290039,...,3.944,3.967,3.967,0.0,4899.189941,4906.750000,4845.149902,4845.649902,4845.649902,4.696120e+09
2024-02-01,,,,,,,7214.520020,7276.729980,7179.700195,7274.620117,...,3.817,3.863,3.863,0.0,4861.109863,4906.970215,4853.520020,4906.189941,4906.189941,4.386090e+09


In [None]:
# broad_market.to_csv('data/broad_market.csv.bz2', index='Date')


In [12]:
bm_file = 'data/data-3rd-party/broad_market.parquet'
broad_market.to_parquet(bm_file)

In [13]:
bm = pd.read_parquet(bm_file)

In [14]:
bm

Unnamed: 0_level_0,^R2ESC,^R2ESC,^R2ESC,^R2ESC,^R2ESC,^R2ESC,^NDXE,^NDXE,^NDXE,^NDXE,...,^TNX,^TNX,^TNX,^TNX,^SPX,^SPX,^SPX,^SPX,^SPX,^SPX
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,...,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1927-12-30,,,,,,,,,,,...,,,,,17.660000,17.660000,17.660000,17.660000,17.660000,0.000000e+00
1928-01-03,,,,,,,,,,,...,,,,,17.760000,17.760000,17.760000,17.760000,17.760000,0.000000e+00
1928-01-04,,,,,,,,,,,...,,,,,17.719999,17.719999,17.719999,17.719999,17.719999,0.000000e+00
1928-01-05,,,,,,,,,,,...,,,,,17.549999,17.549999,17.549999,17.549999,17.549999,0.000000e+00
1928-01-06,,,,,,,,,,,...,,,,,17.660000,17.660000,17.660000,17.660000,17.660000,0.000000e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-29,,,,,,,7265.220215,7339.299805,7262.799805,7339.000000,...,4.089,4.091,4.091,0.0,4892.950195,4929.310059,4887.399902,4927.930176,4927.930176,3.525160e+09
2024-01-30,,,,,,,7300.240234,7312.680176,7285.680176,7294.640137,...,4.041,4.059,4.059,0.0,4925.890137,4931.089844,4916.270020,4924.970215,4924.970215,3.836130e+09
2024-01-31,,,,,,,7242.250000,7289.140137,7183.790039,7190.290039,...,3.944,3.967,3.967,0.0,4899.189941,4906.750000,4845.149902,4845.649902,4845.649902,4.696120e+09
2024-02-01,,,,,,,7214.520020,7276.729980,7179.700195,7274.620117,...,3.817,3.863,3.863,0.0,4861.109863,4906.970215,4853.520020,4906.189941,4906.189941,4.386090e+09


## Prepare Sector Indicies

In [1]:
sector_indicies = 'XLE ^SP500-15 ^SP500-20 ^SP500-25 ^SP500-30 ^SP500-35 ^SP500-40 ^SP500-45 ^SP500-50 ^SP500-55 ^SP500-60'

In [3]:
import yfinance as yf
sectors = yf.download(sector_indicies, period='max', group_by='tickers')
sectors

[*********************100%%**********************]  11 of 11 completed


Unnamed: 0_level_0,^SP500-25,^SP500-25,^SP500-25,^SP500-25,^SP500-25,^SP500-25,^SP500-45,^SP500-45,^SP500-45,^SP500-45,...,^SP500-35,^SP500-35,^SP500-35,^SP500-35,^SP500-40,^SP500-40,^SP500-40,^SP500-40,^SP500-40,^SP500-40
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,...,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1993-05-04,99.760002,99.760002,99.760002,99.760002,99.760002,0.0,76.230003,76.230003,76.230003,76.230003,...,90.959999,90.959999,90.959999,0.0,106.129997,106.129997,106.129997,106.129997,106.129997,0.0
1993-05-05,99.500000,99.500000,99.500000,99.500000,99.500000,0.0,76.889999,76.889999,76.889999,76.889999,...,91.760002,91.760002,91.760002,0.0,105.000000,105.000000,105.000000,105.000000,105.000000,0.0
1993-05-06,99.190002,99.190002,99.190002,99.190002,99.190002,0.0,76.419998,76.419998,76.419998,76.419998,...,90.550003,90.550003,90.550003,0.0,104.629997,104.629997,104.629997,104.629997,104.629997,0.0
1993-05-07,99.129997,99.129997,99.129997,99.129997,99.129997,0.0,77.089996,77.089996,77.089996,77.089996,...,90.120003,90.120003,90.120003,0.0,103.550003,103.550003,103.550003,103.550003,103.550003,0.0
1993-05-10,99.660004,99.660004,99.660004,99.660004,99.660004,0.0,77.029999,77.029999,77.029999,77.029999,...,90.230003,90.230003,90.230003,0.0,103.940002,103.940002,103.940002,103.940002,103.940002,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-06,1406.000000,1416.650024,1405.650024,1415.989990,1415.989990,437199900.0,3652.020020,3656.310059,3601.040039,3629.600098,...,1664.260010,1677.469971,1677.469971,268512500.0,643.739990,646.059998,642.289978,645.849976,645.849976,284950900.0
2024-02-07,1423.260010,1436.670044,1420.609985,1431.920044,1431.920044,488606300.0,3653.239990,3681.919922,3640.780029,3681.639893,...,1678.890015,1681.819946,1681.819946,227851800.0,648.280029,650.890015,644.419983,650.320007,650.320007,300426600.0
2024-02-08,1431.150024,1438.010010,1428.619995,1436.109985,1436.109985,380864100.0,3685.949951,3698.969971,3680.320068,3686.899902,...,1671.310059,1678.920044,1678.920044,210514200.0,647.570007,649.179993,643.400024,647.109985,647.109985,312500100.0
2024-02-09,1436.229980,1452.979980,1434.650024,1450.310059,1450.310059,395630000.0,3704.330078,3744.179932,3699.159912,3742.199951,...,1672.439941,1678.000000,1678.000000,198299800.0,646.590027,649.419983,645.039978,649.169983,649.169983,282978500.0


In [None]:
# sectors.to_csv('data/sectors.csv.bz2')

In [4]:
sectors_file = 'data/data-3rd-party/sectors.parquet'
sectors.to_parquet(sectors_file)

In [6]:
import pandas as pd
tmp_s = pd.read_parquet(sectors_file)
tmp_s

Unnamed: 0_level_0,^SP500-25,^SP500-25,^SP500-25,^SP500-25,^SP500-25,^SP500-25,^SP500-45,^SP500-45,^SP500-45,^SP500-45,...,^SP500-35,^SP500-35,^SP500-35,^SP500-35,^SP500-40,^SP500-40,^SP500-40,^SP500-40,^SP500-40,^SP500-40
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,...,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1993-05-04,99.760002,99.760002,99.760002,99.760002,99.760002,0.0,76.230003,76.230003,76.230003,76.230003,...,90.959999,90.959999,90.959999,0.0,106.129997,106.129997,106.129997,106.129997,106.129997,0.0
1993-05-05,99.500000,99.500000,99.500000,99.500000,99.500000,0.0,76.889999,76.889999,76.889999,76.889999,...,91.760002,91.760002,91.760002,0.0,105.000000,105.000000,105.000000,105.000000,105.000000,0.0
1993-05-06,99.190002,99.190002,99.190002,99.190002,99.190002,0.0,76.419998,76.419998,76.419998,76.419998,...,90.550003,90.550003,90.550003,0.0,104.629997,104.629997,104.629997,104.629997,104.629997,0.0
1993-05-07,99.129997,99.129997,99.129997,99.129997,99.129997,0.0,77.089996,77.089996,77.089996,77.089996,...,90.120003,90.120003,90.120003,0.0,103.550003,103.550003,103.550003,103.550003,103.550003,0.0
1993-05-10,99.660004,99.660004,99.660004,99.660004,99.660004,0.0,77.029999,77.029999,77.029999,77.029999,...,90.230003,90.230003,90.230003,0.0,103.940002,103.940002,103.940002,103.940002,103.940002,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-06,1406.000000,1416.650024,1405.650024,1415.989990,1415.989990,437199900.0,3652.020020,3656.310059,3601.040039,3629.600098,...,1664.260010,1677.469971,1677.469971,268512500.0,643.739990,646.059998,642.289978,645.849976,645.849976,284950900.0
2024-02-07,1423.260010,1436.670044,1420.609985,1431.920044,1431.920044,488606300.0,3653.239990,3681.919922,3640.780029,3681.639893,...,1678.890015,1681.819946,1681.819946,227851800.0,648.280029,650.890015,644.419983,650.320007,650.320007,300426600.0
2024-02-08,1431.150024,1438.010010,1428.619995,1436.109985,1436.109985,380864100.0,3685.949951,3698.969971,3680.320068,3686.899902,...,1671.310059,1678.920044,1678.920044,210514200.0,647.570007,649.179993,643.400024,647.109985,647.109985,312500100.0
2024-02-09,1436.229980,1452.979980,1434.650024,1450.310059,1450.310059,395630000.0,3704.330078,3744.179932,3699.159912,3742.199951,...,1672.439941,1678.000000,1678.000000,198299800.0,646.590027,649.419983,645.039978,649.169983,649.169983,282978500.0


## Prepare stocks price data

In [12]:
price_interval = "1d" # "1wk"

In [27]:
stock_price_data = yf.download(all_stock_set, period='max', group_by='tickers', interval=price_interval)
stock_price_data

[**********************68%%*******               ]  1821 of 2673 completed

Failed to get ticker 'THE CONTENT CONTAINED HEREIN IS OWNED OR LICENSED BY BLACKROCK AND/OR ITS THIRD-PARTY INFORMATION PROVIDERS AND IS PROTECTED BY APPLICABLE COPYRIGHTS, TRADEMARKS, SERVICE MARKS, AND/OR OTHER INTELLECTUAL PROPERTY RIGHTS. SUCH CONTENT IS SOLELY FOR YOUR PERSONAL, NON-COMMERCIAL USE. ACCORDINGLY, YOU MAY NOT COPY, DISTRIBUTE, MODIFY, POST, FRAME OR DEEP LINK THIS CONTENT. YOU MAY DOWNLOAD MATERIAL DISPLAYED ON THIS WEBSITE FOR YOUR PERSONAL USE PROVIDED YOU ALSO RETAIN ALL COPYRIGHT AND OTHER PROPRIETARY NOTICES CONTAINED ON THE MATERIALS. MODIFICATION OR USE OF THE MATERIALS FOR ANY OTHER PURPOSE VIOLATES BLACKROCK'S INTELLECTUAL PROPERTY RIGHTS.
HOLDINGS SUBJECT TO CHANGE. SEE WWW.ISHARES.COM FOR THE MOST RECENT FUNDS HOLDINGS.
THE VALUES FOR “PRICE” SHOWN HEREIN GENERALLY REPRESENT A PRICE PROVIDED BY A THIRD-PARTY PRICING VENDOR FOR THE PORTFOLIO HOLDING AND DO NOT REFLECT THE IMPACT OF SYSTEMATIC FAIR VALUATION (“THE VENDOR PRICE”). THE VENDOR PRICE IS NOT NECE

[*********************100%%**********************]  2673 of 2673 completed


24 Failed downloads:
['ESH4', 'MOGA', 'MSFUT', 'PDLI', 'RTYH4', 'METCV', 'MLIFT', 'LGFA', 'SGAFT', 'P5N994', 'BFB', 'BRKB', 'GEFB', 'ICSUAGD', 'GTXI', 'NQH4', 'XTSLA', 'LGFB', '-', 'PBRA', 'ADRO']: Exception('%ticker%: No timezone found, symbol may be delisted')
["THE CONTENT CONTAINED HEREIN IS OWNED OR LICENSED BY BLACKROCK AND/OR ITS THIRD-PARTY INFORMATION PROVIDERS AND IS PROTECTED BY APPLICABLE COPYRIGHTS, TRADEMARKS, SERVICE MARKS, AND/OR OTHER INTELLECTUAL PROPERTY RIGHTS. SUCH CONTENT IS SOLELY FOR YOUR PERSONAL, NON-COMMERCIAL USE. ACCORDINGLY, YOU MAY NOT COPY, DISTRIBUTE, MODIFY, POST, FRAME OR DEEP LINK THIS CONTENT. YOU MAY DOWNLOAD MATERIAL DISPLAYED ON THIS WEBSITE FOR YOUR PERSONAL USE PROVIDED YOU ALSO RETAIN ALL COPYRIGHT AND OTHER PROPRIETARY NOTICES CONTAINED ON THE MATERIALS. MODIFICATION OR USE OF THE MATERIALS FOR ANY OTHER PURPOSE VIOLATES BLACKROCK'S INTELLECTUAL PROPERTY RIGHTS.\nHOLDINGS SUBJECT TO CHANGE. SEE WWW.ISHARES.COM FOR THE MOST RECENT FUNDS HOLDI




Unnamed: 0_level_0,VTS,VTS,VTS,VTS,VTS,VTS,PGEN,PGEN,PGEN,PGEN,...,POWI,POWI,POWI,POWI,JNJ,JNJ,JNJ,JNJ,JNJ,JNJ
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,...,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
1962-01-02,,,,,,,,,,,...,,,,,0.000000,0.223380,0.222222,0.223380,0.064498,0
1962-01-03,,,,,,,,,,,...,,,,,0.000000,0.221065,0.219907,0.219907,0.063496,345600
1962-01-04,,,,,,,,,,,...,,,,,0.000000,0.221065,0.217593,0.217593,0.062827,216000
1962-01-05,,,,,,,,,,,...,,,,,0.000000,0.215856,0.214120,0.214120,0.061825,129600
1962-01-08,,,,,,,,,,,...,,,,,0.000000,0.212384,0.210648,0.210648,0.060822,172800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-01-29,21.450001,21.670000,21.020000,21.650000,21.650000,152800.0,1.41,1.48,1.350,1.480,...,75.870003,77.709999,77.709999,247500.0,159.009995,160.070007,158.960007,159.360001,159.360001,6483200
2024-01-30,21.490000,21.775999,21.360001,21.730000,21.730000,83900.0,1.46,1.46,1.360,1.390,...,75.720001,76.029999,76.029999,235800.0,159.350006,159.759995,158.130005,158.770004,158.770004,7200500
2024-01-31,21.780001,21.780001,20.990000,21.010000,21.010000,222100.0,1.40,1.45,1.370,1.380,...,74.580002,74.959999,74.959999,546500.0,160.399994,160.619995,158.669998,158.899994,158.899994,8256800
2024-02-01,21.110001,21.260000,20.660000,20.709999,20.709999,181000.0,1.38,1.42,1.360,1.400,...,74.500000,75.580002,75.580002,390200.0,158.160004,158.490005,157.050003,158.360001,158.360001,8012000


In [130]:
stock_price_data.columns.levels

NameError: name 'stock_price_data' is not defined

In [29]:
stock_price_data.tail(20)

Unnamed: 0_level_0,VTS,VTS,VTS,VTS,VTS,VTS,PGEN,PGEN,PGEN,PGEN,...,POWI,POWI,POWI,POWI,JNJ,JNJ,JNJ,JNJ,JNJ,JNJ
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,...,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2024-01-05 00:00:00,21.450001,21.725,21.35,21.610001,21.610001,254600.0,1.31,1.34,1.23,1.29,...,75.93,76.379997,76.379997,333000.0,160.5,161.339996,159.470001,161.130005,161.130005,5602500
2024-01-08 00:00:00,21.4,21.450001,20.719999,21.389999,21.389999,271600.0,1.3,1.43,1.26,1.4,...,76.269997,77.25,77.25,353400.0,161.100006,161.660004,159.610001,161.529999,161.529999,6569800
2024-01-09 00:00:00,21.370001,21.370001,20.9,21.040001,21.040001,197400.0,1.4,1.47,1.34,1.4,...,75.449997,77.260002,77.260002,265400.0,161.630005,163.279999,160.600006,161.630005,161.630005,6357000
2024-01-10 00:00:00,21.059999,21.219999,20.709999,20.98,20.98,270400.0,1.41,1.44,1.32,1.35,...,75.040001,77.139999,77.139999,349000.0,161.820007,162.619995,161.199997,161.869995,161.869995,4945100
2024-01-11 00:00:00,21.02,21.07,20.629999,20.84,20.84,167100.0,1.35,1.39,1.29,1.38,...,75.690002,76.889999,76.889999,255900.0,162.070007,162.199997,160.779999,161.149994,161.149994,4946000
2024-01-12 00:00:00,21.1,21.35,20.68,20.85,20.85,142100.0,1.38,1.49,1.36,1.37,...,76.25,77.099998,77.099998,253600.0,161.839996,162.470001,161.690002,162.389999,162.389999,4496500
2024-01-16 00:00:00,20.76,20.799999,20.209999,20.219999,20.219999,248300.0,1.41,1.41,1.25,1.31,...,76.0,77.07,77.07,286100.0,161.570007,162.25,160.179993,160.520004,160.520004,6923200
2024-01-17 00:00:00,20.16,20.370001,19.969999,20.299999,20.299999,191100.0,1.29,1.32,1.25,1.3,...,75.279999,76.779999,76.779999,406400.0,160.25,161.300003,159.869995,160.429993,160.429993,5072100
2024-01-18 00:00:00,20.42,20.51,20.08,20.440001,20.440001,162800.0,1.3,1.32,1.2,1.21,...,77.559998,78.199997,78.199997,415900.0,160.0,161.369995,159.690002,161.210007,161.210007,5368500
2024-01-19 00:00:00,20.59,20.639999,20.26,20.6,20.6,212300.0,1.22,1.28,1.19,1.28,...,78.480003,82.25,82.25,386800.0,161.149994,161.919998,160.279999,161.679993,161.679993,7055800


In [35]:
len(stock_price_data)

15629

In [36]:
stock_price_data = stock_price_data.dropna(how='all')

In [37]:
len(stock_price_data)

15629

In [13]:
# price_hist_file = f'data/all_stocks_price_hist_{price_interval}.csv.bz2'
price_hist_file = f'data/data-3rd-party/all_stocks_price_hist_{price_interval}.parquet'

In [39]:
# stock_price_data.to_csv(price_hist_file, index='Date')
# stock_price_data.to_parquet(price_hist_file)

In [None]:
# stock_price_data.to_csv('data/all_stocks_price_hist.bak.csv', index='Date')

In [23]:
df = stock_price_data_loaded.copy()

In [174]:
df2 = df.stack(level=0)
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1962-01-02,AEP,0.970557,34.312500,35.125000,34.312500,0.000000,5800.0
1962-01-02,BA,0.190931,0.823045,0.837449,0.823045,0.837449,352350.0
1962-01-02,CAT,0.476659,1.604167,1.619792,1.588542,1.604167,163200.0
1962-01-02,CNP,0.334449,10.783375,10.865333,10.783375,0.000000,13879.0
1962-01-02,CVX,0.339417,3.296131,3.296131,3.244048,0.000000,105840.0
...,...,...,...,...,...,...,...
2024-02-02,ZVIA,1.640000,1.640000,1.690000,1.640000,1.670000,28259.0
2024-02-02,ZVRA,5.840000,5.840000,5.920000,5.770000,5.900000,31833.0
2024-02-02,ZWS,30.180000,30.180000,30.250000,29.908400,29.950001,135475.0
2024-02-02,ZYME,10.920000,10.920000,11.060000,10.800000,10.940000,50296.0


In [180]:
df2.index

MultiIndex([('1962-01-02',  'AEP'),
            ('1962-01-02',   'BA'),
            ('1962-01-02',  'CAT'),
            ('1962-01-02',  'CNP'),
            ('1962-01-02',  'CVX'),
            ('1962-01-02',  'DIS'),
            ('1962-01-02',  'DTE'),
            ('1962-01-02',   'ED'),
            ('1962-01-02',   'FL'),
            ('1962-01-02',   'GD'),
            ...
            ('2024-02-02',   'ZS'),
            ('2024-02-02',  'ZTS'),
            ('2024-02-02', 'ZUMZ'),
            ('2024-02-02',  'ZUO'),
            ('2024-02-02', 'ZURA'),
            ('2024-02-02', 'ZVIA'),
            ('2024-02-02', 'ZVRA'),
            ('2024-02-02',  'ZWS'),
            ('2024-02-02', 'ZYME'),
            ('2024-02-02', 'ZYXI')],
           names=['Date', None], length=13476911)

In [190]:
df2.index.names = ['Date', 'Symbol']

In [194]:
df2 = df2.index.swaplevel(0)

MultiIndex([( 'AEP', '1962-01-02'),
            (  'BA', '1962-01-02'),
            ( 'CAT', '1962-01-02'),
            ( 'CNP', '1962-01-02'),
            ( 'CVX', '1962-01-02'),
            ( 'DIS', '1962-01-02'),
            ( 'DTE', '1962-01-02'),
            (  'ED', '1962-01-02'),
            (  'FL', '1962-01-02'),
            (  'GD', '1962-01-02'),
            ...
            (  'ZS', '2024-02-02'),
            ( 'ZTS', '2024-02-02'),
            ('ZUMZ', '2024-02-02'),
            ( 'ZUO', '2024-02-02'),
            ('ZURA', '2024-02-02'),
            ('ZVIA', '2024-02-02'),
            ('ZVRA', '2024-02-02'),
            ( 'ZWS', '2024-02-02'),
            ('ZYME', '2024-02-02'),
            ('ZYXI', '2024-02-02')],
           names=['Symbol', 'Date'], length=13476911)

In [195]:
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Close,Close,High,Low,Open,Volume
Date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1962-01-02,AEP,0.970557,34.312500,35.125000,34.312500,0.000000,5800.0
1962-01-02,BA,0.190931,0.823045,0.837449,0.823045,0.837449,352350.0
1962-01-02,CAT,0.476659,1.604167,1.619792,1.588542,1.604167,163200.0
1962-01-02,CNP,0.334449,10.783375,10.865333,10.783375,0.000000,13879.0
1962-01-02,CVX,0.339417,3.296131,3.296131,3.244048,0.000000,105840.0
...,...,...,...,...,...,...,...
2024-02-02,ZVIA,1.640000,1.640000,1.690000,1.640000,1.670000,28259.0
2024-02-02,ZVRA,5.840000,5.840000,5.920000,5.770000,5.900000,31833.0
2024-02-02,ZWS,30.180000,30.180000,30.250000,29.908400,29.950001,135475.0
2024-02-02,ZYME,10.920000,10.920000,11.060000,10.800000,10.940000,50296.0


In [None]:
df2 = df2.sort_index()
df2

In [45]:
# stock_price_data.to_csv(price_hist_file, index='Date')
df2.to_parquet(price_hist_file)

In [31]:
# stock_price_data_loaded = pd.read_csv(price_hist_file, header=[0, 1], index_col=0)
stock_price_data_loaded = pd.read_parquet(price_hist_file) #, filters=[("Symbol", "in", ['AEP', 'AAPL'])])
stock_price_data_loaded

Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Close,Close,High,Low,Open,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A,1999-11-18,26.695686,31.473534,35.765381,28.612303,32.546494,62546380.0
A,1999-11-19,24.496334,28.880545,30.758226,28.478184,30.713518,15234146.0
A,1999-11-22,26.695686,31.473534,31.473534,28.657009,29.551144,6577870.0
A,1999-11-23,24.268808,28.612303,31.205294,28.612303,30.400572,5975611.0
A,1999-11-24,24.913454,29.372318,29.998213,28.612303,28.701717,4843231.0
...,...,...,...,...,...,...,...
ZYXI,2024-01-29,11.410000,11.410000,11.550000,11.300000,11.350000,261900.0
ZYXI,2024-01-30,11.760000,11.760000,11.950000,11.370000,11.370000,484200.0
ZYXI,2024-01-31,11.840000,11.840000,12.070000,11.700000,11.700000,423600.0
ZYXI,2024-02-01,11.660000,11.660000,12.130000,11.640000,11.930000,417600.0


In [32]:
stock_price_data_loaded.dropna()

Unnamed: 0_level_0,Unnamed: 1_level_0,Adj Close,Close,High,Low,Open,Volume
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A,1999-11-18,26.695686,31.473534,35.765381,28.612303,32.546494,62546380.0
A,1999-11-19,24.496334,28.880545,30.758226,28.478184,30.713518,15234146.0
A,1999-11-22,26.695686,31.473534,31.473534,28.657009,29.551144,6577870.0
A,1999-11-23,24.268808,28.612303,31.205294,28.612303,30.400572,5975611.0
A,1999-11-24,24.913454,29.372318,29.998213,28.612303,28.701717,4843231.0
...,...,...,...,...,...,...,...
ZYXI,2024-01-29,11.410000,11.410000,11.550000,11.300000,11.350000,261900.0
ZYXI,2024-01-30,11.760000,11.760000,11.950000,11.370000,11.370000,484200.0
ZYXI,2024-01-31,11.840000,11.840000,12.070000,11.700000,11.700000,423600.0
ZYXI,2024-02-01,11.660000,11.660000,12.130000,11.640000,11.930000,417600.0


In [42]:
list(stock_price_data_loaded.index.levels[0])

['A',
 'AADI',
 'AAL',
 'AAN',
 'AAON',
 'AAPL',
 'AAT',
 'ABBNY',
 'ABBV',
 'ABCB',
 'ABG',
 'ABM',
 'ABNB',
 'ABR',
 'ABT',
 'ABUS',
 'ABVX',
 'ACA',
 'ACAD',
 'ACCD',
 'ACCO',
 'ACDC',
 'ACEL',
 'ACET',
 'ACGL',
 'ACHR',
 'ACIC',
 'ACIW',
 'ACLS',
 'ACLX',
 'ACMR',
 'ACN',
 'ACNB',
 'ACRE',
 'ACRS',
 'ACRV',
 'ACT',
 'ACTG',
 'ACVA',
 'ADBE',
 'ADEA',
 'ADI',
 'ADM',
 'ADMA',
 'ADNT',
 'ADP',
 'ADPT',
 'ADSK',
 'ADTN',
 'ADUS',
 'ADV',
 'AEE',
 'AEHR',
 'AEIS',
 'AEL',
 'AEO',
 'AEP',
 'AER',
 'AES',
 'AESI',
 'AEVA',
 'AFCG',
 'AFL',
 'AFRI',
 'AFRM',
 'AFYA',
 'AGEN',
 'AGIO',
 'AGM',
 'AGO',
 'AGS',
 'AGTI',
 'AGX',
 'AGYS',
 'AHCO',
 'AHH',
 'AI',
 'AIG',
 'AIN',
 'AIR',
 'AIRS',
 'AIT',
 'AIV',
 'AIZ',
 'AJG',
 'AKAM',
 'AKR',
 'AKRO',
 'AKTS',
 'AKYA',
 'ALB',
 'ALCO',
 'ALDX',
 'ALE',
 'ALEC',
 'ALEX',
 'ALG',
 'ALGN',
 'ALGT',
 'ALHC',
 'ALIT',
 'ALKS',
 'ALKT',
 'ALL',
 'ALLE',
 'ALLK',
 'ALLO',
 'ALLY',
 'ALNT',
 'ALPN',
 'ALRM',
 'ALRS',
 'ALT',
 'ALTG',
 'ALTI',
 'ALTM',

## Prepare earnings and sales data

In [42]:
import fmpsdk

# Company Valuation Methods
symbol: str = "AAPL"
symbols: ["AAPL", "CSCO", "QQQQ"]
exchange: str = "NYSE"
exchanges: ["NYSE", "NASDAQ"]
query: str = "AA"
limit: int = 3
period: str = "quarter"
download: bool = True
market_cap_more_than: int = 1000000000
beta_more_than: int = 1
volume_more_than: int = 10000
sector: str = "Technology"
dividend_more_than: int = 0
industry: str = "Software"
filing_type: str = "10-K"
print(f"Company Profile: {fmpsdk.company_profile(apikey=FMP_API_KEY, symbol=symbol)=}")


Company Profile: fmpsdk.company_profile(apikey=FMP_API_KEY, symbol=symbol)=[{'symbol': 'AAPL', 'price': 186.742, 'beta': 1.29, 'volAvg': 53759957, 'mktCap': 2887386129800, 'lastDiv': 0.96, 'range': '143.9-199.62', 'changes': -0.118, 'companyName': 'Apple Inc.', 'currency': 'USD', 'cik': '0000320193', 'isin': 'US0378331005', 'cusip': '037833100', 'exchange': 'NASDAQ Global Select', 'exchangeShortName': 'NASDAQ', 'industry': 'Consumer Electronics', 'website': 'https://www.apple.com', 'description': 'Apple Inc. designs, manufactures, and markets smartphones, personal computers, tablets, wearables, and accessories worldwide. The company offers iPhone, a line of smartphones; Mac, a line of personal computers; iPad, a line of multi-purpose tablets; and wearables, home, and accessories comprising AirPods, Apple TV, Apple Watch, Beats products, and HomePod. It also provides AppleCare support and cloud services; and operates various platforms, including the App Store that allow customers to dis

In [43]:
earnings_all_df = pd.DataFrame()
for ticker in stocks_ticker_set: # ['AAON']: #
    earnings = fmpsdk.historical_earning_calendar(apikey=FMP_API_KEY, symbol=ticker, limit=-1)
    if earnings is not None and len(earnings) > 0:
        edf = pd.DataFrame(earnings)
        edf['date'] = pd.to_datetime(edf['date'])
        edf = edf.set_index(['symbol', 'date'])
        # edf = edf.pivot(columns='symbol')
        # edf.swaplevel(i=0,j=1, axis=0)
        # edf.drop(columns=['symbol'])
        earnings_all_df = pd.concat([earnings_all_df, edf])
        n_earnings = len(earnings)
        # print(f"Total earnings reports for {ticker}: {n_earnings}")
#    earliest_earn = earnings[-1] if len(earnings > 0 else 'None')
#    print(f"Earliest earnings report for {ticker}: {earliest_earn}")


ERROR:root:A requests exception has occurred that we have not yet detailed an 'except' clause for.  Error: Expecting value: line 1 column 1 (char 0)
ERROR:root:Connection to https://financialmodelingprep.com/api/v3/historical/earning_calendar/MMI failed:  DNS failure, refused connection or some other connection related issue.
ERROR:root:Connection to https://financialmodelingprep.com/api/v3/historical/earning_calendar/GEFB timed out.
ERROR:root:Connection to https://financialmodelingprep.com/api/v3/historical/earning_calendar/BKR failed:  DNS failure, refused connection or some other connection related issue.
ERROR:root:Connection to https://financialmodelingprep.com/api/v3/historical/earning_calendar/BHRB failed:  DNS failure, refused connection or some other connection related issue.
ERROR:root:Connection to https://financialmodelingprep.com/api/v3/historical/earning_calendar/CYBR failed:  DNS failure, refused connection or some other connection related issue.


In [45]:
earnings

[{'date': '2024-02-27',
  'symbol': 'FLYW',
  'eps': None,
  'epsEstimated': -0.08,
  'time': 'amc',
  'revenue': None,
  'revenueEstimated': 89960000,
  'updatedFromDate': '2024-02-02',
  'fiscalDateEnding': '2023-12-30'},
 {'date': '2023-11-07',
  'symbol': 'FLYW',
  'eps': 0.08,
  'epsEstimated': 0.1,
  'time': 'amc',
  'revenue': 123323000,
  'revenueEstimated': 90520000,
  'updatedFromDate': '2024-02-02',
  'fiscalDateEnding': '2023-09-30'},
 {'date': '2023-08-08',
  'symbol': 'FLYW',
  'eps': -0.15,
  'epsEstimated': -0.15,
  'time': 'amc',
  'revenue': 84869000,
  'revenueEstimated': 74950000,
  'updatedFromDate': '2023-12-04',
  'fiscalDateEnding': '2023-06-30'},
 {'date': '2023-05-09',
  'symbol': 'FLYW',
  'eps': -0.03,
  'epsEstimated': -0.05,
  'time': 'amc',
  'revenue': 94357000,
  'revenueEstimated': 83870000,
  'updatedFromDate': '2023-12-04',
  'fiscalDateEnding': '2023-03-31'},
 {'date': '2023-02-28',
  'symbol': 'FLYW',
  'eps': -0.01,
  'epsEstimated': -0.11,
  'tim

In [44]:
aaon = earnings_all_df.loc[['AAON']]

In [46]:
aaon

Unnamed: 0_level_0,Unnamed: 1_level_0,eps,epsEstimated,time,revenue,revenueEstimated,updatedFromDate,fiscalDateEnding
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AAON,2024-11-04,,,amc,,,2024-02-02,2024-09-30
AAON,2024-08-01,,,amc,,,2024-02-02,2024-06-30
AAON,2024-05-02,,,amc,,,2024-02-02,2024-03-30
AAON,2024-02-26,,0.53,bmo,,294360000.0,2024-02-02,2023-12-30
AAON,2023-11-06,0.64000,0.55,amc,311970000.0,294360000.0,2024-02-02,2023-09-30
AAON,...,...,...,...,...,...,...,...
AAON,1991-03-31,-0.00159,,bmo,5700000.0,,2023-12-04,1991-03-31
AAON,1990-12-31,0.00352,,bmo,36100000.0,,2023-12-04,1990-12-31
AAON,1990-09-30,0.00703,,bmo,10100000.0,,2023-12-04,1990-09-30
AAON,1990-06-30,0.00703,,bmo,9300000.0,,2023-12-04,1990-06-30


In [47]:
len(earnings_all_df)

216678

In [50]:
earnings_all_df


NameError: name 'earnings_all_df' is not defined

In [49]:
len(earnings_all_df.index.levels[0])


2641

In [45]:
# earnings_file = 'data/earnings_calendar.csv.bz2'
earnings_file = 'data/data-3rd-party/earnings_calendar.parquet'

In [54]:
# earnings_all_df.to_csv(earnings_file)
earnings_all_df.to_parquet(earnings_file)

### Read back data and verify it

In [49]:
import pandas as pd

tmp_earn_df = pd.read_parquet(earnings_file)
tmp_earn_df

# earnings_loaded_df = pd.read_csv('data/earnings_calendar.csv.bz2', index_col=['symbol', 'date'])
# print(earnings_loaded_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,eps,epsEstimated,time,revenue,revenueEstimated,updatedFromDate,fiscalDateEnding
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
A,1999-01-31,0.1600,,amc,1.786000e+09,,2023-12-04,1999-01-31
A,1999-04-30,0.3400,,amc,2.010000e+09,,2023-12-04,1999-04-30
A,1999-07-31,0.3000,,amc,2.087000e+09,,2023-12-04,1999-07-31
A,1999-10-31,0.3318,,amc,2.448000e+09,,2023-12-04,1999-10-31
A,2000-02-17,0.3000,0.22,bmo,2.246000e+09,1.357400e+09,2023-12-04,2000-01-31
...,...,...,...,...,...,...,...,...
ZYXI,2023-10-26,0.1000,0.09,bmo,4.991500e+07,5.453000e+07,2024-01-26,2023-09-30
ZYXI,2024-03-04,,0.17,bmo,,5.453000e+07,2024-02-02,2023-12-30
ZYXI,2024-04-25,,,bmo,,,2024-02-02,2024-03-30
ZYXI,2024-07-25,,,bmo,,,2024-02-02,2024-06-30


In [52]:
tmp_earn_df.index.names = ['Symbol', 'Date']

## Prepare historical dividends
  * This is secondary information since growth stocks usually do not have dividends and rarely have splits
  * Additionally the dividends and split information is partially reflected in Adj Close of price history data

In [None]:
def fetch_dividends_history():
    divs_hist_all_df = pd.DataFrame()
    for ticker in stocks_ticker_set: # ['AAON']:
        divs_hist = fmpsdk.historical_stock_dividend(apikey=FMP_API_KEY, symbol=ticker)
        # print(f"Loaded historical dividends for {ticker}: \n{divs_hist}")
        print(f"Loaded {len(divs_hist['historical'])} historical dividends for {ticker}")
        if divs_hist['historical'] is not None and len(divs_hist['historical']) > 0:
            dh_df_tmp = pd.DataFrame.from_dict(data=divs_hist['historical'])
            # print(f"Historical dividends for {ticker} dataframe: \n{dh_df_tmp.head()}")
            dh_df_tmp['symbol'] = ticker
            dh_df = dh_df_tmp
            # print(f"Historical dividends for {ticker} dataframe: \n{dh_df_tmp.head()}")
            # print(f"Historical dividends for {ticker} full dataframe: \n{dh_df.head()}")
            dh_df['date'] = pd.to_datetime(dh_df['date'])
            dh_df = dh_df.set_index(['symbol', 'date'])
            n_divs_hist = len(dh_df)
            print(f"Total dividends history reports for {ticker}: {n_divs_hist}")
            # print(f"Historical dividends for {ticker} full dataframe: \n{dh_df}")
            divs_hist_all_df = pd.concat([divs_hist_all_df, dh_df])
    return divs_hist_all_df


In [None]:
# divs_hist_file = 'data/dividends_history.csv.bz2'

In [None]:
# divs_hist_all_df.to_csv(divs_hist_file)

## Prepare key metrics data for company fundamentals

In [53]:

keymetrics_all_df = pd.DataFrame()
for ticker in stocks_ticker_set:
    kms = fmpsdk.key_metrics(apikey=FMP_API_KEY, symbol=ticker, period='quarter', limit=-1)
    if kms is not None and len(kms) > 0:
        kms_df = pd.DataFrame(kms)
        kms_df['date'] = pd.to_datetime(kms_df['date'])
        kms_df = kms_df.set_index(['symbol', 'date'])
        # print(f"Key metrics for {ticker} sample: \n{kms_df.columns}")
        keymetrics_all_df = pd.concat([keymetrics_all_df, kms_df])
        # print(f"Key metrics concatenated {ticker}: \n{keymetrics_all_df.columns}")
        n_kms = len(kms_df)
        print(f"Total key metrics reports for {ticker}: {n_kms}")
    else:
        print(f"No {ticker} key metrics reports: kms={kms}")

Total key metrics reports for IAUX: 15
Total key metrics reports for PGEN: 48
Total key metrics reports for PETQ: 31
Total key metrics reports for NUVB: 15
Total key metrics reports for HSY: 153
No ESH4 key metrics reports: kms=[]
Total key metrics reports for ALVR: 19
Total key metrics reports for VTS: 6
Total key metrics reports for ME: 14
Total key metrics reports for SUM: 40
Total key metrics reports for LAW: 15
Total key metrics reports for EQT: 153
Total key metrics reports for INN: 59
Total key metrics reports for BURL: 47
Total key metrics reports for ODC: 153
Total key metrics reports for OPEN: 15
Total key metrics reports for KHC: 51
Total key metrics reports for GCMG: 15
Total key metrics reports for FPI: 43
Total key metrics reports for O: 117
Total key metrics reports for SFIX: 29
Total key metrics reports for UE: 43
Total key metrics reports for NVEC: 115
Total key metrics reports for AWR: 153
Total key metrics reports for MNDY: 15
Total key metrics reports for PARA: 128


ERROR:root:A requests exception has occurred that we have not yet detailed an 'except' clause for.  Error: Expecting value: line 1 column 1 (char 0)


Total key metrics reports for CCRN: 91
No The content contained herein is owned or licensed by BlackRock and/or its third-party information providers and is protected by applicable copyrights, trademarks, service marks, and/or other intellectual property rights. Such content is solely for your personal, non-commercial use. Accordingly, you may not copy, distribute, modify, post, frame or deep link this content. You may download material displayed on this Website for your personal use provided you also retain all copyright and other proprietary notices contained on the materials. Modification or use of the materials for any other purpose violates BlackRock's intellectual property rights.
Holdings subject to change. See www.iShares.com for the most recent funds holdings.
The values for “price” shown herein generally represent a price provided by a third-party pricing vendor for the portfolio holding and do not reflect the impact of systematic fair valuation (“the vendor price”). The vend

ERROR:root:Connection to https://financialmodelingprep.com/api/v3/key-metrics/AMNB failed:  DNS failure, refused connection or some other connection related issue.


Total key metrics reports for DIN: 135
No AMNB key metrics reports: kms=None
Total key metrics reports for ROVR: 14
Total key metrics reports for ALLE: 44
Total key metrics reports for KO: 153
Total key metrics reports for INTU: 124
Total key metrics reports for BE: 27
Total key metrics reports for ABBNY: 91
Total key metrics reports for GCT: 11
Total key metrics reports for RCKY: 132
Total key metrics reports for MBIN: 31
Total key metrics reports for UTZ: 21
Total key metrics reports for RGCO: 120
Total key metrics reports for MO: 153
Total key metrics reports for EXTR: 102
Total key metrics reports for ONTF: 17
Total key metrics reports for CPS: 75
Total key metrics reports for NATH: 126
Total key metrics reports for ACMR: 32
Total key metrics reports for CIFR: 14
Total key metrics reports for UBER: 24
Total key metrics reports for IOT: 12
Total key metrics reports for TGT: 153
Total key metrics reports for SMLR: 47
Total key metrics reports for MXL: 61
Total key metrics reports for

ERROR:root:Connection to https://financialmodelingprep.com/api/v3/key-metrics/BHVN failed:  DNS failure, refused connection or some other connection related issue.


Total key metrics reports for OGS: 48
No BHVN key metrics reports: kms=None
Total key metrics reports for CPE: 119
Total key metrics reports for DOLE: 14
Total key metrics reports for WULF: 73
Total key metrics reports for NX: 96
Total key metrics reports for RCKT: 40
Total key metrics reports for SLP: 109
Total key metrics reports for COF: 119
Total key metrics reports for IPI: 66
Total key metrics reports for DHR: 154
Total key metrics reports for PLAY: 47
Total key metrics reports for INVH: 32
Total key metrics reports for ALTM: 4
Total key metrics reports for FFIC: 111
Total key metrics reports for PRLD: 17
Total key metrics reports for YUM: 105
Total key metrics reports for NBIX: 111
Total key metrics reports for EXPE: 77
Total key metrics reports for NTAP: 112
Total key metrics reports for SYBT: 132
Total key metrics reports for AVNS: 43
Total key metrics reports for NAUT: 20
Total key metrics reports for AMBA: 51
Total key metrics reports for AFL: 153
Total key metrics reports f

ERROR:root:Connection to https://financialmodelingprep.com/api/v3/key-metrics/MGM failed:  DNS failure, refused connection or some other connection related issue.


No MGM key metrics reports: kms=None
Total key metrics reports for CMCSA: 154
Total key metrics reports for PFBC: 79
Total key metrics reports for LPSN: 95
Total key metrics reports for SKIL: 19
Total key metrics reports for TOL: 153
Total key metrics reports for IAS: 15
Total key metrics reports for PRFT: 98
Total key metrics reports for SPCE: 26
Total key metrics reports for EMN: 123
Total key metrics reports for LIVN: 123
Total key metrics reports for KMI: 59
Total key metrics reports for BXMT: 134
Total key metrics reports for LRMR: 38
Total key metrics reports for NGVC: 52
Total key metrics reports for STN: 107
Total key metrics reports for CF: 76
No RNAC key metrics reports: kms=[]
Total key metrics reports for ORA: 79
Total key metrics reports for CBT: 153
Total key metrics reports for SMMF: 112
Total key metrics reports for INDB: 154
Total key metrics reports for CAC: 111
Total key metrics reports for TCX: 111
Total key metrics reports for ARAY: 77
Total key metrics reports for

ERROR:root:Connection to https://financialmodelingprep.com/api/v3/key-metrics/JAKK failed:  DNS failure, refused connection or some other connection related issue.


Total key metrics reports for KAR: 64
No JAKK key metrics reports: kms=None
Total key metrics reports for BARK: 14
Total key metrics reports for KEY: 153
Total key metrics reports for CMT: 107
Total key metrics reports for WEC: 153
Total key metrics reports for VALU: 153
Total key metrics reports for GEL: 111
Total key metrics reports for VIAV: 129
Total key metrics reports for JAMF: 19
Total key metrics reports for BVN: 107
Total key metrics reports for EGHT: 106
Total key metrics reports for HD: 153
Total key metrics reports for BLDR: 75
Total key metrics reports for ELA: 139
Total key metrics reports for WWD: 120
Total key metrics reports for PRGS: 140
Total key metrics reports for MPB: 116
Total key metrics reports for PKOH: 153
Total key metrics reports for BKH: 153
Total key metrics reports for MCRB: 40
Total key metrics reports for CSTM: 47
Total key metrics reports for KGS: 5
Total key metrics reports for SQ: 37
Total key metrics reports for IMKTA: 152
Total key metrics reports

ERROR:root:Connection to https://financialmodelingprep.com/api/v3/key-metrics/FMAO failed:  DNS failure, refused connection or some other connection related issue.


Total key metrics reports for ASND: 40
No FMAO key metrics reports: kms=None
Total key metrics reports for ARIS: 72
Total key metrics reports for VRNS: 47
Total key metrics reports for HIG: 115
Total key metrics reports for LKQ: 88
Total key metrics reports for ACIC: 66
Total key metrics reports for HTLD: 153
Total key metrics reports for YELP: 52
Total key metrics reports for BLBD: 40
Total key metrics reports for COIN: 15
Total key metrics reports for SGRY: 37
Total key metrics reports for STLA: 130
Total key metrics reports for FOSL: 123
Total key metrics reports for SXC: 55
Total key metrics reports for CRL: 95
Total key metrics reports for BIPC: 18
Total key metrics reports for TERN: 16
Total key metrics reports for AVGO: 62
Total key metrics reports for PTC: 141
Total key metrics reports for ICFI: 80
Total key metrics reports for SXI: 153
Total key metrics reports for FC: 133
Total key metrics reports for HLF: 83
Total key metrics reports for FUBO: 50
Total key metrics reports fo

In [54]:
keymetrics_all_df

Unnamed: 0_level_0,Unnamed: 1_level_0,calendarYear,period,revenuePerShare,netIncomePerShare,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,bookValuePerShare,tangibleBookValuePerShare,shareholdersEquityPerShare,...,averagePayables,averageInventory,daysSalesOutstanding,daysPayablesOutstanding,daysOfInventoryOnHand,receivablesTurnover,payablesTurnover,inventoryTurnover,roe,capexPerShare
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
IAUX,2023-09-30,2023,Q3,0.046025,-0.014624,-0.049448,-0.089751,0.131422,1.560281,1.560281,1.560281,...,11083000.0,18953500.0,5.278093,67.855056,125.656780,17.051613,1.326357,0.716237,-0.009373,-0.040302
IAUX,2023-06-30,2023,Q2,0.042610,-0.060136,-0.061273,-0.108882,0.072918,1.603962,1.603962,1.603962,...,10400500.0,18993500.0,12.883289,71.495440,113.441524,6.985794,1.258822,0.793360,-0.037492,-0.047609
IAUX,2023-03-31,2023,Q1,0.018518,-0.053411,-0.092759,-0.134208,0.231715,1.458083,1.458083,1.458083,...,9788500.0,17863000.0,10.507916,101.211855,216.901921,8.564972,0.889224,0.414934,-0.036631,-0.041449
IAUX,2022-12-31,2022,Q4,0.048455,-0.265996,-0.020485,-0.089775,0.200842,1.387086,1.387086,1.387086,...,7087500.0,20697000.0,4.814115,63.272222,98.494275,18.695024,1.422425,0.913759,-0.191766,-0.069289
IAUX,2022-09-30,2022,Q3,0.066835,-0.046895,-0.003145,-0.080443,0.316127,1.650186,1.650186,1.650186,...,3267000.0,27577500.0,2.834734,26.736622,187.066054,31.749012,3.366169,0.481113,-0.028418,-0.077298
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
FLYW,2020-12-31,2020,Q4,0.372878,-0.045231,0.149719,0.146027,1.167544,-0.917433,-2.183821,-0.917433,...,,,96.827661,42.354774,38.957666,0.929486,2.124908,2.310200,0.049302,-0.003692
FLYW,2020-09-30,2020,Q3,0.472238,0.058584,0.146353,0.140170,,,,,...,,,,,,0.000000,0.000000,0.000000,0.000000,-0.006183
FLYW,2020-06-30,2020,Q2,0.266572,-0.179499,0.073238,0.065069,,,,,...,,,,,,0.000000,0.000000,0.000000,0.000000,-0.008169
FLYW,2020-03-31,2020,Q1,0.367020,0.041517,-0.528903,-0.534883,,,,,...,,,,,,0.000000,0.000000,0.000000,0.000000,-0.005981


In [56]:
keymetrics_all_df.dtypes

calendarYear                               object
period                                     object
revenuePerShare                           float64
netIncomePerShare                         float64
operatingCashFlowPerShare                 float64
freeCashFlowPerShare                      float64
cashPerShare                              float64
bookValuePerShare                         float64
tangibleBookValuePerShare                 float64
shareholdersEquityPerShare                float64
interestDebtPerShare                      float64
marketCap                                 float64
enterpriseValue                           float64
peRatio                                   float64
priceToSalesRatio                         float64
pocfratio                                 float64
pfcfRatio                                 float64
pbRatio                                   float64
ptbRatio                                  float64
evToSales                                 float64


In [57]:
len(keymetrics_all_df)

213611

In [85]:
index, row = next(keymetrics_all_df.iterrows())

In [87]:
index

('IAUX', Timestamp('2023-09-30 00:00:00'))

In [86]:
row['averagePayables']

11083000.0

In [101]:
# prevent parquet serialization issues
keymetrics_all_df["averagePayables"] = pd.to_numeric(keymetrics_all_df["averagePayables"], dtype_backend="pyarrow")

In [None]:
keymetrics_all_df.index.names = ["Symbol", "Date"]
keymetrics_all_df.index.names

In [66]:
keymetrics_all_df = keymetrics_all_df.sort_index()
keymetrics_all_df

Unnamed: 0_level_0,Unnamed: 1_level_0,calendarYear,period,revenuePerShare,netIncomePerShare,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,bookValuePerShare,tangibleBookValuePerShare,shareholdersEquityPerShare,...,averagePayables,averageInventory,daysSalesOutstanding,daysPayablesOutstanding,daysOfInventoryOnHand,receivablesTurnover,payablesTurnover,inventoryTurnover,roe,capexPerShare
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A,1999-01-31,1999,Q1,4.102973,0.170000,,,,,,,...,,,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
A,1999-04-30,1999,Q2,4.480892,0.350000,,,,,,,...,,,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
A,1999-07-31,1999,Q3,4.637778,0.300000,,,,,,,...,,,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
A,1999-10-31,1999,Q4,5.576310,0.332574,,,0.000000,7.703872,7.703872,7.703872,...,,,60.110294,0.000000,0.000000,1.497248,0.000000,0.000000,0.043170,
A,2000-01-31,2000,Q1,5.116173,0.298405,0.874715,0.667426,3.211845,10.218679,10.218679,10.218679,...,406500000.0,1.533000e+09,57.902939,23.508621,121.577586,1.554325,3.828383,0.740268,0.029202,-0.207289
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI,2022-09-30,2022,Q3,1.091311,0.128082,0.194791,0.191636,0.618514,1.713373,0.932818,1.713373,...,5187500.0,1.446900e+07,61.452312,55.119771,154.086521,1.464550,1.632808,0.584087,0.074754,-0.003154
ZYXI,2022-12-31,2022,Q4,1.310694,0.200129,0.127753,0.125443,0.540982,1.782093,0.990708,1.782093,...,5370000.0,1.392500e+07,64.658744,53.695143,129.267150,1.391923,1.676129,0.696233,0.112300,-0.002310
ZYXI,2023-03-31,2023,Q1,1.149234,0.042759,0.052706,0.047692,0.457622,1.757399,0.960429,1.757399,...,5625500.0,1.383400e+07,68.423050,54.860287,137.723595,1.315346,1.640531,0.653483,0.024331,-0.005014
ZYXI,2023-06-30,2023,Q2,1.233759,0.092054,0.020338,0.014574,1.612433,1.708989,0.912584,1.708989,...,5790000.0,1.425450e+07,65.984383,57.560397,139.047670,1.363959,1.563575,0.647260,0.053865,-0.005764


In [58]:
# kms_file = 'data/keymetrics_history.csv.bz2'
# keymetrics_all_df.to_csv(kms_file)

kms_file = 'data/data-3rd-party/keymetrics_history.parquet'

In [67]:

keymetrics_all_df.to_parquet(kms_file, engine="pyarrow")

In [68]:
temp_kms_df = pd.read_parquet(kms_file)
temp_kms_df

Unnamed: 0_level_0,Unnamed: 1_level_0,calendarYear,period,revenuePerShare,netIncomePerShare,operatingCashFlowPerShare,freeCashFlowPerShare,cashPerShare,bookValuePerShare,tangibleBookValuePerShare,shareholdersEquityPerShare,...,averagePayables,averageInventory,daysSalesOutstanding,daysPayablesOutstanding,daysOfInventoryOnHand,receivablesTurnover,payablesTurnover,inventoryTurnover,roe,capexPerShare
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A,1999-01-31,1999,Q1,4.102973,0.170000,,,,,,,...,,,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
A,1999-04-30,1999,Q2,4.480892,0.350000,,,,,,,...,,,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
A,1999-07-31,1999,Q3,4.637778,0.300000,,,,,,,...,,,,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,
A,1999-10-31,1999,Q4,5.576310,0.332574,,,0.000000,7.703872,7.703872,7.703872,...,,,60.110294,0.000000,0.000000,1.497248,0.000000,0.000000,0.043170,
A,2000-01-31,2000,Q1,5.116173,0.298405,0.874715,0.667426,3.211845,10.218679,10.218679,10.218679,...,406500000.0,1.533000e+09,57.902939,23.508621,121.577586,1.554325,3.828383,0.740268,0.029202,-0.207289
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI,2022-09-30,2022,Q3,1.091311,0.128082,0.194791,0.191636,0.618514,1.713373,0.932818,1.713373,...,5187500.0,1.446900e+07,61.452312,55.119771,154.086521,1.464550,1.632808,0.584087,0.074754,-0.003154
ZYXI,2022-12-31,2022,Q4,1.310694,0.200129,0.127753,0.125443,0.540982,1.782093,0.990708,1.782093,...,5370000.0,1.392500e+07,64.658744,53.695143,129.267150,1.391923,1.676129,0.696233,0.112300,-0.002310
ZYXI,2023-03-31,2023,Q1,1.149234,0.042759,0.052706,0.047692,0.457622,1.757399,0.960429,1.757399,...,5625500.0,1.383400e+07,68.423050,54.860287,137.723595,1.315346,1.640531,0.653483,0.024331,-0.005014
ZYXI,2023-06-30,2023,Q2,1.233759,0.092054,0.020338,0.014574,1.612433,1.708989,0.912584,1.708989,...,5790000.0,1.425450e+07,65.984383,57.560397,139.047670,1.363959,1.563575,0.647260,0.053865,-0.005764


## Prepare institutional ownership data

In [9]:

from fmpsdk.settings import DEFAULT_LIMIT, SEC_RSS_FEEDS_FILENAME, BASE_URL_v3
from fmpsdk.url_methods import __return_json_v4
import typing


def institutional_symbol_ownership(
    apikey: str,
    symbol: str,
    limit: int,
    includeCurrentQuarter: bool = False,
) -> typing.Optional[typing.List[typing.Dict]]:
    """
    Query FMP /institutional-ownership/ API.

    :param apikey: Your API key.
    :param symbol: Company ticker.
    :param limit: up to how many quarterly reports to return.
    :param includeCurrentQuarter: Whether to include any available data in the current quarter.
    :return: A list of dictionaries.
    """
    path = f"institutional-ownership/symbol-ownership"
    query_vars = {"symbol": symbol, "apikey": apikey, "includeCurrentQuarter": includeCurrentQuarter, "limit": limit}
    return __return_json_v4(path=path, query_vars=query_vars)


In [40]:

inst_ownership_all_df = pd.DataFrame()
for ticker in stocks_ticker_set:
    inst_ownership = institutional_symbol_ownership(apikey=FMP_API_KEY, symbol=ticker, limit=-1, includeCurrentQuarter=False)
    # print("inst_ownership: ", inst_ownership)
    if inst_ownership is not None and len(inst_ownership) > 0:
        inst_ownership_df = pd.DataFrame(inst_ownership)
        inst_ownership_df['date'] = pd.to_datetime(inst_ownership_df['date'])
        inst_ownership_df = inst_ownership_df.set_index(['symbol', 'date'])
        # print(f"Institutional ownership for {ticker} # columns: \n{len(inst_ownership_df.columns)}")
        n_iown = len(inst_ownership_df)
        print(f"Total institutional ownership reports for {ticker}: {n_iown}")
        inst_ownership_all_df = pd.concat([inst_ownership_all_df, inst_ownership_df])
        # print(f"Institutional ownership concatenated {ticker} # columns: \n{inst_ownership_all_df.columns}")
    else:
        print(f"No {ticker} institutional ownership reports: inst_ownership={inst_ownership}")

Total institutional ownership reports for AAT: 51
No APGE institutional ownership reports: inst_ownership=[]
Total institutional ownership reports for MLNK: 9
Total institutional ownership reports for VIAV: 34
Total institutional ownership reports for QTRX: 24
Total institutional ownership reports for SKIN: 11
No ATMU institutional ownership reports: inst_ownership=[]
Total institutional ownership reports for CMCSA: 84
Total institutional ownership reports for LSPD: 10
Total institutional ownership reports for OMER: 56
Total institutional ownership reports for EU: 7
Total institutional ownership reports for NEWT: 37
Total institutional ownership reports for MWA: 70
Total institutional ownership reports for CEVA: 80
Total institutional ownership reports for EMBC: 8
Total institutional ownership reports for BSVN: 66
Total institutional ownership reports for TBBK: 76
Total institutional ownership reports for FOSL: 54
Total institutional ownership reports for INSM: 51
Total institutional o

ERROR:root:Connection to https://financialmodelingprep.com/api/v4/institutional-ownership/symbol-ownership failed:  DNS failure, refused connection or some other connection related issue.


Total institutional ownership reports for GPRO: 39
No IPI institutional ownership reports: inst_ownership=None
Total institutional ownership reports for INTC: 106
Total institutional ownership reports for BOX: 36
Total institutional ownership reports for SNPS: 100
Total institutional ownership reports for TILE: 46
Total institutional ownership reports for PRG: 19
Total institutional ownership reports for PRM: 8
Total institutional ownership reports for COMM: 40
Total institutional ownership reports for RBCAA: 98
Total institutional ownership reports for SNPO: 9
Total institutional ownership reports for VRA: 52
Total institutional ownership reports for SANA: 11
Total institutional ownership reports for PEPG: 6
Total institutional ownership reports for OPEN: 19
Total institutional ownership reports for HDSN: 75
Total institutional ownership reports for LAND: 43
Total institutional ownership reports for IRM: 39
Total institutional ownership reports for CSTM: 19
Total institutional ownersh

ERROR:root:Connection to https://financialmodelingprep.com/api/v4/institutional-ownership/symbol-ownership failed:  DNS failure, refused connection or some other connection related issue.


Total institutional ownership reports for INFU: 54
No WASH institutional ownership reports: inst_ownership=None
Total institutional ownership reports for EVBG: 29
Total institutional ownership reports for AMPX: 5
Total institutional ownership reports for BHLB: 93
Total institutional ownership reports for KMI: 53
Total institutional ownership reports for PTLO: 8
Total institutional ownership reports for AFRM: 19
Total institutional ownership reports for CAD: 5
Total institutional ownership reports for EWTX: 11
Total institutional ownership reports for BHVN: 26
Total institutional ownership reports for UHG: 1
Total institutional ownership reports for EPM: 68
Total institutional ownership reports for RKLB: 9
Total institutional ownership reports for VRNT: 86
Total institutional ownership reports for ATNI: 30
Total institutional ownership reports for THRD: 5
Total institutional ownership reports for BRO: 99
Total institutional ownership reports for INTU: 99
Total institutional ownership re

ERROR:root:Connection to https://financialmodelingprep.com/api/v4/institutional-ownership/symbol-ownership failed:  DNS failure, refused connection or some other connection related issue.


Total institutional ownership reports for JBHT: 99
No CTLT institutional ownership reports: inst_ownership=None
Total institutional ownership reports for MBUU: 39
Total institutional ownership reports for TGT: 101
Total institutional ownership reports for DK: 31
Total institutional ownership reports for ALXO: 13
Total institutional ownership reports for LUV: 100
Total institutional ownership reports for NEM: 100
Total institutional ownership reports for EXFY: 8
Total institutional ownership reports for CTSH: 99
Total institutional ownership reports for CLDX: 20
Total institutional ownership reports for BRY: 24
Total institutional ownership reports for AKRO: 18
Total institutional ownership reports for CE: 75
Total institutional ownership reports for NCLH: 43
Total institutional ownership reports for LTC: 99
Total institutional ownership reports for RY: 99
Total institutional ownership reports for BOH: 87
Total institutional ownership reports for EXP: 81
Total institutional ownership re

ERROR:root:Connection to https://financialmodelingprep.com/api/v4/institutional-ownership/symbol-ownership failed:  DNS failure, refused connection or some other connection related issue.


Total institutional ownership reports for GEF: 99
No DZSI institutional ownership reports: inst_ownership=None
Total institutional ownership reports for CENT: 99
Total institutional ownership reports for IMGN: 99
Total institutional ownership reports for CIVI: 9
Total institutional ownership reports for NTRA: 33
Total institutional ownership reports for AMSWA: 99
No GORV institutional ownership reports: inst_ownership=[]
Total institutional ownership reports for ASTS: 10
Total institutional ownership reports for RELY: 9
No ABBNY institutional ownership reports: inst_ownership=[]
Total institutional ownership reports for MKTW: 10
Total institutional ownership reports for ANET: 38
Total institutional ownership reports for CCRD: 91
Total institutional ownership reports for WTFC: 97
Total institutional ownership reports for ITGR: 30
Total institutional ownership reports for LTH: 8
Total institutional ownership reports for USCB: 8
Total institutional ownership reports for MAXN: 13
Total ins

ERROR:root:Connection to https://financialmodelingprep.com/api/v4/institutional-ownership/symbol-ownership failed:  DNS failure, refused connection or some other connection related issue.


Total institutional ownership reports for PFSI: 21
No MFC institutional ownership reports: inst_ownership=None
Total institutional ownership reports for PBFS: 17
Total institutional ownership reports for NPO: 86
Total institutional ownership reports for HLIT: 99
Total institutional ownership reports for GLBE: 10
Total institutional ownership reports for EB: 21
Total institutional ownership reports for AXSM: 32
Total institutional ownership reports for DIN: 62
Total institutional ownership reports for HCKT: 64
Total institutional ownership reports for AGM: 99
Total institutional ownership reports for NSC: 100
Total institutional ownership reports for ADP: 101
Total institutional ownership reports for NRIM: 88
Total institutional ownership reports for ZURA: 1
Total institutional ownership reports for WLLAW: 4
Total institutional ownership reports for MAS: 100
Total institutional ownership reports for NKLA: 19
Total institutional ownership reports for CBAN: 67
Total institutional ownershi

In [101]:
inst_own_df = inst_ownership_all_df.copy()

In [111]:
inst_own_df

Unnamed: 0_level_0,Unnamed: 1_level_0,cik,investorsHolding,lastInvestorsHolding,investorsHoldingChange,numberOf13Fshares,lastNumberOf13Fshares,numberOf13FsharesChange,totalInvested,lastTotalInvested,totalInvestedChange,...,reducedPositionsChange,totalCalls,lastTotalCalls,totalCallsChange,totalPuts,lastTotalPuts,totalPutsChange,putCallRatio,lastPutCallRatio,putCallRatioChange
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAT,2023-09-30,1500217,179,173,6,53143863,54106944,-963081,1.031812e+09,1.686976e+09,-655164822,...,8,51500.0,12300.0,39200,1500.0,9000.0,-7500,0.0291,0.7317,-70.2581
AAT,2023-06-30,1500217,173,190,-17,54106944,55578800,-1471856,1.686976e+09,1.962058e+09,-275081870,...,18,12300.0,0.0,12300,9000.0,0.0,9000,0.7317,0.0000,73.1707
AAT,2023-03-31,1500217,190,189,1,55578800,56689571,-1110771,1.962058e+09,1.505419e+09,456639094,...,-3,0.0,0.0,0,0.0,0.0,0,0.0000,0.0000,0.0000
AAT,2022-12-31,1500217,189,181,8,56689571,56532348,157223,1.505419e+09,1.454094e+09,51324839,...,-17,0.0,0.0,0,0.0,0.0,0,0.0000,0.0000,0.0000
AAT,2022-09-30,1500217,181,199,-18,56532348,57748945,-1216597,1.454094e+09,1.715183e+09,-261088756,...,6,0.0,0.0,0,0.0,0.0,0,0.0000,0.0000,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CYBR,2015-06-30,1598110,157,97,60,14394181,9779375,4614806,9.032510e+08,5.429883e+08,360262723,...,7,314100.0,97900.0,216200,399970.0,193754.0,206216,1.2734,1.9791,-70.5717
CYBR,2015-03-31,1598110,97,56,41,9779375,7711977,2067398,5.429883e+08,3.056906e+08,237297670,...,11,97900.0,10000.0,87900,193754.0,6600.0,187154,1.9791,0.6600,131.9101
CYBR,2014-12-31,1598110,56,44,12,7711977,7571273,140704,3.056906e+08,2.423507e+08,63339941,...,5,10000.0,0.0,10000,6600.0,0.0,6600,0.6600,0.0000,66.0000
CYBR,2014-09-30,1598110,44,0,44,7571273,0,7571273,2.423507e+08,0.000000e+00,242350682,...,0,0.0,0.0,0,0.0,0.0,0,0.0000,0.0000,0.0000


In [43]:
# prevent parquet serialization issues
inst_own_df["totalInvestedChange"] = pd.to_numeric(inst_own_df["totalInvestedChange"], dtype_backend="pyarrow", downcast = 'integer')

In [86]:
def find_bad_cell():
    for index, row in inst_own_df.iterrows():
        try:
            x = row["totalPutsChange"]
            assert isinstance(x, int)
        except Exception as e:
            print(f"Unable to convert to numeric type value:({x}), type({type(x)}), index({index}, \nerror:{e}\nrow:{row})")
            break

In [87]:
find_bad_cell()

In [46]:
type(inst_own_df["totalInvestedChange"][0])

int

In [102]:
# clean up bad data from the third party source feed
inst_own_df["totalInvestedChange"] = inst_own_df["totalInvestedChange"].astype("float64")
inst_own_df["totalInvestedChange"] = inst_own_df["totalInvestedChange"].astype("int64")


In [103]:
inst_own_df["cik"] = inst_own_df["cik"].replace('', -1)
inst_own_df["cik"] = inst_own_df["cik"].astype("int64")


In [104]:
inst_own_df["totalPutsChange"] = inst_own_df["totalPutsChange"].astype("float64")
inst_own_df["totalPutsChange"] = inst_own_df["totalPutsChange"].astype("int64")
# inst_own_df["totalPutsChange"] = pd.to_numeric(inst_own_df["totalPutsChange"], dtype_backend="pyarrow", downcast = 'integer')

In [107]:
inst_own_df["totalCallsChange"] = inst_own_df["totalCallsChange"].astype("float64")
inst_own_df["totalCallsChange"] = inst_own_df["totalCallsChange"].astype("int64")


In [108]:
inst_own_df.dtypes

cik                           int64
investorsHolding              int64
lastInvestorsHolding          int64
investorsHoldingChange        int64
numberOf13Fshares             int64
lastNumberOf13Fshares         int64
numberOf13FsharesChange       int64
totalInvested               float64
lastTotalInvested           float64
totalInvestedChange           int64
ownershipPercent            float64
lastOwnershipPercent        float64
ownershipPercentChange      float64
newPositions                  int64
lastNewPositions              int64
newPositionsChange            int64
increasedPositions            int64
lastIncreasedPositions        int64
increasedPositionsChange      int64
closedPositions               int64
lastClosedPositions           int64
closedPositionsChange         int64
reducedPositions              int64
lastReducedPositions          int64
reducedPositionsChange        int64
totalCalls                  float64
lastTotalCalls              float64
totalCallsChange            

In [124]:
# inst_ownership_file = 'data/institutional_symbol_ownership.csv.bz2'
# inst_ownership_all_df.to_csv(inst_ownership_file)

In [77]:
inst_own_df = inst_own_df.sort_index()
inst_own_df

Unnamed: 0_level_0,Unnamed: 1_level_0,cik,investorsHolding,lastInvestorsHolding,investorsHoldingChange,numberOf13Fshares,lastNumberOf13Fshares,numberOf13FsharesChange,totalInvested,lastTotalInvested,totalInvestedChange,...,reducedPositionsChange,totalCalls,lastTotalCalls,totalCallsChange,totalPuts,lastTotalPuts,totalPutsChange,putCallRatio,lastPutCallRatio,putCallRatioChange
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A,1999-12-31,1090872,15,0,15,16350001,0,16350001,1259479.0,0.0,1259479,...,0,200.0,0.0,200,0.0,0.0,0,0.0000,0.0000,0.0000
A,2000-03-31,1090872,19,15,4,19655547,16350001,3305546,10596271.0,1259479.0,9336792,...,3,0.0,200.0,-200,1000.0,0.0,1000,0.0000,0.0000,0.0000
A,2000-06-30,1090872,99,19,80,61180271,19655547,41524724,19609932.0,10596271.0,9013661,...,0,500.0,0.0,500,1700.0,1000.0,700,3.4000,0.0000,340.0000
A,2000-09-30,1090872,77,99,-22,60125149,61180271,-1055122,5746041.0,19609932.0,-13863891,...,25,85000.0,500.0,84500,20000.0,1700.0,18300,0.2353,3.4000,-316.4706
A,2000-12-31,1090872,81,77,4,36801893,60125149,-23323256,5956761.0,5746041.0,210720,...,4,85000.0,85000.0,0,20200.0,20000.0,200,0.2376,0.2353,0.2353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI,2022-09-30,846475,115,103,12,10539839,11010023,-470184,96250915.0,87843124.0,8407791,...,-7,18000.0,29510.0,-11510,14200.0,10730.0,3470,0.7889,0.3636,42.5283
ZYXI,2022-12-31,846475,129,115,14,10286348,10539839,-253491,268096710.0,96250915.0,171845795,...,15,22500.0,18000.0,4500,2200.0,14200.0,-12000,0.0978,0.7889,-69.1111
ZYXI,2023-03-31,846475,129,129,0,10881276,10286348,594928,130575312.0,268096710.0,-137521398,...,-11,0.0,22500.0,-22500,333200.0,2200.0,331000,0.0000,0.0978,-9.7778
ZYXI,2023-06-30,846475,147,129,18,12911045,10881276,2029769,170590938.0,130575312.0,40015626,...,-3,1400.0,0.0,1400,17000.0,333200.0,-316200,12.1429,0.0000,1214.2857


In [78]:
inst_own_df.index.names = ["Symbol", "Date"]
inst_own_df.index.names

FrozenList(['Symbol', 'Date'])

In [72]:
inst_ownership_file = 'data/data-3rd-party/institutional_symbol_ownership.parquet'


In [79]:
inst_own_df.to_parquet(inst_ownership_file, engine="pyarrow")

In [73]:
tmp_int_own_df = pd.read_parquet(inst_ownership_file)
tmp_int_own_df

Unnamed: 0_level_0,Unnamed: 1_level_0,cik,investorsHolding,lastInvestorsHolding,investorsHoldingChange,numberOf13Fshares,lastNumberOf13Fshares,numberOf13FsharesChange,totalInvested,lastTotalInvested,totalInvestedChange,...,reducedPositionsChange,totalCalls,lastTotalCalls,totalCallsChange,totalPuts,lastTotalPuts,totalPutsChange,putCallRatio,lastPutCallRatio,putCallRatioChange
symbol,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
AAT,2023-09-30,1500217,179,173,6,53143863,54106944,-963081,1.031812e+09,1.686976e+09,-655164822,...,8,51500.0,12300.0,39200,1500.0,9000.0,-7500,0.0291,0.7317,-70.2581
AAT,2023-06-30,1500217,173,190,-17,54106944,55578800,-1471856,1.686976e+09,1.962058e+09,-275081870,...,18,12300.0,0.0,12300,9000.0,0.0,9000,0.7317,0.0000,73.1707
AAT,2023-03-31,1500217,190,189,1,55578800,56689571,-1110771,1.962058e+09,1.505419e+09,456639094,...,-3,0.0,0.0,0,0.0,0.0,0,0.0000,0.0000,0.0000
AAT,2022-12-31,1500217,189,181,8,56689571,56532348,157223,1.505419e+09,1.454094e+09,51324839,...,-17,0.0,0.0,0,0.0,0.0,0,0.0000,0.0000,0.0000
AAT,2022-09-30,1500217,181,199,-18,56532348,57748945,-1216597,1.454094e+09,1.715183e+09,-261088756,...,6,0.0,0.0,0,0.0,0.0,0,0.0000,0.0000,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CYBR,2015-06-30,1598110,157,97,60,14394181,9779375,4614806,9.032510e+08,5.429883e+08,360262723,...,7,314100.0,97900.0,216200,399970.0,193754.0,206216,1.2734,1.9791,-70.5717
CYBR,2015-03-31,1598110,97,56,41,9779375,7711977,2067398,5.429883e+08,3.056906e+08,237297670,...,11,97900.0,10000.0,87900,193754.0,6600.0,187154,1.9791,0.6600,131.9101
CYBR,2014-12-31,1598110,56,44,12,7711977,7571273,140704,3.056906e+08,2.423507e+08,63339941,...,5,10000.0,0.0,10000,6600.0,0.0,6600,0.6600,0.0000,66.0000
CYBR,2014-09-30,1598110,44,0,44,7571273,0,7571273,2.423507e+08,0.000000e+00,242350682,...,0,0.0,0.0,0,0.0,0.0,0,0.0000,0.0000,0.0000


## Prepare forward looking analyst estimates to be used as future covariates

In [115]:
DEFAULT_LIMIT=-1
import typing
from fmpsdk.url_methods import __return_json_v3, __validate_period


def analyst_estimates(
    apikey: str,
    symbol: str,
    period: str = "annual",
    limit: int = DEFAULT_LIMIT
) -> typing.Optional[typing.List[typing.Dict]]:
    """
    Query FMP /analyst-estimates/ API.

    :param apikey: Your API key.
    :param symbol: Company ticker.
    :param period: 'annual' or 'quarter'
    :param limit: Number of rows to return.
    :return: A list of dictionaries.
    """
    path = f"/analyst-estimates/{symbol}"
    query_vars = {
        "apikey": apikey,
        "symbol": symbol,
        "period": __validate_period(value=period),
        "limit": limit,
    }
    return __return_json_v3(path=path, query_vars=query_vars)


In [116]:
def fetch_estimates(period=None):
    assert period in ['quarter', 'annual']
    estimates_all_df = pd.DataFrame()
    for ticker in stocks_ticker_set: # ['ALTR']:
        est = analyst_estimates(apikey=FMP_API_KEY, symbol=ticker, period=period, limit=-1)
        # print('est:', est)
        if est is not None and len(est) > 0:
            est_df = pd.DataFrame(est)
            est_df['date'] = pd.to_datetime(est_df['date'])
            est_df = est_df.set_index(['symbol', 'date'])
            # print(f"Analyst estimates for {ticker} sample: \n{est_df.columns}")
            estimates_all_df = pd.concat([estimates_all_df, est_df])
            # print(f"Key metrics concatenated {ticker}: \n{estimates_all_df.columns}")
            n_est = len(est_df)
            print(f"{n_est} total {ticker} {period} analyst estimates reports")
        else:
            print(f"No {ticker} {period} analyst estimates reports: est={est}")

    return estimates_all_df


In [117]:
# 'TW' in stocks_ticker_set

In [85]:
for p in ['annual', 'quarter']:
    est_file_name= f'data/data-3rd-party/analyst_estimates_{p}.parquet'
    estimates_all_df = fetch_estimates(p)
    estimates_all_df = estimates_all_df.sort_index()
    estimates_all_df.index.names = ["Symbol", "Date"]
    # est_file_name= f'data/analyst_estimates_{p}.csv.bz2'
    # estimates_all_df.to_csv(est_file_name)
    estimates_all_df.to_parquet(est_file_name)
    print(f'all {p} estimates count:', len(estimates_all_df.index))


all annual estimates count: 43626
all quarter estimates count: 149305


In [86]:
tmp_est_dict = {}
for p in ['annual', 'quarter']:
    est_file_name= f'data/data-3rd-party/analyst_estimates_{p}.parquet'
    tmp_est_dict[p] = pd.read_parquet(est_file_name)
    print(f'all {p} estimates count:', len(tmp_est_dict[p]))


all annual estimates count: 43626
all quarter estimates count: 149305


In [89]:
estimates_all_df = tmp_est_dict['quarter']

In [90]:

estimates_all_df

Unnamed: 0_level_0,Unnamed: 1_level_0,estimatedRevenueLow,estimatedRevenueHigh,estimatedRevenueAvg,estimatedEbitdaLow,estimatedEbitdaHigh,estimatedEbitdaAvg,estimatedEbitLow,estimatedEbitHigh,estimatedEbitAvg,estimatedNetIncomeLow,estimatedNetIncomeHigh,estimatedNetIncomeAvg,estimatedSgaExpenseLow,estimatedSgaExpenseHigh,estimatedSgaExpenseAvg,estimatedEpsAvg,estimatedEpsHigh,estimatedEpsLow,numberAnalystEstimatedRevenue,numberAnalystsEstimatedEps
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A,2000-01-31,1233480090,1469878657,1357400000,314775889.0,375103226.0,346399423.0,246185043.0,293366828.0,270917691.0,59746350.0,71180550.0,64900000.0,330042814.0,393296083.0,363200119.0,0.22000,0.24129,0.20253,3.0,6.0
A,2000-05-01,1779613532,2120679507,1958400000,151709090.0,227563636.0,189636363.0,87054545.0,130581818.0,108818181.0,66690909.0,100036363.0,83363636.0,318181818.0,477272727.0,397727272.0,0.32000,0.35097,0.29459,3.0,9.0
A,2000-07-31,2280011634,2716979761,2509069921,260275000.0,390412500.0,325343750.0,155150000.0,232725000.0,193937500.0,120350000.0,180525000.0,150437500.0,517650000.0,776475000.0,647062500.0,0.20000,0.21935,0.18412,3.0,4.0
A,2000-10-31,1986031577,2366657923,2185555555,287496396.0,431244594.0,359370495.0,157224591.0,235836887.0,196530739.0,116046722.0,174070083.0,145058402.0,578736234.0,868104352.0,723420293.0,0.54000,0.59226,0.49712,3.0,4.0
A,2001-01-31,2056619769,2450774463,2263235294,320444444.0,480666666.0,400555555.0,237481481.0,356222222.0,296851851.0,158148148.0,237222222.0,197685185.0,429333333.0,644000000.0,536666666.0,0.45000,0.49355,0.41427,2.0,8.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZYXI,2023-09-30,49500000,50120000,49807000,5790798.0,8686197.0,7238497.0,5392057.0,8088085.0,6740071.0,4092628.0,6138942.0,5115785.0,13338072.0,20007108.0,16672590.0,0.09300,0.10000,0.09000,3.0,3.0
ZYXI,2023-12-30,55800000,58570000,57190000,10024172.0,15036258.0,12530215.0,9532960.0,14299441.0,11916200.0,7317983.0,10976974.0,9147478.0,17648237.0,26472355.0,22060296.0,0.20000,0.23000,0.18000,2.0,2.0
ZYXI,2024-03-30,37911313,56866969,47389141,3457792.0,5186689.0,4322240.0,2324709.0,3487064.0,2905886.0,1679498.0,2519248.0,2099373.0,27145197.0,40717796.0,33931496.0,0.17000,0.19550,0.15300,1.0,1.0
ZYXI,2024-06-30,78273738,96420408,87351250,16148074.0,19891778.0,18020788.0,14784246.0,18211766.0,16498795.0,43624339.0,55742311.0,48471528.0,46568614.0,57364896.0,51969240.0,1.34259,1.54398,1.20833,0.0,0.0


## Upload raw data to hf hub

In [7]:
import os
from pathlib import Path
from dotenv import load_dotenv
from huggingface_hub import snapshot_download, upload_folder, create_repo
from canswim.hfhub import HFHub

# prefix for HF Hub dataset repo
repo_id = "ivelin/canswim"
private=True

load_dotenv(override=True)

HF_TOKEN=os.getenv("HF_TOKEN")

print(f'HF_TOKEN={HF_TOKEN!= None}')

HF_TOKEN=True


In [8]:
# Create repo if not existing yet
repo_info = create_repo(
    repo_id=repo_id, repo_type="dataset", private=private, exist_ok=True, token=HF_TOKEN
)
print(f"repo_info: ", repo_info)
data_path = Path("data")
upload_folder(
    repo_id=repo_id,
    # path_in_repo="data-3rd-party",
    repo_type="dataset",
    folder_path=data_path,
    token=HF_TOKEN,
)

repo_info:  https://huggingface.co/datasets/ivelin/canswim


sectors.parquet:   0%|          | 0.00/3.12M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/ivelin/canswim/commit/6d3a1c9c577bd79abf642d4e1b4f606d1ab06e43', commit_message='Upload folder using huggingface_hub', commit_description='', oid='6d3a1c9c577bd79abf642d4e1b4f606d1ab06e43', pr_url=None, pr_revision=None, pr_num=None)