# Setup

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from openbb import obb
import financedatabase as fd
from financetoolkit import Toolkit
from utils.api_keys import API_KEYS

# Tiingo

In [None]:
from tiingo import TiingoClient

# Tiingo client settings:
#   'session' - reuse the same HTTP session across API calls (better performance)
#   'api_key' - supplied through the config dictionary rather than an
#               environment variable
config = {
    'session': True,
    'api_key': API_KEYS["tiingo"],
}

# Initialize
client = TiingoClient(config)

In [None]:
# Read the sample index file and keep its "Index" column as a plain Python list.
index_list = (
    pd.read_csv("/Users/omar/Documents/WQU-Capstone/data/sample_cluster_Index_data.csv")["Index"]
    .tolist()
)


In [None]:
def get_tiingo_data(ticker, metric_name='adjClose', start_date=None, end_date=None, frequency="daily"):
    """
    Fetch historical data from Tiingo API.

    Parameters
    ----------
    ticker : str
        Symbol forwarded to ``client.get_dataframe``.
    metric_name : str, optional
        Forwarded as ``metric_name`` (default ``'adjClose'``).
        NOTE(review): with a metric name set, the Tiingo client is expected to
        return only that metric rather than the full OHLCV frame - confirm
        against the tiingo-python documentation.
    start_date, end_date : str, optional
        Forwarded as ``startDate`` / ``endDate``.
    frequency : str, optional
        Forwarded as ``frequency`` (default ``"daily"``).

    Returns
    -------
    The object returned by ``client.get_dataframe`` with its index renamed
    to ``"Date"``, or ``None`` if anything in the request or post-processing
    raised (the error is printed, not re-raised).
    """
    try:
        data = client.get_dataframe(
            ticker,
            metric_name=metric_name,
            startDate=start_date,
            endDate=end_date,
            frequency=frequency,
        )
        # rename_axis works for both a Series and a DataFrame, unlike an
        # in-place reset_index()/set_index() round trip.
        return data.rename_axis("Date")
    except Exception as e:
        # Best effort: report the failure and let the caller decide what to do.
        print(f"Error fetching data for {ticker}: {e}")
        return None

In [None]:
start_date = "2020-01-01"
end_date = "2024-12-31"
frequency = "daily"

# Collect one frame per ticker and concatenate once after the loop; calling
# pd.concat inside the loop re-copies all accumulated rows on every iteration.
ticker_frames = []

# Fetch data ticker by ticker
errorcounter = 0
for ticker in index_list:
    print(f"Fetching data for {ticker}...")
    try:
        # Fetch data for the current ticker
        ticker_data = client.get_dataframe(
            ticker,
            startDate=start_date,
            endDate=end_date,
            frequency=frequency,
            fmt='json'
        )

        # Add a column for the ticker symbol
        ticker_data['Ticker'] = ticker

        ticker_frames.append(ticker_data)
    except Exception as e:
        # Best effort: a failing ticker is counted and reported, not fatal.
        errorcounter += 1
        print(f"Error fetching data for {ticker}: {e}")
print(f"Total errors encountered: {errorcounter}")
print(f"Total tickers loaded: {len(index_list) - errorcounter}")

# pd.concat raises on an empty list, so fall back to an empty frame when
# every request failed.
tiingo_df = pd.concat(ticker_frames) if ticker_frames else pd.DataFrame()
# Reset the index for the final DataFrame
tiingo_df = tiingo_df.reset_index()

# Display the resulting DataFrame
print(tiingo_df)

Fetching data for VXUS...
Fetching data for BND...
Fetching data for QQQ...
Fetching data for BNDX...
Fetching data for VTIP...
Fetching data for VCIT...
Fetching data for TLT...
Fetching data for VCSH...
Fetching data for VGIT...
Fetching data for MBB...
Fetching data for IEF...
Fetching data for IUSB...
Fetching data for VONG...
Fetching data for VGSH...
Fetching data for SHY...
Fetching data for IGSB...
Fetching data for SHV...
Fetching data for TQQQ...
Fetching data for SMH...
Fetching data for ACWI...
Fetching data for IEI...
Fetching data for IGIB...
Fetching data for VCLT...
Fetching data for VGLT...
Fetching data for PFF...
Fetching data for EMB...
Fetching data for RDVY...
Fetching data for USIG...
Fetching data for SOXX...
Fetching data for SCZ...
Fetching data for QYLD...
Fetching data for CIBR...
Fetching data for SDVY...
Fetching data for FTSM...
Fetching data for MCHI...
Fetching data for IBB...
Fetching data for ACWX...
Fetching data for PDBC...
Fetching data for KBWB...

In [None]:
# Preview the first rows of the combined per-ticker frame.
print(tiingo_df.head())

                       date  close   high    low   open   volume   adjClose  \
0 2020-01-02 00:00:00+00:00  56.31  56.34  56.08  56.17  2017952  48.193664   
1 2020-01-03 00:00:00+00:00  55.62  55.90  55.56  55.59  2140248  47.603119   
2 2020-01-06 00:00:00+00:00  55.70  55.73  55.40  55.42  2119875  47.671588   
3 2020-01-07 00:00:00+00:00  55.60  55.74  55.56  55.74  1987978  47.586001   
4 2020-01-08 00:00:00+00:00  55.70  55.94  55.54  55.59  1824412  47.671588   

     adjHigh     adjLow    adjOpen  adjVolume  divCash  splitFactor Ticker  
0  48.219340  47.996816  48.073843    2017952      0.0          1.0   VXUS  
1  47.842760  47.551767  47.577443    2140248      0.0          1.0   VXUS  
2  47.697264  47.414829  47.431946    2119875      0.0          1.0   VXUS  
3  47.705822  47.551767  47.705822    1987978      0.0          1.0   VXUS  
4  47.876995  47.534650  47.577443    1824412      0.0          1.0   VXUS  


In [None]:
# Column dtypes and non-null counts of the combined frame.
tiingo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55352 entries, 0 to 55351
Data columns (total 14 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   date         55352 non-null  datetime64[ns, UTC]
 1   close        55352 non-null  float64            
 2   high         55352 non-null  float64            
 3   low          55352 non-null  float64            
 4   open         55352 non-null  float64            
 5   volume       55352 non-null  int64              
 6   adjClose     55352 non-null  float64            
 7   adjHigh      55352 non-null  float64            
 8   adjLow       55352 non-null  float64            
 9   adjOpen      55352 non-null  float64            
 10  adjVolume    55352 non-null  int64              
 11  divCash      55352 non-null  float64            
 12  splitFactor  55352 non-null  float64            
 13  Ticker       55352 non-null  object             
dtypes: datetime64[ns, UTC]

In [16]:
# Persist the combined frame as CSV; index=False leaves the row index out of the file.
# NOTE(review): the absolute, user-specific path only resolves on the author's machine.
tiingo_df.to_csv("/Users/omar/Documents/WQU-Capstone/data/tiingo_data.csv", index=False)

In [17]:
# Wide adjusted-close table: one row per date, one column per ticker.
tiingo_df_adj_close = (
    tiingo_df.loc[:, ['date', 'Ticker', 'adjClose']]
    .pivot(index='date', columns='Ticker', values='adjClose')
    .astype(float)
)
# Show the first rows of the pivoted table
print(tiingo_df_adj_close.head())

Ticker                          ACWI       ACWX       ANGL        BND  \
date                                                                    
2020-01-02 00:00:00+00:00  73.339969  43.517758  22.859068  72.502058   
2020-01-03 00:00:00+00:00  72.707965  42.957594  22.904924  72.761087   
2020-01-06 00:00:00+00:00  72.872836  43.062625  22.843783  72.674744   
2020-01-07 00:00:00+00:00  72.689646  42.975099  22.889639  72.614304   
2020-01-08 00:00:00+00:00  73.046866  43.088883  22.889639  72.502058   

Ticker                          BNDX       BSCR       CIBR        EMB  \
date                                                                    
2020-01-02 00:00:00+00:00  48.633984  17.742726  29.429946  89.863443   
2020-01-03 00:00:00+00:00  48.771150  17.806168  29.507266  89.636238   
2020-01-06 00:00:00+00:00  48.736859  17.797709  29.869704  89.456041   
2020-01-07 00:00:00+00:00  48.702567  17.759644  29.893867  89.597064   
2020-01-08 00:00:00+00:00  48.608265  17.755414  3

# Building the Asset Universe

## Defining OpenBB Data Providers

In [None]:
# OBB Data Providers list:
#-------------------------#
obb_data_providers = [
    "alphavantage",
    "benzinga",
    "biztoc",
    "fmp",
    "fred",
    "nasdaq",
    "polygon",
    "tiingo",
    "yfinance"
]
#-------------------------#
# OBB interval list (empty placeholder; not referenced in the visible notebook):
obb_intervals = []

#defining the function to get data from OBB
def get_obb_data(ticker, provider, start_date=None, end_date=None, interval=None):
    """
    Fetch historical equity prices through OpenBB and return them as a DataFrame.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``obb.equity.price.historical``.
    provider : str
        Must be one of ``obb_data_providers``; otherwise an error is printed
        and ``None`` is returned without calling OpenBB.
    start_date, end_date, interval : optional
        Forwarded only when not ``None`` so that OpenBB's own defaults apply
        when the caller omits them (an explicit ``interval=None`` would
        override the default instead of using it).

    Returns
    -------
    The result of ``.to_df()`` on the OpenBB response, or ``None`` when the
    provider is not allowed or the request raised (the error is printed).
    """
    if provider not in obb_data_providers:
        print(f"Error: provider should be in {obb_data_providers}")
        return None

    optional_args = {
        "start_date": start_date,
        "end_date": end_date,
        "interval": interval,
    }
    query = {key: value for key, value in optional_args.items() if value is not None}

    try:
        return obb.equity.price.historical(ticker, provider=provider, **query).to_df()
    except Exception as e:
        # Best effort: report the failure and signal it with None.
        print(f"Error fetching data from {provider}: {e}")
        return None


Testing `OpenBB` to import AAPL data

In [None]:
# Smoke test: request AAPL price history via the "yfinance" provider and convert it to a DataFrame.
apple_data = obb.equity.price.historical("AAPL", provider="yfinance").to_df()

## Testing Finance DataBase Package

In [None]:
# Load FMP API key from API_KEYS dictionary
# (passed as `api_key` to the crypto toolkit further down in this notebook)
fmp_api_key = API_KEYS["fmp"]

### FD Equities

In [None]:
# Initialize the Equities database
equities = fd.Equities()

# Build the equity asset universe by filtering on sector and market cap:
#   sector     - "Information Technology"
#   market_cap - "Mid Cap" only (passed as a one-element list)
asset_universe_equities = equities.select(
    sector="Information Technology",
    market_cap=["Mid Cap"],
)

In [None]:
# Count equities per exchange and show the 25 exchanges with the most listings
asset_universe_equities.groupby("exchange").size().sort_values(ascending=False).head(25)

Let's pick only the top 25 exchanges from above:

In [None]:
# TOP 25 exchanges: names of the exchanges with the most equities, largest first
top25_equities_exchanges = asset_universe_equities.groupby("exchange").size().sort_values(ascending=False).head(25).index.tolist()
top25_equities_exchanges

In [None]:
# Keep only the equities listed on one of the top-25 exchanges.
# (The list is `top25_equities_exchanges`; the previous spelling with a
# doubled "o" raised a NameError.)
asset_universe_equities = asset_universe_equities[asset_universe_equities["exchange"].isin(top25_equities_exchanges)]
asset_universe_equities

List exchanges:

In [None]:
# Show number of assets by market, largest group first
display(asset_universe_equities.groupby("market").size().sort_values(ascending=False))
# Print the number of assets in the universe
print(f"Number of equities in the universe: {len(asset_universe_equities)}")

### FD ETFs

In [None]:
# Initialize the ETFs database and select the "Information Technology" category group
etfs = fd.ETFs()
asset_universe_etfs = etfs.select(category_group="Information Technology")
# Show number of ETFs by exchange, largest group first
display(asset_universe_etfs.groupby("exchange").size().sort_values(ascending=False))

In [None]:
# TOP 5 exchanges by number of ETFs.
# NOTE(review): the variable is named "top25" but head(5) keeps only five
# exchanges - confirm which one is intended.
top25_ETFS_exchanges = asset_universe_etfs.groupby("exchange").size().sort_values(ascending=False).head(5).index.tolist()
# Select only the ETFs listed on those exchanges
asset_universe_etfs = asset_universe_etfs[asset_universe_etfs["exchange"].isin(top25_ETFS_exchanges)]
# Show number of ETFs by exchange
display(asset_universe_etfs.groupby("exchange").size().sort_values(ascending=False))
# Print the number of ETFs in the universe
print(f"Number of ETFs in the universe: {len(asset_universe_etfs)}")


In [None]:
# Display the filtered ETF universe
asset_universe_etfs

In [None]:
# Save the index labels of the filtered ETF universe as a list
# (presumably the ETF ticker symbols - verify against the FinanceDatabase frame)
etf_symbols = asset_universe_etfs.index.tolist()
etf_symbols

In [None]:
# Build a FinanceToolkit instance for the first 20 ETF symbols,
# authenticated with the FMP key and starting on 2020-01-01
ETFs = Toolkit(
    etf_symbols[:20], api_key=API_KEYS["fmp"], start_date="2020-01-01"
)

In [None]:
# Fetch and display the historical data for the selected ETFs
display(ETFs.get_historical_data())

### FD Funds

In [None]:
# Initialize Funds database
funds = fd.Funds()
asset_universe_funds = funds.select(category_group="Information Technology")
# Pick only the top 25 exchanges, ranked by number of funds.
# The filter previously referenced `asset_universe_exchanges`, which is not
# defined in the visible notebook (NameError). The ranking is computed from the
# funds frame itself, the same way the ETF section ranks its exchanges.
# NOTE(review): confirm this matches the intended exchange list.
top25_funds_exchanges = (
    asset_universe_funds.groupby("exchange")
    .size()
    .sort_values(ascending=False)
    .head(25)
    .index.tolist()
)
asset_universe_funds = asset_universe_funds[asset_universe_funds["exchange"].isin(top25_funds_exchanges)]
# Show number of funds by exchange
display(asset_universe_funds.groupby("exchange").size().sort_values(ascending=False))
# Print the number of funds in the universe
print(f"Number of funds in the universe: {len(asset_universe_funds)}")

In [None]:
# Display the filtered funds universe
asset_universe_funds

### FD Cryptos

In [None]:
# Initialize the Crypto database
cryptos = fd.Cryptos()
# Select the entries whose cryptocurrency is "ETH"
eth_cryptos = cryptos.select(
    cryptocurrency="ETH"
)
# Convert the selection into a toolkit object using the FMP key, starting 2020-01-01
cryptos_toolkit = eth_cryptos.to_toolkit(
    api_key=fmp_api_key,
    start_date="2020-01-01"
)
# Request daily historical data; as the last expression it is rendered as the cell output
cryptos_toolkit.get_historical_data(period="daily")

In [2]:
from tiingo import TiingoClient

# Tiingo client settings:
#   'session' - reuse the same HTTP session across API calls (better performance)
#   'api_key' - supplied through the config dictionary rather than an
#               environment variable
config = {
    'session': True,
    'api_key': API_KEYS["tiingo"],
}

# Initialize
client = TiingoClient(config)

In [3]:
# Read the sample index file and keep its "Index" column as a plain Python list.
index_list = (
    pd.read_csv("/Users/omar/Documents/WQU-Capstone/data/sample_cluster_Index_data.csv")["Index"]
    .tolist()
)


In [4]:
# load data from Tiingo
def get_tiingo_data(ticker, metric_name='adjClose', start_date=None, end_date=None, frequency="daily"):
    """
    Request data for one ticker from the module-level Tiingo ``client``.

    ``metric_name``, ``start_date``, ``end_date`` and ``frequency`` are
    forwarded to ``client.get_dataframe`` as ``metric_name``, ``startDate``,
    ``endDate`` and ``frequency``.

    Returns whatever ``client.get_dataframe`` returns, or ``None`` if the
    call raised (the error is printed, not re-raised).
    """
    request_options = {
        "metric_name": metric_name,
        "startDate": start_date,
        "endDate": end_date,
        "frequency": frequency,
    }
    try:
        result = client.get_dataframe(ticker, **request_options)
    except Exception as exc:
        print(f"Error fetching data from Tiingo: {exc}")
        result = None
    return result

In [6]:
start_date = "2020-01-01"
end_date = "2024-12-31"
frequency = 'daily'

# Collect one frame per ticker and concatenate once after the loop; calling
# pd.concat inside the loop re-copies all accumulated rows on every iteration.
ticker_frames = []

# Fetch data ticker by ticker
errorcounter = 0
for ticker in index_list:
    print(f"Fetching data for {ticker}...")
    try:
        # Fetch data for the current ticker
        ticker_data = client.get_dataframe(
            ticker,
            startDate=start_date,
            endDate=end_date,
            frequency=frequency,
            fmt='json'
        )

        # Add a column for the ticker symbol
        ticker_data['Ticker'] = ticker

        ticker_frames.append(ticker_data)
    except Exception as e:
        # Best effort: a failing ticker is counted and reported, not fatal.
        errorcounter += 1
        print(f"Error fetching data for {ticker}: {e}")
print(f"Total errors encountered: {errorcounter}")

# pd.concat raises on an empty list, so fall back to an empty frame when
# every request failed.
tiingo_df = pd.concat(ticker_frames) if ticker_frames else pd.DataFrame()
# Reset the index for the final DataFrame
tiingo_df = tiingo_df.reset_index()

# Display the resulting DataFrame
print(tiingo_df)

Fetching data for VXUS...
Fetching data for BND...
Fetching data for QQQ...
Fetching data for BNDX...
Fetching data for VTIP...
Fetching data for VCIT...
Fetching data for TLT...
Fetching data for VCSH...
Fetching data for VGIT...
Fetching data for MBB...
Fetching data for IEF...
Fetching data for IUSB...
Fetching data for VONG...
Fetching data for VGSH...
Fetching data for SHY...
Fetching data for IGSB...
Fetching data for SHV...
Fetching data for TQQQ...
Fetching data for SMH...
Fetching data for ACWI...
Fetching data for IEI...
Fetching data for IGIB...
Fetching data for VCLT...
Fetching data for VGLT...
Fetching data for PFF...
Fetching data for EMB...
Fetching data for RDVY...
Fetching data for USIG...
Fetching data for SOXX...
Fetching data for SCZ...
Fetching data for QYLD...
Fetching data for CIBR...
Fetching data for SDVY...
Fetching data for FTSM...
Fetching data for MCHI...
Fetching data for IBB...
Fetching data for ACWX...
Fetching data for PDBC...
Fetching data for KBWB...

In [7]:
# Preview the first rows of the combined per-ticker frame.
print(tiingo_df.head())

                       date  close   high    low   open   volume   adjClose  \
0 2020-01-02 00:00:00+00:00  56.31  56.34  56.08  56.17  2017952  48.193664   
1 2020-01-03 00:00:00+00:00  55.62  55.90  55.56  55.59  2140248  47.603119   
2 2020-01-06 00:00:00+00:00  55.70  55.73  55.40  55.42  2119875  47.671588   
3 2020-01-07 00:00:00+00:00  55.60  55.74  55.56  55.74  1987978  47.586001   
4 2020-01-08 00:00:00+00:00  55.70  55.94  55.54  55.59  1824412  47.671588   

     adjHigh     adjLow    adjOpen  adjVolume  divCash  splitFactor Ticker  
0  48.219340  47.996816  48.073843    2017952      0.0          1.0   VXUS  
1  47.842760  47.551767  47.577443    2140248      0.0          1.0   VXUS  
2  47.697264  47.414829  47.431946    2119875      0.0          1.0   VXUS  
3  47.705822  47.551767  47.705822    1987978      0.0          1.0   VXUS  
4  47.876995  47.534650  47.577443    1824412      0.0          1.0   VXUS  


In [15]:
# Column dtypes and non-null counts of the combined frame.
tiingo_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55352 entries, 0 to 55351
Data columns (total 14 columns):
 #   Column       Non-Null Count  Dtype              
---  ------       --------------  -----              
 0   date         55352 non-null  datetime64[ns, UTC]
 1   close        55352 non-null  float64            
 2   high         55352 non-null  float64            
 3   low          55352 non-null  float64            
 4   open         55352 non-null  float64            
 5   volume       55352 non-null  int64              
 6   adjClose     55352 non-null  float64            
 7   adjHigh      55352 non-null  float64            
 8   adjLow       55352 non-null  float64            
 9   adjOpen      55352 non-null  float64            
 10  adjVolume    55352 non-null  int64              
 11  divCash      55352 non-null  float64            
 12  splitFactor  55352 non-null  float64            
 13  Ticker       55352 non-null  object             
dtypes: datetime64[ns, UTC]