In [951]:
#Import dependencies
import streamlit as st
import datetime
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.impute import SimpleImputer

# Set display options for Pandas
pd.set_option('display.max_colwidth', None)
pd.set_option('expand_frame_repr', False)

In [952]:
# Function to fetch asset data using yfinance
def fetch_asset_data(api_pull, start_date, end_date):
    data_frames = [] # Initialize an empty list to store data frames

    # Iterate over asset classes and their corresponding tickers
    for asset_class, tickers in api_pull.items():
        for ticker in tickers:
            # Download historical data for each ticker within the specified date range
            historical_data = yf.download(ticker, start=start_date, end=end_date)
            historical_data['Asset Class'] = asset_class # Add a column indicating the asset class
            historical_data['Ticker'] = ticker # Add a column indicating the ticker
            data_frames.append(historical_data) # Append the data frame to the list

    # Concatenate all data frames in the list into a single data frame
    combined_data = pd.concat(data_frames)
    return combined_data

In [953]:
# Streamlit app
st.title("Portfolio Optimization")

DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)

In [954]:
# Define tickers for different asset classes

# VAR1.1 1 with 10 individial tickers and commmodities for Jupyter lab 
# crypto_tickers = ['BTC-USD', 'ETH-USD', 'DOGE-USD', 'MATIC-USD', 'AVAX-USD', 'SOL-USD', 'TRX-USD', 'ATOM-USD', 'UNI7083-USD', 'LINK-USD']
# stocks_tickers = ['AMZN', 'AAPL', 'TSLA', 'GOOGL', 'NVDA', 'MSFT', 'TSM', 'META', 'XOM', 'LAC']
# commodities_tickers = ['GC=F', 'SI=F', 'CL=F', 'HG=F', 'LBS=F', 'ZS=F', 'GF=F', 'KE=F', 'CT=F', 'ZR=F']
# bonds_tickers = ['TLT', 'IEF', 'SHY', 'AGG', 'BND', 'LQD', 'HYG', 'MUB', 'TIP', 'EMB']

# VAR 1.2 with 20  individial tickers for Streamlit with individual stocks and bonds
# Prompt user to choose assets in asset classes
# crypto_selected = st.multiselect("Choose cryptocurrencies:", options=['AAVE-USD', 'ALGO-USD', 'BAT-USD', 'BCH-USD', 'BTC-USD', 'DAI-USD', 'ETH-USD', 'GRT-USD', 'LINK-USD', 'LTC-USD', 'MATIC-USD', 'MKR-USD', 'NEAR-USD', 'PAXG-USD', 'SHIB-USD', 'SOL-USD', 'TRX-USD', 'UNI-USD', 'USDT-USD ', 'WBTC-USD'])
# stocks_selected = st.multiselect("Choose stocks:", options=['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'META', 'TSLA', 'JPM ', 'V', 'JNJ', 'UNH', 'PG', 'KO', 'XOM', 'CVX', 'BA', 'CAT', 'NVDA', 'NVDA','DIS', 'PFE'])
# commodities_selected = st.multiselect("Choose commodities:", options=['GLD', 'SLV', 'CL=F', 'HG=F', 'LBS=F', 'ZS=F', 'GF=F', 'KE=F', 'CT=F', 'ZR=F'])not supported on Alpaca
# bonds_selected = st.multiselect("Choose bonds:", options=['TLT', 'IEF', 'SHY', 'AGG', 'BND', 'LQD', 'HYG', 'MUB', 'TIP', 'EMB'])not supported on Alpaca

# VAR1.1 3 with with 20  individial tickers for Jupyter lab 
# crypto_tickers = ['AAVE-USD', 'ALGO-USD', 'BAT-USD', 'BCH-USD', 'BTC-USD', 'DAI-USD', 'ETH-USD', 'GRT-USD', 'LINK-USD', 'LTC-USD', 'MATIC-USD', 'MKR-USD', 'NEAR-USD', 'PAXG-USD', 'SHIB-USD', 'SOL-USD', 'TRX-USD', 'UNI-USD', 'USDT-USD ', 'WBTC-USD']
# stocks_tickers = ['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'META', 'TSLA', 'JPM ', 'V', 'JNJ', 'UNH', 'PG', 'KO', 'XOM', 'CVX', 'BA', 'CAT', 'NVDA', 'NVDA','DIS', 'PFE']
# commodities_tickers = ['GC=F', 'SI=F', 'CL=F', 'HG=F', 'LBS=F', 'ZS=F', 'GF=F', 'KE=F', 'CT=F', 'ZR=F']not supported on Alpaca
# bonds_tickers = ['TLT', 'IEF', 'SHY', 'AGG', 'BND', 'LQD', 'HYG', 'MUB', 'TIP', 'EMB']not supported on Alpaca


# VAR 2.1 with 20 tickers and ETF's for Streamlit with crypto and ETF's dsupported USD pairs by Alpaca
# (USDT_USD is neded just for swaping or preserving purchasing power)
# Prompt user to choose assets in asset classes
# crypto_selected = st.multiselect("Choose cryptocurrencies:", options=['AAVE-USD', 'ALGO-USD', 'BAT-USD', 'BCH-USD', 'BTC-USD', 'DAI-USD', 'ETH-USD', 'GRT-USD', 'LINK-USD', 'LTC-USD', 'MATIC-USD', 'MKR-USD', 'NEAR-USD', 'PAXG-USD', 'SHIB-USD', 'SOL-USD', 'TRX-USD', 'UNI-USD', 'USDT-USD ', 'WBTC-USD'])
# stocks_selected = st.multiselect("Choose stocks:", options=['SPY', 'QQQ', 'IWM', 'VTI', 'VOO', 'VO', 'VB', 'VEA', 'VWO', 'XLF ', 'XLV', 'XLE', 'XLY', 'XLC', 'XLK', 'XLI', 'XLP', 'XLB', 'XLU', 'XLRE'])                                
# commodities_selected = st.multiselect("Choose commodities:", options=['GLD', 'SLV', 'USO', 'UNG', 'DBA', 'DBC', 'GSG', 'IAU ', 'PPLT', 'SIVR', 'MOO', 'NIB', 'JO', 'JJG', 'WEAT', 'UGA', 'DBE', 'HAP', 'REMX', 'OIL'])
# bonds_selected = st.multiselect("Choose bonds:", options=['AGG', 'BND', 'TLT', 'IEF', 'SHY', 'LQD', 'HYG', 'JNK', 'MUB', 'TIP', 'BNDX', 'EMB', 'VWOB', 'PFF', 'BKLN', 'FLOT', 'GSY', 'SCHO', 'SCHR', 'SCHZ'])                                    
                                      
# VAR 2.2 with 20 tickers and commmodities for Jupyter lab with crypto and ETF's dsupported USD pairs by Alpaca
# (USDT_USD is neded just for swaping or preserving purchasing power) 
# WITH REMOVED NAN TICKERS !!!
# Prompt user to choose assets in asset classes
crypto_tickers = ['AAVE-USD', 'ALGO-USD', 'BAT-USD', 'BCH-USD', 'BTC-USD', 'DAI-USD', 'ETH-USD', 'LINK-USD', 'LTC-USD', 'MATIC-USD', 'MKR-USD', 'NEAR-USD', 'PAXG-USD', 'SOL-USD', 'TRX-USD', 'USDT-USD ', 'WBTC-USD'] 
stocks_tickers = ['SPY', 'QQQ', 'IWM', 'VTI', 'VOO', 'VO', 'VB', 'VEA', 'VWO', 'XLF ', 'XLV', 'XLE', 'XLY', 'XLC', 'XLK', 'XLI', 'XLP', 'XLB', 'XLU', 'XLRE']
commodities_tickers = ['GLD', 'SLV', 'USO', 'UNG', 'DBA', 'DBC', 'GSG', 'IAU ', 'PPLT', 'SIVR', 'MOO', 'NIB', 'JO', 'JJG', 'WEAT', 'UGA', 'DBE', 'REMX', 'OIL']
bonds_tickers = ['AGG', 'BND', 'TLT', 'IEF', 'SHY', 'LQD', 'HYG', 'JNK', 'MUB', 'TIP', 'BNDX', 'EMB', 'VWOB', 'PFF', 'BKLN', 'FLOT', 'GSY', 'SCHO', 'SCHR', 'SCHZ']



Create a DataFrame with the tickers and their corresponding first trading dates.

In [955]:
# # Define lists of tickers for different asset classes
# crypto_tickers = ['AAVE-USD', 'ALGO-USD', 'BAT-USD', 'BCH-USD', 'BTC-USD', 'DAI-USD', 'ETH-USD', 'GRT-USD', 'LINK-USD', 'LTC-USD', 'MATIC-USD', 'MKR-USD', 'NEAR-USD', 'PAXG-USD', 'SHIB-USD', 'SOL-USD', 'TRX-USD', 'UNI-USD', 'USDT-USD', 'WBTC-USD']
# stocks_tickers = ['SPY', 'QQQ', 'IWM', 'VTI', 'VOO', 'VO', 'VB', 'VEA', 'VWO', 'XLF', 'XLV', 'XLE', 'XLY', 'XLC', 'XLK', 'XLI', 'XLP', 'XLB', 'XLU', 'XLRE']
# commodities_tickers = ['GLD', 'SLV', 'USO', 'UNG', 'DBA', 'DBC', 'GSG', 'IAU', 'PPLT', 'SIVR', 'MOO', 'NIB', 'JO', 'JJG', 'WEAT', 'UGA', 'DBE', 'HAP', 'REMX', 'OIL']
# bonds_tickers = ['AGG', 'BND', 'TLT', 'IEF', 'SHY', 'LQD', 'HYG', 'JNK', 'MUB', 'TIP', 'BNDX', 'EMB', 'VWOB', 'PFF', 'BKLN', 'FLOT', 'GSY', 'SCHO', 'SCHR', 'SCHZ']

# Combine all tickers into a single list
all_tickers = crypto_tickers + stocks_tickers + commodities_tickers + bonds_tickers

first_trading_dates = []

# Iterate through each ticker
for ticker in all_tickers:
    # Download the ticker's historical data
    data = yf.download(ticker, start='2000-01-01', end='2023-04-06', progress=False)
    # Extract the first trading date
    first_date = data.index[0].strftime('%Y-%m-%d')
    # Append the ticker and its first trading date to the list
    first_trading_dates.append((ticker, first_date))

# Create a Pandas DataFrame with the tickers and their corresponding first trading dates
df = pd.DataFrame(first_trading_dates, columns=['Ticker', 'First Trading Date'])









In [964]:
#Display whole df and sort the firts tradig date 
pd.options.display.max_rows = None
df = df.sort_values(by='First Trading Date', ascending=True)
df

Unnamed: 0,Ticker,First Trading Date
0,XLE,2000-01-03
8,XLU,2000-01-03
10,XLB,2000-01-03
7,XLP,2000-01-03
6,QQQ,2000-01-03
9,XLI,2000-01-03
4,XLY,2000-01-03
3,SPY,2000-01-03
1,XLK,2000-01-03
5,XLV,2000-01-03


In [965]:
# VAR 1 for Streamlit 
# Define the tickers for each asset class.
# api_pull = {'crypto': crypto_selected,
#             'stocks': stocks_selected,
#             'commodities': commodities_selected,
#             'bonds': bonds_selected}

# VAR 2 for Jupyter lab 
# Define the tickers for each asset class.
api_pull = {'crypto': crypto_tickers,
            'stocks': stocks_tickers,
            'commodities': commodities_tickers,
            'bonds': bonds_tickers}

In [966]:
# Prompt user to choose time period
st.write("Choose the analysis period:\n"
          "Note that you can only choose a period starting from Jan 1st, 2020!")

In [967]:
# FOR STREAMLIT
# # Get the current date
# today = datetime.datetime.now().date()

# # Set the earliest allowed start date to January 1st, 2020
# earliest_start_date = datetime.date(2020, 1, 1)

# # Get the user selected start and end dates
# selected_start_date = st.date_input("Select the start date", earliest_start_date)

# # If the selected start date is earlier than the earliest allowed start date, set it to the earliest start date
# if selected_start_date < earliest_start_date:
#     selected_start_date = earliest_start_date

# selected_end_date = st.date_input("Select the end date", today)

# # If the selected end date is later than the current date, set it to the current date
# if selected_end_date > today:
#     selected_end_date = today

# # If the selected end date is earlier than the selected start date, set the end date to the start date
# if selected_end_date < selected_start_date:
#     selected_end_date = selected_start_date


In [968]:
# FOR STREAMLIT
# #Call fetch_asset_data function to pull data from yfinance API
# data = fetch_asset_data(api_pull, selected_start_date, selected_end_date)

# #Display the fetched data
# st.write("Fetched Data:")
# st.dataframe(data)

# Define the time range for the data
selected_start_date = '2020-10-14'
selected_end_date = '2023-03-22'

data = fetch_asset_data(api_pull, selected_start_date, selected_end_date)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [969]:
#Reset the index of a DataFrame and set a new index with multiple columns
data = data.reset_index().set_index(['Asset Class', 'Ticker','Date'])

In [970]:
display(data.head(30))
data.tail(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Open,High,Low,Close,Adj Close,Volume
Asset Class,Ticker,Date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
crypto,AAVE-USD,2020-10-14,51.372898,57.853947,49.629528,51.316517,51.316517,3132405
crypto,AAVE-USD,2020-10-15,51.387611,52.080685,44.018326,44.019184,44.019184,5883146
crypto,AAVE-USD,2020-10-16,42.890606,43.906731,39.142956,39.495846,39.495846,52855205
crypto,AAVE-USD,2020-10-17,39.486191,43.077374,38.633614,41.366993,41.366993,43339687
crypto,AAVE-USD,2020-10-18,41.389599,41.749489,39.531586,40.746971,40.746971,40441163
crypto,AAVE-USD,2020-10-19,40.836758,40.995186,35.951363,36.012318,36.012318,49289897
crypto,AAVE-USD,2020-10-20,36.043785,36.319069,32.101101,32.180016,32.180016,60632326
crypto,AAVE-USD,2020-10-21,32.25349,37.078201,32.048149,35.160358,35.160358,43831235
crypto,AAVE-USD,2020-10-22,35.097488,39.856922,33.589462,38.031651,38.031651,54717366
crypto,AAVE-USD,2020-10-23,37.730595,42.546658,36.924671,41.686638,41.686638,76081860


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Open,High,Low,Close,Adj Close,Volume
Asset Class,Ticker,Date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
bonds,SCHZ,2023-03-08,45.610001,45.700001,45.419998,45.48,45.357838,624100
bonds,SCHZ,2023-03-09,45.52,45.740002,45.509998,45.639999,45.51741,465500
bonds,SCHZ,2023-03-10,45.990002,46.290001,45.990002,46.18,46.055962,624200
bonds,SCHZ,2023-03-13,46.610001,46.91,46.419998,46.529999,46.405018,705600
bonds,SCHZ,2023-03-14,46.529999,46.529999,46.189999,46.290001,46.165665,535200
bonds,SCHZ,2023-03-15,46.720001,46.93,46.529999,46.740002,46.614456,603600
bonds,SCHZ,2023-03-16,46.950001,46.959999,46.459999,46.52,46.395046,992500
bonds,SCHZ,2023-03-17,46.709999,47.0,46.709999,46.869999,46.744106,989600
bonds,SCHZ,2023-03-20,46.959999,46.959999,46.630001,46.700001,46.574562,709300
bonds,SCHZ,2023-03-21,46.5,46.639999,46.48,46.540001,46.414993,987500


In [971]:
data.isna().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [972]:
# Check for 0s in the DataFrame
zero_count = np.count_nonzero(data.values == 0)
zero_count

0

In [973]:
# Check if any assset rows have value of 0
data[data==0].index.get_level_values(1).unique()

Index(['AAVE-USD', 'ALGO-USD', 'BAT-USD', 'BCH-USD', 'BTC-USD', 'DAI-USD',
       'ETH-USD', 'LINK-USD', 'LTC-USD', 'MATIC-USD', 'MKR-USD', 'NEAR-USD',
       'PAXG-USD', 'SOL-USD', 'TRX-USD', 'USDT-USD ', 'WBTC-USD', 'SPY', 'QQQ',
       'IWM', 'VTI', 'VOO', 'VO', 'VB', 'VEA', 'VWO', 'XLF ', 'XLV', 'XLE',
       'XLY', 'XLC', 'XLK', 'XLI', 'XLP', 'XLB', 'XLU', 'XLRE', 'GLD', 'SLV',
       'USO', 'UNG', 'DBA', 'DBC', 'GSG', 'IAU ', 'PPLT', 'SIVR', 'MOO', 'NIB',
       'JO', 'JJG', 'WEAT', 'UGA', 'DBE', 'REMX', 'OIL', 'AGG', 'BND', 'TLT',
       'IEF', 'SHY', 'LQD', 'HYG', 'JNK', 'MUB', 'TIP', 'BNDX', 'EMB', 'VWOB',
       'PFF', 'BKLN', 'FLOT', 'GSY', 'SCHO', 'SCHR', 'SCHZ'],
      dtype='object', name='Ticker')

In [1105]:
"""
Analize df for NaN and zero values. 
Print df containing the ticker name, zero values, NaN values, and dates for each ticker.
"""
def check_nan_and_zero_values(df):
    tickers = df.index.get_level_values('Ticker').unique()
    results = []

    for ticker in tickers:
        ticker_data = df.loc[pd.IndexSlice[:, ticker, :], :]
        nan_dates = ticker_data[ticker_data.isna().any(axis=1)].index.get_level_values('Date').tolist()
        zero_dates = ticker_data[(ticker_data == 0).any(axis=1)].index.get_level_values('Date').tolist()

        for date in nan_dates:
            results.append({"Ticker": ticker, "Value": "NaN", "Date": date})
        
        for date in zero_dates:
            results.append({"Ticker": ticker, "Value": "0", "Date": date})

    results_df = pd.DataFrame(results)
    return results_df

# Call the function to check for NaN and zero values in the data frame
results_df = check_nan_and_zero_values(data)
results_df

Unnamed: 0,Ticker,Value,Date
0,TRX-USD,0,2021-10-17
1,SPY,0,2022-08-11
2,QQQ,0,2021-07-13
3,VTI,0,2021-11-15
4,VTI,0,2022-05-20
5,VOO,0,2022-08-11
6,VO,0,2021-08-13
7,VB,0,2021-01-11
8,VEA,0,2021-05-25
9,VEA,0,2021-05-26


In [1110]:
#Remane column
data = data.rename(columns={"Adj Close": "Adj_Close"})

In [1111]:
data.head()


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Open,High,Low,Close,Adj_Close,Volume,Returns
Asset Class,Ticker,Date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
crypto,AAVE-USD,2020-10-15,51.387611,52.080685,44.018326,44.019184,44.019184,5883146,-0.153387
crypto,AAVE-USD,2020-10-16,42.890606,43.906731,39.142956,39.495846,39.495846,52855205,-0.10843
crypto,AAVE-USD,2020-10-17,39.486191,43.077374,38.633614,41.366993,41.366993,43339687,0.046288
crypto,AAVE-USD,2020-10-18,41.389599,41.749489,39.531586,40.746971,40.746971,40441163,-0.015102
crypto,AAVE-USD,2020-10-19,40.836758,40.995186,35.951363,36.012318,36.012318,49289897,-0.12352


### Step 3: Calculate returns for each asset
The `calculate_returns` function calculates the percentage returns for each asset, based on either the adjusted close or the close price. The function takes in a DataFrame containing the historical data for an asset and checks if the 'Adj Close' column is present. If it is, it calculates the returns using the 'Adj Close' column, and if not, it uses the 'Close' column.

In [1112]:
# VAR 1 Regular returns
# Calculate returns for each asset
# def calculate_returns(df):
#     # If the 'Adj Close' column is present, use it for calculating returns
#     if 'Adj Close' in df.columns:
#         return df['Adj Close'].pct_change()
#     # If the 'Adj Close' column is not present, use the 'Close' column
#     else:
#         return  df['Close'].pct_change()
        

# VAR 2 Logarithmic returns returns
# Calculate logarithmic returns for each asset 
"""
Calculate the logarithmic returns of an asset.
If the 'Adj Close' column is present, use it for calculating log. returns.
Otherwise use the 'Close' column
"""

def calculate_log_returns(df):
    
    if 'Adj Close' in df.columns:
        return np.log(1 + df['Adj Close'].pct_change())    
    else:
        return np.log(1 + df['Close'].pct_change())
    

# data.groupby(['Asset Class', 'Ticker'], group_keys=False).apply(o_c_pct_change)
data["Returns"] = data.groupby(['Asset Class', 'Ticker'], group_keys=False).apply(calculate_log_returns)


In [1113]:
display(data.head(20))
data.tail(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Open,High,Low,Close,Adj_Close,Volume,Returns
Asset Class,Ticker,Date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
crypto,AAVE-USD,2020-10-15,51.387611,52.080685,44.018326,44.019184,44.019184,5883146,
crypto,AAVE-USD,2020-10-16,42.890606,43.906731,39.142956,39.495846,39.495846,52855205,-0.10843
crypto,AAVE-USD,2020-10-17,39.486191,43.077374,38.633614,41.366993,41.366993,43339687,0.046288
crypto,AAVE-USD,2020-10-18,41.389599,41.749489,39.531586,40.746971,40.746971,40441163,-0.015102
crypto,AAVE-USD,2020-10-19,40.836758,40.995186,35.951363,36.012318,36.012318,49289897,-0.12352
crypto,AAVE-USD,2020-10-20,36.043785,36.319069,32.101101,32.180016,32.180016,60632326,-0.112515
crypto,AAVE-USD,2020-10-21,32.25349,37.078201,32.048149,35.160358,35.160358,43831235,0.088574
crypto,AAVE-USD,2020-10-22,35.097488,39.856922,33.589462,38.031651,38.031651,54717366,0.078499
crypto,AAVE-USD,2020-10-23,37.730595,42.546658,36.924671,41.686638,41.686638,76081860,0.091762
crypto,AAVE-USD,2020-10-24,41.686638,42.439705,38.728821,39.455021,39.455021,57658170,-0.055019


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Open,High,Low,Close,Adj_Close,Volume,Returns
Asset Class,Ticker,Date,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
bonds,SCHZ,2023-02-22,45.84,45.93,45.790001,45.810001,45.573334,1119500,0.002623
bonds,SCHZ,2023-02-23,45.849998,46.009998,45.66,46.009998,45.772301,1060800,0.004356
bonds,SCHZ,2023-02-24,45.759998,45.77,45.639999,45.720001,45.483799,804500,-0.006323
bonds,SCHZ,2023-02-27,45.849998,45.880001,45.759998,45.799999,45.563385,388600,0.001748
bonds,SCHZ,2023-02-28,45.700001,45.849998,45.650002,45.84,45.60318,1149500,0.000873
bonds,SCHZ,2023-03-01,45.599998,45.610001,45.419998,45.450001,45.327923,854700,-0.008544
bonds,SCHZ,2023-03-02,45.360001,45.360001,45.240002,45.330002,45.208244,805100,-0.002644
bonds,SCHZ,2023-03-03,45.560001,45.700001,45.470001,45.700001,45.577248,371200,0.008129
bonds,SCHZ,2023-03-06,45.75,45.790001,45.560001,45.57,45.447598,355300,-0.002849
bonds,SCHZ,2023-03-07,45.66,45.669998,45.450001,45.5,45.377785,1004500,-0.001537


In [1120]:
# Save the pulled data to CSV file
data.to_csv('../Capital_Allocation_Optimization/data.csv')

In [1116]:
data.isna().sum()

Open         0
High         0
Low          0
Close        0
Adj_Close    0
Volume       0
Returns      0
dtype: int64

In [997]:
# Group the data by 'Asset Class' and 'Ticker', and apply the 'calculate_returns' function to each group.
# Assign the resulting series of calculated returns back to the original DataFrame as a new column named "Returns".
# data_with_returns = data.groupby(['Asset Class', 'Ticker'], group_keys=False).apply(calculate_returns)

# data_with_returns = data.groupby(['Asset Class', 'Ticker'], group_keys=False).apply(lambda x: calculate_returns(x, asset_class=x.name[0]))
# data_with_returns = data.groupby(['Asset Class', 'Ticker'], group_keys=False).apply(lambda x: calculate_returns(x, asset_class=x.name[0]))


In [998]:
# #Rewiev df
# display(data_with_returns.head(20))
# data_with_returns.tail(20)

### Step 4:Transpose the data DataFrame
Define a function `transpose_df` that takes in a pandas dataframe df and an asset_class string as input. The goal of the function is to transpose a subset of the dataframe based on the given asset_class and return the resulting pivoted dataframe with NaN values removed.

In [999]:
#Define function 
 # Selects data from the dataframe based on the given asset_class and Returns columns
def transpose_df(df, asset_class):
    df_data = df.loc[(asset_class),['Returns']].reset_index()

    # Pivot the dataframe with index as 'Ticker', columns as 'Date', and values as 'Returns'
    df_data_pivot = df_data.pivot(index='Ticker',columns='Date', values='Returns')

    # Drop columns containing any NaN values from the pivoted dataframe
    df_data_pivot = df_data_pivot.dropna(axis=1)
    return df_data_pivot



In [1000]:
# Check for NaN values 
df_crypto_data_transposed.isna().sum().sum()


0

In [1001]:
df_stocks_data_transposed.isna().sum().sum()


0

In [1002]:
df_commodities_data_transposed.isna().sum().sum()


0

In [1003]:
df_bonds_data_transposed.isna().sum().sum()

0

In [1004]:

# Transpose data subset for all asset classes and store in a new dataframe
df_crypto_data_transposed = transpose_df(data,'crypto')
df_stocks_data_transposed = transpose_df(data,'stocks')
df_commodities_data_transposed = transpose_df(data,'commodities')
df_bonds_data_transposed = transpose_df(data,'bonds')

In [1005]:
# data.index

In [1006]:
#Review transposed DataFrame for crypto assets 
df_crypto_data_transposed


Date,2020-10-15,2020-10-16,2020-10-17,2020-10-18,2020-10-19,2020-10-20,2020-10-21,2020-10-22,2020-10-23,2020-10-24,...,2023-03-12,2023-03-13,2023-03-14,2023-03-15,2023-03-16,2023-03-17,2023-03-18,2023-03-19,2023-03-20,2023-03-21
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAVE-USD,-0.153387,-0.10843,0.046288,-0.015102,-0.12352,-0.112515,0.088574,0.078499,0.091762,-0.055019,...,0.079546,0.042564,0.041045,-0.06944,0.014093,0.071859,-0.03613,0.02297,-0.044166,0.0371
ALGO-USD,-0.036625,-0.052106,0.043541,0.011925,-0.043294,-0.052727,0.048412,0.044629,-0.021238,-0.004266,...,0.067286,0.050767,0.044997,-0.083579,0.020757,0.082869,-0.052225,0.024458,-0.05027,0.066129
BAT-USD,-0.009601,-0.028967,0.036172,-0.0039,0.001644,-0.026485,0.029207,0.029983,-0.006866,0.00551,...,0.074826,0.057636,0.044676,-0.077281,0.019404,0.085176,-0.039028,0.037944,-0.039704,0.028248
BCH-USD,0.00865,-0.043232,-0.01613,0.014078,0.003587,-0.03187,0.062526,0.030319,0.014315,0.017628,...,0.056855,0.048577,0.033567,-0.062009,0.021323,0.072324,-0.032165,0.036023,-0.041412,0.029111
BTC-USD,0.005744,-0.015184,0.003173,0.010968,0.022276,0.014735,0.073384,0.011028,-0.002653,0.013558,...,0.071604,0.087784,0.022416,-0.015069,0.027388,0.090431,-0.016844,0.039013,-0.009728,0.014607
DAI-USD,0.003123,-0.002023,-0.001756,-0.000822,0.005126,-0.003665,-8.5e-05,0.000138,0.000329,-0.008978,...,0.019036,0.006358,6.1e-05,0.000188,0.000231,0.001078,-0.002316,0.000149,-0.001506,0.002242
ETH-USD,-0.005396,-0.030158,0.007147,0.025053,0.004542,-0.028834,0.060579,0.053571,-0.00973,0.006545,...,0.07011,0.055059,0.013711,-0.028175,0.012621,0.066468,-0.017347,0.013485,-0.028549,0.040343
LINK-USD,-0.019492,-0.011069,0.0031,0.021195,0.000512,-0.097133,0.089603,0.099866,0.018626,0.045619,...,0.059714,0.027893,0.029025,-0.062838,0.018666,0.079303,-0.041224,0.024091,-0.006341,0.048378
LTC-USD,-0.00717,-0.036996,-0.013488,0.011949,0.010016,-0.022027,0.119914,0.022775,0.017742,0.067493,...,0.090528,0.069061,0.031092,-0.09863,0.033774,0.078418,-0.029899,0.01735,-0.073858,0.05351
MATIC-USD,-0.002614,-0.05605,0.029643,0.008624,-0.041224,-0.036,0.046924,0.060456,-0.021137,0.03704,...,0.08352,0.030986,0.006148,-0.074779,0.032235,0.062121,-0.044152,-0.002269,-0.057084,0.046376


In [1007]:
# data.loc[('crypto','SHIB-USD')]

In [1008]:
#Review transposed DataFrame for stock assets 
df_stocks_data_transposed

Date,2020-10-15,2020-10-16,2020-10-19,2020-10-20,2020-10-21,2020-10-22,2020-10-23,2020-10-26,2020-10-27,2020-10-28,...,2023-03-08,2023-03-09,2023-03-10,2023-03-13,2023-03-14,2023-03-15,2023-03-16,2023-03-17,2023-03-20,2023-03-21
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
IWM,0.010252,-0.002461,-0.012021,0.002179,-0.008621,0.017107,0.005781,-0.021822,-0.008624,-0.031272,...,0.000482,-0.028208,-0.029254,-0.016424,0.018352,-0.016623,0.013559,-0.027363,0.012882,0.018281
QQQ,-0.006734,-0.005496,-0.01646,0.002147,-0.000774,-3.5e-05,0.001969,-0.01511,0.007778,-0.039767,...,0.004982,-0.017478,-0.014143,0.007389,0.02272,0.005232,0.026019,-0.004737,0.001996,0.014181
SPY,-0.001237,-0.000604,-0.01532,0.003998,-0.001895,0.00547,0.003389,-0.018653,-0.003453,-0.034777,...,0.001631,-0.018622,-0.014535,-0.001426,0.016395,-0.006274,0.017393,-0.015571,0.00957,0.013045
VB,0.006645,-0.002773,-0.011903,0.003903,-0.008066,0.016067,0.006199,-0.022082,-0.009553,-0.029857,...,5.1e-05,-0.026892,-0.030215,-0.018797,0.017019,-0.01938,0.013866,-0.023532,0.013561,0.018607
VEA,-0.011042,0.003133,-0.004341,0.005543,-0.00337,0.000964,0.005526,-0.018864,-0.007597,-0.029711,...,0.005185,-0.009944,-0.009814,-0.00575,0.011696,-0.030323,0.015392,-0.011639,0.011407,0.01288
VO,0.00296,-0.001183,-0.01245,0.003426,-0.002936,0.009051,0.008863,-0.020803,-0.006628,-0.030186,...,0.002332,-0.024378,-0.026653,-0.00822,0.014037,-0.01671,0.014026,-0.018048,0.012117,0.015367
VOO,-0.00119,-0.000846,-0.015225,0.003939,-0.001809,0.00548,0.003406,-0.018588,-0.003309,-0.035037,...,0.001556,-0.018475,-0.014444,-0.001693,0.01647,-0.006186,0.016963,-0.010998,0.00899,0.012996
VTI,0.000337,-0.001801,-0.014409,0.003479,-0.003422,0.00706,0.003285,-0.018319,-0.00404,-0.033696,...,0.001297,-0.019945,-0.016982,-0.004148,0.017665,-0.007738,0.017138,-0.01298,0.008672,0.014171
VWO,-0.008046,0.004031,-0.003134,0.010039,0.001996,0.000222,0.00464,-0.013092,0.003567,-0.025244,...,0.005243,-0.02063,-0.005608,-0.001023,0.000512,-0.016764,0.012663,-0.005149,0.001032,0.010517
XLB,-0.002883,0.006965,-0.011687,0.002287,-0.003815,0.00549,0.003643,-0.025318,-0.009525,-0.028646,...,0.004891,-0.026072,-0.021512,-0.010286,0.012713,-0.032947,0.011408,-0.015637,0.016028,0.012048


In [1009]:
#Review transposed DataFrame for commodities
df_commodities_data_transposed 

Date,2020-10-15,2020-10-16,2020-10-19,2020-10-20,2020-10-21,2020-10-22,2020-10-23,2020-10-26,2020-10-27,2020-10-28,...,2023-03-08,2023-03-09,2023-03-10,2023-03-13,2023-03-14,2023-03-15,2023-03-16,2023-03-17,2023-03-20,2023-03-21
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DBA,-0.001344,-0.003368,0.00404,-0.000672,0.000672,0.002685,0.00134,-0.003352,-0.006739,-0.013615,...,-0.007446,-0.005997,0.004002,-0.005005,0.0,-0.0111,0.007078,-0.001008,-0.000504,0.001009
DBC,0.002999,-0.006759,0.000753,0.011232,-0.008224,0.002999,-0.007513,-0.014432,0.006862,-0.026956,...,-0.005468,-0.012307,0.010618,-0.006357,-0.012837,-0.02353,0.003082,-0.003082,0.00396,0.003507
DBE,-0.003069,-0.00823,-0.005179,0.012384,-0.023872,0.011488,-0.015699,-0.02348,0.019252,-0.043297,...,-0.012632,-0.019012,0.01382,-0.01478,-0.027266,-0.040287,0.005126,-0.014937,0.007754,0.014823
GLD,0.003639,-0.003471,0.000505,0.004809,0.007503,-0.009849,-0.001063,-0.000504,0.002629,-0.016275,...,-0.000475,0.009801,0.021334,0.022689,-0.005808,0.007774,0.002018,0.028704,0.000381,-0.019056
GSG,0.005425,-0.005425,-0.003633,0.009058,-0.009968,0.003636,-0.008201,-0.016606,0.011101,-0.029881,...,-0.005431,-0.013958,0.010487,-0.010989,-0.017735,-0.028521,0.002627,-0.008959,0.006858,0.00994
IAU,0.003858,-0.003858,0.001104,0.004952,0.008199,-0.010947,-0.000551,-0.000551,0.002751,-0.01718,...,0.000291,0.008974,0.021666,0.023137,-0.006081,0.007458,0.002474,0.028682,0.000267,-0.018843
JJG,0.023408,-0.006265,0.001644,0.013322,0.006213,-0.004076,0.018567,-0.015008,-0.005178,-0.022574,...,-0.007063,-0.015994,0.009361,-0.009292,0.012511,0.003932,0.002884,0.001697,-0.000597,-0.012096
JO,0.000906,-0.029734,-0.000156,-0.013945,-0.011744,0.026404,-0.007742,0.013695,-0.004182,-0.017378,...,-0.031168,-0.009234,0.010843,0.011037,-0.019504,-0.01434,0.038526,-0.015092,0.00806,0.011403
MOO,-0.001152,0.001008,-0.008532,0.001161,0.000725,0.002317,0.002311,-0.015705,-0.004996,-0.023699,...,0.008665,-0.020717,-0.024432,-0.010132,0.015276,-0.029228,0.00527,-0.011896,0.014401,0.018065
NIB,-0.027544,0.015983,0.025397,-0.012444,0.016216,0.020661,0.004349,0.009302,-0.030214,-0.033619,...,-0.009421,-0.004405,-0.010925,-0.022918,0.007349,-0.019008,0.02665,0.02966,0.018636,0.011148


In [1010]:
#Review transposed DataFrame for bonds
df_bonds_data_transposed 

Date,2020-10-15,2020-10-16,2020-10-19,2020-10-20,2020-10-21,2020-10-22,2020-10-23,2020-10-26,2020-10-27,2020-10-28,...,2023-03-08,2023-03-09,2023-03-10,2023-03-13,2023-03-14,2023-03-15,2023-03-16,2023-03-17,2023-03-20,2023-03-21
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AGG,-0.000848,-0.000509,-0.001188,-0.00085,-0.001191,-0.001789,0.001448,0.001361,0.002293,-0.001443,...,-0.001035,0.003616,0.011587,0.00792,-0.005884,0.009921,-0.003936,0.006251,-0.004028,-0.003032
BKLN,-0.001834,-0.001378,-0.00092,0.001379,-0.002299,-0.000461,0.0,-0.003692,-0.000462,-0.003707,...,0.001428,-0.004291,-0.000956,-0.015422,0.006294,-0.007752,0.004368,-0.006315,-0.004396,0.008772
BND,-0.000567,-0.000795,-0.000909,-0.001706,-0.001139,-0.001711,0.001255,0.001481,0.002388,-0.001364,...,-0.000838,0.003348,0.011905,0.00795,-0.0063,0.010251,-0.004771,0.007215,-0.00435,-0.002729
BNDX,0.000856,0.000855,-0.000342,-0.001027,-0.001885,-0.001373,0.00103,0.000858,0.001884,-0.000171,...,0.001676,0.001046,0.009778,0.012755,-0.005946,0.007171,-0.004912,0.007155,-0.000815,-0.004905
EMB,-0.004011,0.001517,-0.005814,0.000538,-0.005123,-0.004697,0.004697,-0.003611,0.005951,-0.008578,...,-0.00474,0.000237,0.009338,-0.001648,-0.002714,0.003656,-0.004247,-0.004503,0.000119,0.0071
FLOT,-0.000395,0.000197,-0.000197,0.000197,0.000197,-0.000197,0.000197,-0.000197,0.0,0.0,...,0.000394,-0.000394,-0.000197,-0.015911,0.003402,-0.010848,0.008247,-0.000802,0.001002,0.007582
GSY,0.000198,-0.000198,-0.000396,0.0,0.0,-0.000198,0.000198,0.000198,0.000198,-0.000198,...,-0.000202,0.000403,0.00141,0.002011,-0.001005,0.000402,-0.000402,0.000402,-0.00302,-0.000605
HYG,-0.000708,-0.001181,-0.003551,0.003551,-0.000118,0.001771,0.001179,-0.008876,0.000475,-0.008352,...,-0.004729,-0.005568,0.000136,-0.005462,0.00723,-0.003268,0.006524,-0.005979,-0.00273,0.010874
IEF,-0.001071,-0.000577,-0.001651,-0.00215,-0.001326,-0.003488,0.001413,0.002655,0.002234,-8.3e-05,...,-0.000316,0.006193,0.018248,0.012051,-0.009486,0.014346,-0.009031,0.011755,-0.004038,-0.008737
JNK,-0.000569,-0.002091,-0.002095,0.003426,-0.000285,0.001614,0.001043,-0.007993,0.000191,-0.008441,...,-0.005063,-0.006199,-0.000333,-0.005122,0.007785,-0.004552,0.006544,-0.005988,-0.002673,0.011311


In [1011]:
#Prints the sum of all NaN values in each of the dataframes.
print(df_crypto_data_transposed.isna().sum().sum())
print(df_stocks_data_transposed.isna().sum().sum())
print(df_commodities_data_transposed.isna().sum().sum())
print(df_bonds_data_transposed.isna().sum().sum())

0
0
0
0


### Step 5: Perform Principal Component Analysis (PCA)
Apply Principal Component Analysis (PCA) on the transposed data of each asset class to reduce the dimensionality of the data while preserving most of the variability in the data.
We replace infinite values with NaN before filling missing values with the mean of the corresponding column. This ensures that no infinite values are present in the data before it is passed to the PCA function.

In [1012]:

# def pca_function(df):
#     pca = PCA(n_components=0.9) # Choose the number of principal components to explain 90% of the variance
#     pca.fit(df)
#     results = pca.transform(df)
#     results_df = pd.DataFrame(results,index = df.index)
#     return results_df

# #Define function for PCA
# def perform_pca(df):
#     """
#     Perform PCA on a given dataframe and return a dataframe of principal components.

#     Parameters:
#     df (pandas.DataFrame): The dataframe to perform PCA on.

#     Returns:
#     pandas.DataFrame: A dataframe of principal components.
#     """
#     # Choose the number of principal components to explain 90% of the variance
#     pca = PCA(n_components=0.9)

#     # Fit the PCA model to the data
#     pca.fit(df)

#     # Transform the data into principal components
#     results = pca.transform(df)

#     # Create a dataframe from the principal components
#     results_df = pd.DataFrame(results, index=df.index)

#     # Return the dataframe of principal components
#     return results_df

# Define function for PCA
def perform_pca(df):
    """
    Perform PCA on a given dataframe and return a dataframe of principal components.

    Parameters:
    df (pandas.DataFrame): The dataframe to perform PCA on.

    Returns:
    pandas.DataFrame: A dataframe of principal components.
    """
    # Replace NaN and infinite values with column means
    # df_clean = df.replace([np.inf, -np.inf], np.nan).fillna(df.mean())

    # Choose the number of principal components to explain 90% of the variance
    pca = PCA(n_components=0.9)

    # Fit the PCA model to the cleaned data
    pca.fit(df)

    # Transform the data into principal components
    results = pca.transform(df)

    # Create a dataframe from the principal components
    results_df = pd.DataFrame(results, index=df.index)

    # Return the dataframe of principal components
    return results_df



We replace infinite values with NaN before filling missing values with the mean of the corresponding column. This ensures that no infinite values are present in the data before it is passed to the PCA function.

In [1013]:
# # Replace missing values with the mean value of the corresponding column for all 4 asset classes
# df_crypto_data_transposed_clean = df_crypto_data_transposed.fillna(df_crypto_data_transposed.mean())
# df_stocks_data_transposed_clean = df_stocks_data_transposed.fillna(df_stocks_data_transposed.mean())
# df_commodities_data_transposed_clean = df_commodities_data_transposed.fillna(df_commodities_data_transposed.mean())
# df_bonds_data_transposed_clean = df_bonds_data_transposed.fillna(df_bonds_data_transposed.mean())

# Apply PCA function to transposed data for all asset classes
crypto_pca = perform_pca(df_crypto_data_transposed)
stocks_pca = perform_pca(df_stocks_data_transposed)
commodities_pca = perform_pca(df_commodities_data_transposed)
bonds_pca = perform_pca(df_bonds_data_transposed)



In [1014]:
#Prints the sum of all NaN values in each of the dataframes.
print(crypto_pca.isna().sum().sum())
print(stocks_pca.isna().sum().sum())
print(commodities_pca.isna().sum().sum())
print(bonds_pca.isna().sum().sum())

0
0
0
0


In [1015]:
crypto_pca

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
AAVE-USD,-0.570583,0.045162,0.25559,-0.076656,0.783203,0.280939,-0.676692,-0.14189,0.147873
ALGO-USD,-0.36898,0.032286,-0.302564,-0.17144,-0.290116,0.471319,-0.097419,0.733359,-0.19939
BAT-USD,-0.278138,-0.326183,-0.187603,-0.237274,-0.338506,0.717847,0.269857,-0.586403,-0.039578
BCH-USD,-0.068406,-0.362772,-0.212868,-0.295586,-0.135632,-0.466894,-0.004749,-0.049279,0.085336
BTC-USD,0.358404,-0.077265,-0.050898,-0.050554,-0.027205,-0.18152,0.000767,-0.104665,-0.156667
DAI-USD,1.107127,0.220279,0.081335,0.206858,0.032129,0.105692,-0.019474,0.010108,-0.02261
ETH-USD,-0.076685,-0.131541,0.059386,-0.131362,0.142088,-0.185418,-0.010006,-0.02521,-0.112901
LINK-USD,-0.403722,-0.262198,-0.143312,-0.181907,-0.000243,-0.054175,-0.21062,0.057319,-0.30992
LTC-USD,-0.085793,-0.294933,-0.162413,-0.240789,-0.107033,-0.397762,-0.134817,-0.069799,-0.14244
MATIC-USD,-0.822859,-0.48176,0.424593,1.132643,-0.248953,-0.062154,0.01918,0.017384,-0.01809


In [1016]:
stocks_pca

Unnamed: 0_level_0,0,1,2,3,4,5,6,7
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
IWM,0.012886,-0.101292,-0.090697,0.02036,0.056707,0.008966,-0.011206,0.072932
QQQ,-0.156376,-0.088899,0.054146,-0.043808,-0.002318,-0.018999,-0.008897,-0.013738
SPY,-0.036351,-0.004146,0.009576,-0.025161,-0.02289,-0.013464,-0.007912,-0.007474
VB,0.010293,-0.075654,-0.077632,0.017578,0.040089,0.008298,-0.010274,0.044894
VEA,0.021215,0.014856,0.029539,0.085787,-0.005862,0.003942,-0.004957,-0.034707
VO,-0.020809,-0.026702,-0.03677,-0.001473,0.019454,-0.000538,-0.015299,0.011775
VOO,-0.037568,-0.0034,0.010707,-0.023787,-0.023081,-0.013531,-0.006489,-0.006143
VTI,-0.037163,-0.023851,-0.000369,-0.01553,-0.009055,-0.011083,-0.010095,0.005463
VWO,-0.000883,-0.009515,0.117407,0.177681,0.048815,-0.000349,0.009203,0.000632
XLB,0.065693,0.012672,-0.066969,0.051899,-0.039383,0.009027,-0.013965,-0.053213


In [1017]:
commodities_pca

Unnamed: 0_level_0,0,1,2,3,4,5,6
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
DBA,-0.039407,-0.097435,-0.134896,0.064394,0.00325,0.028167,-0.017601
DBC,-0.021652,0.123272,-0.020835,-0.015946,-0.006957,0.00117,-0.009583
DBE,0.003098,0.291482,0.026495,-0.011596,0.021545,-0.000267,-0.009479
GLD,-0.057061,-0.168668,0.004464,-0.100962,0.056514,0.027809,-0.042175
GSG,-0.019705,0.178463,-0.021293,-0.018129,-0.008966,-0.008214,-0.005558
IAU,-0.057116,-0.168714,0.004331,-0.100295,0.055907,0.028169,-0.042617
JJG,-0.055212,-0.091587,-0.260542,0.012931,-0.161407,-0.01678,-0.011287
JO,-0.061607,-0.130252,-0.157016,0.293037,0.292952,-0.214596,-0.003002
MOO,-0.020704,-0.10475,0.082499,0.073959,-0.028945,0.070369,-0.024779
NIB,-0.063128,-0.122247,-0.044223,0.134471,0.140341,0.327366,0.000419


In [1018]:
bonds_pca

Unnamed: 0_level_0,0,1,2,3,4
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AGG,0.023421,-0.020149,-0.001272,0.004559,0.003085
BKLN,-0.068651,0.002684,0.013647,0.004495,-0.027401
BND,0.024377,-0.020996,-0.001416,0.003688,0.002496
BNDX,0.002123,-0.036523,-0.002504,0.001497,-0.000624
EMB,0.011134,0.097262,-0.055784,-0.023916,-0.004744
FLOT,-0.062849,-0.0503,0.006166,-0.015301,-0.018688
GSY,-0.053319,-0.057636,-0.003072,-0.011358,-0.012646
HYG,-0.044384,0.068331,0.004768,0.043569,0.001371
IEF,0.055183,-0.027486,-0.002897,0.006921,0.010824
JNK,-0.046375,0.067984,0.006243,0.044114,-0.001141


In [1019]:

# Var 1 define clusters, pick the best asset from eachj cluster based on sharpe ratio
def define_clusters(df, df_transpose_returns):  
    # Define an empty list to store the inertias of each KMeans object
    inertias = []
    
    # Iterate over the numbers 1 to 10 (inclusive)
    for n in range(1, 11):
        # Instantiate a KMeans object with n clusters
        kmeans = KMeans(n_clusters=n, random_state=42)
        # Fit the KMeans object to the DataFrame and calculate the inertia
        kmeans.fit(df)
        # Append the inertia of the KMeans object to the list
        inertias.append(kmeans.inertia_)
    
    # Find the number of clusters that produces the lowest inertia
    best_num_clusters = np.argmin(inertias) + 1
    # Calculate the mean inertia of the KMeans objects
    mean_inertia = np.mean(inertias)
    
    # Iterate over the numbers 1 to 10 (inclusive)
    for k in range(1, 11):
        # If the inertia of the KMeans object is less than or equal to the mean inertia
        if inertias[k-1] <= mean_inertia:
            # Set the number of clusters to k and break the loop
            best_num_clusters = k
            break
    
    # Instantiate a KMeans object with the best number of clusters
    kmeans_final = KMeans(n_clusters=best_num_clusters, random_state=42)
    # Fit the KMeans object to the DataFrame
    kmeans_final.fit(df)   
    # Create a DataFrame of cluster labels
    final_df = pd.DataFrame(kmeans_final.predict(df), index=df.index, columns=['Clusters'])
    
    # Calculate the Sharpe ratio of the asset class
    sharpe_ratio = df_transpose_returns.apply(lambda x: x.mean() / x.std(), axis=1)
    
    # Combine the cluster labels with the Sharpe ratios
    sharpe_cluster = pd.concat([final_df, sharpe_ratio], axis=1).rename(columns={0: 'Sharpe_Ratio'})
    
    # Get the best Sharpe ratios for each cluster
    best_cluster_sharpes = sharpe_cluster.groupby('Clusters').max()['Sharpe_Ratio'].to_list()
    
    # Get the tickers for the assets with the best Sharpe ratios
    best_cluster_sharpes_df = sharpe_cluster.loc[sharpe_cluster['Sharpe_Ratio'].isin(best_cluster_sharpes)]
    list_best_tickers = best_cluster_sharpes_df.index.to_list()
    
    # Return the DataFrame of cluster labels, the DataFrame of tickers with the best Sharpe ratios, and the list of tickers
    return sharpe_cluster, best_cluster_sharpes_df, list_best_tickers

In [1020]:
# # VAR 1 Call the function
crypto_sharpe_ratio, best_cluster_crypto, best_crypto_ticekrs = define_clusters(crypto_pca, df_crypto_data_transposed)
stocks_sharpe_ratio, best_cluster_stocks, best_stocks_ticekrs = define_clusters(stocks_pca,  df_stocks_data_transposed)
commodities_sharpe_ratio, best_cluster_commodities, best_commodities_ticekrs = define_clusters(commodities_pca, df_commodities_data_transposed)
bonds_sharpe_ratio, best_cluster_bonds, best_bonds_ticekrs = define_clusters(bonds_pca, df_bonds_data_transposed)


 Pull data for benchmarks and the daily yield of a short-term U.S. government bond using yfinance package.

In [1021]:
# Define benchmark tickers for each asset class
benchmark_tickers = {
    'crypto': 'BTC-USD',  # Bitcoin as a benchmark for cryptocurrencies
    'stocks': 'SPY',      # S&P 500 ETF (SPY) as a benchmark for stocks
    'commodities': 'GSG', # S&P GSCI Commodity Index ETF (GSG) as a benchmark for commodities
    'bonds': 'AGG'        # iShares Core U.S. Aggregate Bond ETF (AGG) as a benchmark for bonds
}

# Define the ticker for a short-term U.S. government bond (1-3 month T-bill)
risk_free_rate_ticker = '^IRX'  # 13 Week Treasury Bill as risk-free rate


# Download historical data for benchmark tickers
benchmark_data = yf.download(list(benchmark_tickers.values()), start=selected_start_date, end=selected_end_date)['Adj Close']

# Fill NaN values with the previous data point (forward fill)
benchmark_data.fillna(method='ffill', inplace=True)

# Download historical data for the risk-free rate (13 Week Treasury Bill)
risk_free_rate_data = yf.download(risk_free_rate_ticker, start=selected_start_date, end=selected_end_date)['Adj Close']

# Fill NaN values with the previous data point (forward fill)
risk_free_rate_data.fillna(method='ffill', inplace=True)

[*********************100%***********************]  4 of 4 completed
[*********************100%***********************]  1 of 1 completed


In [1022]:
#!!! Devide crypto and other assets drop

In [1023]:
 benchmark_data.head(14)

Unnamed: 0_level_0,AGG,BTC-USD,GSG,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-10-14,112.047577,11429.506836,11.03,335.158539
2020-10-15,111.952675,11495.349609,11.09,334.744293
2020-10-16,111.895721,11322.123047,11.03,334.542023
2020-10-17,111.895721,11358.101562,11.03,334.542023
2020-10-18,111.895721,11483.359375,11.03,334.542023
2020-10-19,111.762825,11742.037109,10.99,329.45575
2020-10-20,111.667908,11916.334961,11.09,330.775574
2020-10-21,111.535042,12823.689453,10.98,330.149384
2020-10-22,111.335693,12965.891602,11.02,331.960327
2020-10-23,111.497101,12931.539062,10.93,333.087402


In [1024]:
# risk_free_rate_data.head(14)

The next step is to calculate daily returns for both the benchmark_data and risk_free_rate_data. We'll also calculate the daily excess returns for each asset class, subtracting the risk-free rate from the asset returns.

Excess returns, also known as risk-adjusted returns, represent the difference between the actual returns of an investment and the returns of a risk-free investment (usually a short-term government bond). In other words, excess returns measure the performance of an investment after accounting for the risk-free rate, which is the return an investor would expect from an investment with zero risk.

The excess return helps to evaluate the performance of an investment by comparing it to a benchmark, such as a risk-free investment or a market index, and determining whether the investment has outperformed or underperformed the benchmark. This allows investors to determine the additional return they are receiving for taking on additional risk, as higher-risk investments are generally expected to yield higher returns.

In [1025]:
# Calculate daily log returns for benchmark_data of each 
# crypto_benchmark_log_returns = np.log(1 + benchmark_data['BTC-USD'].pct_change())

In [1026]:
# crypto_benchmark_log_returns.head(14)

In [1027]:
# stocks_benchmark_log_returns = np.log(1 + benchmark_data['SPY'].pct_change())

In [1028]:
# stocks_benchmark_log_returns.head(14)

In [1029]:
# commodities_benchmark_log_returns = np.log(1 + benchmark_data['GSG'].pct_change())

In [1030]:
# commodities_benchmark_log_returns.head(14)

In [1031]:
# bonds_benchmark_log_returns = np.log(1 + benchmark_data['AGG'].pct_change())

In [1032]:
# bonds_benchmark_log_returns.head(14)

In [1033]:
#!!! Resk free rate log returns doesn't have data for weekends. When calculating exess returns we need to substract risk_free_rate_log_returns!!!

In [1034]:
# # Calculate excess log returns for each asset class by subtracting the risk-free rate daily log returns from asset daily log returns
# crypto_excess_log_returns = df_crypto_data_transposed.T.subtract(risk_free_rate_log_returns, axis=0)
# excess_log_returns_stocks = df_stocks_data_transposed.T.subtract(risk_free_rate_log_returns, axis=0)
# excess_log_returns_commodities = df_commodities_data_transposed.T.subtract(risk_free_rate_log_returns, axis=0)
# excess_log_returns_bonds = df_bonds_data_transposed.T.subtract(risk_free_rate_log_returns, axis=0)

In [1035]:
# benchmark_log_returns.head(10)

In [1036]:
# risk_free_rate_log_returns.head(10)

In [1037]:
# excess_log_returns_crypto.head(10)

In [1038]:
# excess_log_returns_stocks.head(10)

In [1039]:
best_cluster_crypto

Unnamed: 0_level_0,Clusters,Sharpe_Ratio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
ETH-USD,0,0.035994
MATIC-USD,4,0.061082
NEAR-USD,2,0.008232
PAXG-USD,3,0.00349
SOL-USD,1,0.034282


In [1040]:
best_cluster_stocks

Unnamed: 0_level_0,Clusters,Sharpe_Ratio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
VOO,0,0.019366
VWO,4,-0.018204
XLE,2,0.073293
XLF,1,0.029483
XLV,3,0.028377


In [1041]:
best_cluster_commodities

Unnamed: 0_level_0,Clusters,Sharpe_Ratio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
DBA,3,0.052786
PPLT,4,0.009417
REMX,0,0.037281
UGA,1,0.068994
UNG,2,-0.01649


In [1042]:
best_cluster_bonds

Unnamed: 0_level_0,Clusters,Sharpe_Ratio
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1
FLOT,1,-0.009948
HYG,0,-0.03873
TIP,3,-0.052549
TLT,2,-0.065528


In [1043]:
crypto_results = df_crypto_data_transposed.loc[best_crypto_ticekrs].T
stocks_results = df_stocks_data_transposed.loc[best_stocks_ticekrs].T
commodities_results = df_commodities_data_transposed.loc[best_commodities_ticekrs].T
bonds_results = df_bonds_data_transposed.loc[best_bonds_ticekrs].T

In [1044]:
crypto_results.head(14)

Ticker,ETH-USD,MATIC-USD,NEAR-USD,PAXG-USD,SOL-USD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-15,-0.005396,-0.002614,-0.052145,0.001917,-0.021043
2020-10-16,-0.030158,-0.05605,-0.319769,-0.002297,-0.053026
2020-10-17,0.007147,0.029643,-0.011534,-0.003785,0.024113
2020-10-18,0.025053,0.008624,0.075685,0.001764,0.005627
2020-10-19,0.004542,-0.041224,-0.085346,0.001931,-0.073148
2020-10-20,-0.028834,-0.036,-0.168786,0.00081,-0.0859
2020-10-21,0.060579,0.046924,0.036804,0.01146,0.000903
2020-10-22,0.053571,0.060456,0.065964,-0.008752,0.033947
2020-10-23,-0.00973,-0.021137,-0.028327,0.001672,0.002238
2020-10-24,0.006545,0.03704,0.014674,0.002168,-0.024902


In [1045]:
stocks_results.head(14)

Ticker,VOO,VWO,XLE,XLF,XLV
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-15,-0.00119,-0.008046,0.01205,0.007638,-0.007273
2020-10-16,-0.000846,0.004031,-0.022922,0.000801,0.009871
2020-10-19,-0.015225,-0.003134,-0.021427,-0.015323,-0.015972
2020-10-20,0.003939,0.010039,0.011775,0.008094,0.001317
2020-10-21,-0.001809,0.001996,-0.019248,-0.005254,-0.004996
2020-10-22,0.00548,0.000222,0.040426,0.01966,0.015007
2020-10-23,0.003406,0.00464,-0.004924,0.003173,0.003717
2020-10-26,-0.018588,-0.013092,-0.036526,-0.022833,-0.011192
2020-10-27,-0.003309,0.003567,-0.013054,-0.018402,-0.00687
2020-10-28,-0.035037,-0.025244,-0.04274,-0.025921,-0.032245


In [1046]:
commodities_results.head(14)

Ticker,DBA,PPLT,REMX,UGA,UNG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-10-15,-0.001344,0.007156,-0.006091,-0.0114,0.049638
2020-10-16,-0.003368,-0.002832,-0.005145,-0.016081,-0.008918
2020-10-19,0.00404,-0.00929,0.009778,-0.007117,0.016956
2020-10-20,-0.000672,0.020812,0.008236,0.022201,-0.008846
2020-10-21,0.000672,0.017518,0.001206,-0.032457,0.020784
2020-10-22,0.002685,-0.007573,-0.005073,0.01433,-0.019977
2020-10-23,0.00134,0.025143,-0.001454,-0.018462,-0.023684
2020-10-26,-0.003352,-0.032045,-0.014659,-0.026752,0.016394
2020-10-27,-0.006739,0.005453,0.009797,0.026234,0.018526
2020-10-28,-0.013615,-0.015344,-0.029432,-0.057555,-0.005602


In [1047]:
bonds_results.head(14)

Ticker,FLOT,HYG,TIP,TLT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-10-15,-0.000395,-0.000708,-0.000317,-0.001729
2020-10-16,0.000197,-0.001181,0.000396,-0.002784
2020-10-19,-0.000197,-0.003551,-0.002377,-0.003787
2020-10-20,0.000197,0.003551,-0.000873,-0.009562
2020-10-21,0.000197,-0.000118,-0.000318,-0.00346
2020-10-22,-0.000197,0.001771,-0.00151,-0.010451
2020-10-23,0.000197,0.001179,0.001193,0.006094
2020-10-26,-0.000197,-0.008876,0.000794,0.00926
2020-10-27,0.0,0.000475,0.001983,0.006687
2020-10-28,0.0,-0.008352,-0.001269,0.000934


In [1048]:
# Define the number of tickers selected for each asset class
crypto_selected_tickers = crypto_results.columns.tolist()
stocks_selected_tickers = stocks_results.columns.tolist()
commodities_selected_tickers = commodities_results.columns.tolist()
bonds_selected_tickers = bonds_results.columns.tolist()


In [1049]:
 crypto_selected_tickers

['ETH-USD', 'MATIC-USD', 'NEAR-USD', 'PAXG-USD', 'SOL-USD']

In [1050]:
stocks_selected_tickers

['VOO', 'VWO', 'XLE', 'XLF ', 'XLV']

In [1051]:
commodities_selected_tickers

['DBA', 'PPLT', 'REMX', 'UGA', 'UNG']

In [1052]:
bonds_selected_tickers

['FLOT', 'HYG', 'TIP', 'TLT']

In [1053]:
# Calculate the number of tickers selected for each asset class
crypto_num_selected_tickers = len(crypto_selected_tickers)
stocks_num_selected_tickers = len(stocks_selected_tickers)
commodities_num_selected_tickers = len(commodities_selected_tickers)
bonds_num_selected_tickers = len(bonds_selected_tickers)

In [1054]:
crypto_num_selected_tickers

5

In [1055]:
stocks_num_selected_tickers

5

In [1056]:
commodities_num_selected_tickers

5

In [1057]:
bonds_num_selected_tickers

4

In [1058]:
# Define trading days constant
CRYPTO_TRADING_DAYS = 365
ETFS_TRADING_DAYS = 252

In [1059]:
def generate_random_weights(selected_tickers):
    """
    Generates an array of random weights for a given number of tickers.

    Args:
    - selected_tickers: int. The number of tickers for which the weights need to be generated.

    Returns:
    - numpy array of shape (selected_tickers,). An array of random weights between 0 and 1.
    """
    random_weights = np.array(np.random.random(selected_tickers))
    return random_weights

In [1060]:
# Call function to generate random weights for each asset class
crypto_random_wts = generate_random_weights(crypto_num_selected_tickers)
stocks_random_wts = generate_random_weights(stocks_num_selected_tickers)
commodities_random_wts = generate_random_weights(commodities_num_selected_tickers)
bonds_random_wts = generate_random_weights(bonds_num_selected_tickers)

In [1061]:
crypto_random_wts

array([0.93641117, 0.10371076, 0.04940502, 0.22976793, 0.05566767])

In [1062]:
stocks_random_wts

array([0.05812705, 0.81756422, 0.96483741, 0.90319675, 0.14395183])

In [1063]:
commodities_random_wts

array([0.8586228 , 0.77162041, 0.23714954, 0.39812822, 0.93402107])

In [1064]:
bonds_random_wts 

array([0.68657784, 0.15683695, 0.93713592, 0.68871314])

In [1065]:
def rebalance_weights(random_wts):
    """
    Rebalances the given array of random weights so that each asset class has a 100% allocation.

    Args:
    - random_wts: numpy array. The array of random weights to be rebalanced.

    Returns:
    - numpy array. The rebalanced array of weights, where each asset class has a 100% allocation.
    """
    # Divide each weight by the sum of all weights to get the percentage allocation for each asset class
    rebalanced_wts = random_wts / np.sum(random_wts)
    return rebalanced_wts

In [1066]:
 # Call function to rebalance weights, so each assect class would wave 100% allocation 
crypto_rebalanced_wts = rebalance_weights(crypto_random_wts)
stocks_rebalanced_wts = rebalance_weights(stocks_random_wts)
commodities_rebalanced_wts = rebalance_weights(commodities_random_wts)
bonds_rebalanced_wts = rebalance_weights(bonds_random_wts)

In [1067]:
crypto_rebalanced_wts

array([0.68104486, 0.07542806, 0.0359319 , 0.1671085 , 0.04048668])

In [1068]:
stocks_rebalanced_wts

array([0.02012935, 0.28312174, 0.33412231, 0.31277621, 0.04985039])

In [1069]:
commodities_rebalanced_wts

array([0.26835803, 0.24116589, 0.07411984, 0.12443288, 0.29192336])

In [1070]:
bonds_rebalanced_wts

array([0.27804961, 0.06351567, 0.37952037, 0.27891436])

In [1071]:
def calculate_expected_returns(results, rebalanced_wts):
    """
    Calculate expected returns for an asset class and annualize them

    param log_returns: numpy array of log returns for an asset class
    param rebalanced_wts: numpy array of weights for each asset in the asset class

    Crypto is annnualized by 365 trading days
    Stocks and commodities are anualized by 252 trading days

    Returns: expected returns for the asset class
    
    """
    if results is crypto_results and rebalanced_wts is crypto_rebalanced_wts:
        trading_days = CRYPTO_TRADING_DAYS
    else:
        trading_days = ETFS_TRADING_DAYS

    expected_returns = np.sum((results.mean() * rebalanced_wts) * trading_days)

    return expected_returns

In [1072]:
# Call function to calculate expected returns for each asset class and annualize them.
# Annualize crypto 365, ETF's 252 days
crypto_expected_returns = calculate_expected_returns(crypto_results, crypto_rebalanced_wts)
stocks_expected_returns = calculate_expected_returns(stocks_results, stocks_rebalanced_wts)
commodities_expected_returns = calculate_expected_returns(commodities_results, commodities_rebalanced_wts)
bonds_expected_returns = calculate_expected_returns(bonds_results, bonds_rebalanced_wts)

In [1073]:
crypto_expected_returns

0.6151901127838836

In [1074]:
stocks_expected_returns

0.15503989867349113

In [1075]:
commodities_expected_returns

0.06341716148722026

In [1076]:
bonds_expected_returns

-0.078260219009983

In [1077]:
def calculate_expected_volatility(results, rebalanced_wts):
    """
    Calculates the expected volatility based on the logarithmic returns and rebalanced weights.

    Parameters:
    log_returns (pandas.DataFrame): A DataFrame containing the logarithmic returns for the asset.
    rebalanced_wts (numpy.ndarray): An array containing the rebalanced weights for the asset.

    Returns:
    expected_volatility (float): The expected volatility for the asset.
    """

    if results is crypto_results and rebalanced_wts is crypto_rebalanced_wts:
        trading_days = CRYPTO_TRADING_DAYS
    else:
        trading_days = ETFS_TRADING_DAYS

    expected_volatility = np.sqrt(np.dot(rebalanced_wts.T,
                            np.dot(results.cov() * trading_days, rebalanced_wts)))

    return expected_volatility

In [1078]:
# Call function to calculate expected volatility for each asset
crypto_expected_volatility = calculate_expected_volatility(crypto_results, crypto_rebalanced_wts)
stocks_expected_volatility = calculate_expected_volatility(stocks_results, stocks_rebalanced_wts)
commodities_expected_volatility = calculate_expected_volatility(commodities_results, commodities_rebalanced_wts)
# bonds_expected_volatility = calculate_expected_volatility(bonds_results, commodities_rebalanced_wts)

In [1079]:
crypto_expected_volatility

0.7925546350557089

In [1080]:
stocks_expected_volatility

0.20509086042368066

In [1081]:
commodities_expected_volatility

0.25137732850557054

In [1082]:
bonds_expected_volatility

NameError: name 'bonds_expected_volatility' is not defined

In [1083]:
def calculate_sharpe_ratio(expected_returns, expected_volatility):
    """
    Calculates the Sharpe Ratio given expected returns and volatility.

    Args:
    expected_returns (float): The expected return of the asset.
    expected_volatility (float): The expected volatility of the asset.

    Returns:
    float: The Sharpe Ratio of the asset.
    """
    sharpe_ratio = expected_returns / expected_volatility
    return sharpe_ratio

In [1084]:
# Call function to calculate the Sharpe Ratio for each asset class using the calculate_sharpe_ratio function
crypto_sharpe = calculate_sharpe_ratio(crypto_expected_returns, crypto_expected_volatility)
stocks_sharpe = calculate_sharpe_ratio(stocks_expected_returns, stocks_expected_volatility)
commodities_sharpe = calculate_sharpe_ratio(commodities_expected_returns, commodities_expected_volatility)
# bonds_sharpe = calculate_sharpe_ratio(bonds_expected_returns, bonds_expected_volatility)

In [1085]:
crypto_sharpe

0.7762116144089445

In [1086]:
stocks_sharpe

0.7559571321374668

In [1087]:
commodities_sharpe

0.2522787630222386

In [1088]:
bonds_sharpe

NameError: name 'bonds_sharpe' is not defined

In [1089]:
# Prompt user to choose number of simulations for Streamlit !!!
#num_of_portfolios = st.slider("Choose number of portfolios simulated:", min_value=500, max_value=5000, step=500)

In [1090]:
#Define number of simulations(portfolios generated by Monte Carlo)
num_of_portfolios = 3000

In [1091]:
# Declare an array to store the weights for each of selected assets for for each asset class for itteratios of {num_of_portfolios}
crypto_all_weights = np.zeros((num_of_portfolios, crypto_num_selected_tickers))
stocks_all_weights = np.zeros((num_of_portfolios, stocks_num_selected_tickers))
commodities_all_weights = np.zeros((num_of_portfolios, commodities_num_selected_tickers))
bonds_all_weights = np.zeros((num_of_portfolios, bonds_num_selected_tickers))

In [1092]:
# Define array to store the returns after they were generated, {num_of_portfolios} possible return values.
# The arrays are filled with zerroes, so when the Montecarlo simulation runs the zero values will be filled with values
crypto_ret_arr = np.zeros(num_of_portfolios)
stocks_ret_arr = np.zeros(num_of_portfolios)
commodities_ret_arr = np.zeros(num_of_portfolios)
bonds_ret_arr = np.zeros(num_of_portfolios)

In [1093]:
# Define array to store the volatilities after they were generated, {num_of_portfolios} possible return values.
crypto_vol_arr = np.zeros(num_of_portfolios)
stocks_vol_arr = np.zeros(num_of_portfolios)
commodities_vol_arr = np.zeros(num_of_portfolios)
bonds_vol_arr = np.zeros(num_of_portfolios)

In [1094]:
# Define array to store the sharpe ratios after they were generated, {num_of_portfolios} possible return values.
crypto_sharpe_ratio_arr = np.zeros(num_of_portfolios)
stocks_sharpe_ratio_arr = np.zeros(num_of_portfolios)
commodities_sharpe_ratio_arr = np.zeros(num_of_portfolios)
bonds_sharpe_ratio_arr = np.zeros(num_of_portfolios)

In [1095]:

# Start the simulations.
for ind in range(num_of_portfolios):
    # Calculate the weights.
    crypto_weights = np.array(np.random.random(crypto_num_selected_tickers))
    crypto_weights = crypto_weights / np.sum(crypto_weights )
    
    stocks_weights = np.array(np.random.random(stocks_num_selected_tickers))
    stocks_weights = stocks_weights / np.sum(stocks_weights )
    
    commodities_weights = np.array(np.random.random(commodities_num_selected_tickers))
    commodities_weights = commodities_weights / np.sum(commodities_weights )
    
    # bonds_weights = np.array(np.random.random(bonds_num_selected_tickers))
    # bonds_weights = bonds_weights / np.sum(bonds_weights )
    
    # Add the weights, add to the `weights_arrays`.
    crypto_all_weights[ind, :] = crypto_weights
    stocks_all_weights[ind, :] = stocks_weights
    commodities_all_weights[ind, :] = commodities_weights
    # bonds_all_weights[ind, :] = commodities_weights
    
    # Calculate expected log returns, add to the `returns_array`.
    crypto_ret_arr[ind] = np.sum((crypto_results.mean() * crypto_weights) * 365)
    stocks_ret_arr[ind] = np.sum((stocks_results.mean() * stocks_weights) * 252)
    commodities_ret_arr[ind] = np.sum((commodities_results.mean() * commodities_weights) * 252)
    # bonds_ret_arr[ind] = np.sum((bonds_resultss.mean() * bonds_weights) * 252)
     
    # Calculate the volatility and add it to the volatility_array. 
    # Apply square root to calculate how each part of the portfolio contributes to the whole portfolio.
    # Take correlation between assets into consideration. 
    
    # crypto_vol_arr[ind] = np.sqrt(
    # np.dot(crypto_weights.T, np.dot(crypto_log_returns.cov() * 365, crypto_weights)))
    
    crypto_vol_arr[ind] = np.dot(crypto_weights.T, np.dot(crypto_results.cov() * 365, crypto_weights))


    # stocks_vol_arr[ind] = np.sqrt(
    # np.dot(stocks_weights.T, np.dot(stocks_log_returns.cov() * 252, stocks_weights)))
    
    stocks_vol_arr[ind] = np.dot(stocks_weights.T, np.dot(stocks_results.cov() * 252, stocks_weights))


    # commodities_vol_arr[ind] = np.sqrt(
    # np.dot(commodities_weights.T, np.dot(commodities_log_returns.cov() * 252, commodities_weights)))
    
    commodities_vol_arr[ind] = np.dot(commodities_weights.T, np.dot(commodities_results.cov() * 252, commodities_weights))
    
    # bonds_vol_arr[ind] = np.dot(bonds_weights.T, np.dot(bonds_results.cov() * 252, bonds_weights))
        
    # Calculate Sharpe Ratio, add it to the `sharpe_ratio_array`.
    crypto_sharpe_ratio_arr[ind] = crypto_ret_arr[ind]/ crypto_vol_arr[ind]
    stocks_sharpe_ratio_arr[ind] = stocks_ret_arr[ind]/ stocks_vol_arr[ind]
    commodities_sharpe_ratio_arr[ind] = commodities_ret_arr[ind]/ commodities_vol_arr[ind]
    # bonds_sharpe_ratio_arr[ind] = bonds_ret_arr[ind]/ bonds_vol_arr[ind]

# # Monte Carlo simulation function
# def mc_sim(num_of_portfolios, results, selected_tickers):
#     """
#     Initializes the Monte Carlo simulation.
#     Initializes the arrays used in the Monte Carlo simulation.
#     """
#     # Declare an array to store the weights for each selected asset for each asset class
#     all_weights = np.zeros((num_of_portfolios, selected_tickers))
    
#     # Define arrays to store the returns, volatilities, and Sharpe ratios for each asset class
#     ret_arr = np.zeros(num_of_portfolios)
#     vol_arr = np.zeros(num_of_portfolios)
#     sharpe_ratio_arr = np.zeros(num_of_portfolios)
    
#     # Run the Monte Carlo simulation
#     for ind in range(num_of_portfolios, selected_tickers):

#         # Calculate and rebalance the random weights.
#         weights = np.array(np.random.random(selected_tickers))
#         weights = weights / np.sum(weights)

#         # Add the weights, to the `weights_arrays`.
#         all_weights[ind, :] = weights

#         # Calculate expected log returns, add to the `returns_array`.
#         if ret_arr is crypto_ret_arr:
#             ret_arr[ind] = np.sum((results.mean() * weights) * CRYPTO_TRADING_DAYS)
#         else: 
#             ret_arr[ind] = np.sum((results.mean() * weights) * ETFS_TRADING_DAYS_TRADING_DAYS)

#         # Calculate the volatility and add it to the volatility_array. 
#         # Apply square root to calculate how each part of the portfolio contributes to the whole portfolio.
#         # Take correlation between assets into consideration. 
#         if vol_arr is crypto_vol_arr:
#             vol_arr[ind] = np.dot(weights.T, np.dot(results.cov() * CRYPTO_TRADING_DAYS, weights))
#         else:
#             vol_arr[ind] = np.dot(weights.T, np.dot(results.cov() * ETFS_TRADING_DAYS, weights))

#             # Calculate the sharpe ratio and add it to the sharpe ratio  array.
#             sharpe_ratio_arr[ind] = ret_arr[ind] / vol_arr[ind]

#     return all_weights, ret_arr, vol_arr, sharpe_ratio_arr
    

In [1096]:
# # Call the Monte Carlo simulation function for each asset class.
# crypto_all_weights, crypto_ret_arr, crypto_vol_arr, crypto_sharpe_ratio_arr = mc_sim(num_of_portfolios, crypto_results, crypto_num_selected_tickers)
# stocks_all_weights, stocks_ret_arr, stocks_vol_arr, stocks_sharpe_ratio_arr = mc_sim(num_of_portfolios, stocks_results, stocks_num_selected_tickers)
# commodities_all_weights, commodities_ret_arr, commodities_vol_arr, commodities_sharpe_ratio_arr =  mc_sim(num_of_portfolios, commodities_results, commodities_num_selected_tickers)
# bonds_all_weights, bonds_ret_arr, bonds_vol_arr, bonds_sharpe_ratio_arr = mc_sim(num_of_portfolios, bonds_results, bonds_num_selected_tickers)

# Create data frame with the weights, the returns, the volatility, and the Sharpe Ratio for each asset class
crypto_simulations_data = [crypto_ret_arr, crypto_vol_arr, crypto_sharpe_ratio_arr, crypto_all_weights]
stocks_simulations_data = [stocks_ret_arr,  stocks_vol_arr,  stocks_sharpe_ratio_arr,  stocks_all_weights]
commodities_simulations_data = [commodities_ret_arr,  commodities_vol_arr,  commodities_sharpe_ratio_arr,  commodities_all_weights]
bonds_simulations_data = [bonds_ret_arr,  commodities_vol_arr,  commodities_sharpe_ratio_arr,  commodities_all_weights]

In [1097]:
#  # Create data frame with the weights, the returns, the volatility, and the Sharpe Ratio for each asset class
# crypto_simulations_data = [crypto_ret_arr, crypto_vol_arr, crypto_sharpe_ratio_arr, crypto_all_weights]
# stocks_simulations_data = [stocks_ret_arr,  stocks_vol_arr,  stocks_sharpe_ratio_arr,  stocks_all_weights]
# commodities_simulations_data = [commodities_ret_arr,  commodities_vol_arr,  commodities_sharpe_ratio_arr,  commodities_all_weights]
# bonds_simulations_data = [bonds_ret_arr, bonds_vol_arr, bonds_sharpe_ratio_arr, bonds_all_weights]


# Create a DataFrame from sim data and Transpose, so it will look like our original one.
crypto_simulations_df = pd.DataFrame(data=crypto_simulations_data).T
stocks_simulations_df = pd.DataFrame(data=stocks_simulations_data).T
commodities_simulations_df = pd.DataFrame(data=commodities_simulations_data).T
bonds_simulations_df = pd.DataFrame(data=bonds_simulations_data).T

In [1098]:
# # Create a DataFrame from sim data and Transpose, so will look like our original one.
# crypto_simulations_df = pd.DataFrame(data=crypto_simulations_data).T
# stocks_simulations_df = pd.DataFrame(data=stocks_simulations_data).T
# commodities_simulations_df = pd.DataFrame(data=commodities_simulations_data).T
# bonds_simulations_df = pd.DataFrame(data=bonds_simulations_data).T

In [1099]:
# Give the columns names for crypto
crypto_simulations_df.columns = [
    'Returns',
    'Volatility',
    'Sharpe Ratio',
    'Portfolio Weights'
]

# Make sure the data types are correct, we don't want our floats to be strings.
# Infer data types to convert columns with mixed data types to their appropriate data types.
crypto_simulations_df = crypto_simulations_df.infer_objects()


In [1100]:
crypto_simulations_df.head(10)

Unnamed: 0,Returns,Volatility,Sharpe Ratio,Portfolio Weights
0,0.629877,0.559258,1.126272,"[0.5275038241308848, 0.10221585440962987, 0.020017751721146396, 0.235277333377573, 0.11498523636076595]"
1,0.854718,0.887923,0.962604,"[0.00908885313205067, 0.3499960260674407, 0.19659696798771398, 0.23503053636101534, 0.20928761645177923]"
2,0.399843,0.743631,0.53769,"[0.02339116437414249, 0.009722012676309929, 0.3500324360705186, 0.3134984358737164, 0.30335595100531254]"
3,0.56347,0.695323,0.810372,"[0.31080981699939286, 0.05020052685578676, 0.1767346562531003, 0.2120340271267678, 0.25022097276495225]"
4,0.663261,1.137419,0.583128,"[0.3088441386673681, 0.21130452030016497, 0.4301680145725001, 0.04928016782085873, 0.0004031586391082078]"
5,0.540528,0.446708,1.210024,"[0.1461603074832507, 0.1340418033896009, 0.12144816311526749, 0.4031740717623949, 0.195175654249486]"
6,0.772891,0.556172,1.389662,"[0.21207056544323524, 0.31960752363993405, 0.044211621663244854, 0.3489014683388144, 0.07520882091477146]"
7,0.595296,1.046749,0.56871,"[0.2051466865884386, 0.015246929039003801, 0.2779738317351273, 0.10426029158148227, 0.397372261055948]"
8,0.858848,1.025193,0.837743,"[0.07654250140021376, 0.40561465679447517, 0.3020603764665769, 0.1761239917404191, 0.03965847359831509]"
9,0.85987,0.974205,0.882638,"[0.11124984758006186, 0.285491181862981, 0.18415499233283128, 0.15123554518269955, 0.26786843304142627]"


In [1101]:
# Find the Max Sharpe Ratio to find better portfoio that provides the largest risk-adjusted returns. 
crypto_max_sharpe_ratio = crypto_simulations_df.loc[crypto_simulations_df['Sharpe Ratio'].idxmax()]

# Find the minimum volatility from the simulations to identify a portfolio that takes on the least amount of risk.
crypto_min_volatility = crypto_simulations_df.loc[crypto_simulations_df['Volatility'].idxmin()]




crypto_max_sharpe_ratio_row = pd.Series({
    'Returns': crypto_max_sharpe_ratio['Returns'],
    'Volatility': crypto_max_sharpe_ratio['Volatility'],
    'Sharpe Ratio': crypto_max_sharpe_ratio['Sharpe Ratio'],
    'Portfolio Weights': crypto_max_sharpe_ratio['Portfolio Weights']
})

crypto_min_volatility_row = pd.Series({
    'Returns': crypto_min_volatility['Returns'],
    'Volatility': crypto_min_volatility['Volatility'],
    'Sharpe Ratio': crypto_min_volatility['Sharpe Ratio'],
    'Portfolio Weights': crypto_min_volatility['Portfolio Weights']
})

In [1102]:
crypto_max_sharpe_ratio_row

Returns                                                                                                            0.394408
Volatility                                                                                                         0.157897
Sharpe Ratio                                                                                                       2.497886
Portfolio Weights    [0.02474339772876649, 0.1656859350150329, 0.0321868576847935, 0.6947819291463575, 0.08260188042504957]
dtype: object

In [1103]:
crypto_min_volatility_row

Returns                                                                                                            0.394408
Volatility                                                                                                         0.157897
Sharpe Ratio                                                                                                       2.497886
Portfolio Weights    [0.02474339772876649, 0.1656859350150329, 0.0321868576847935, 0.6947819291463575, 0.08260188042504957]
dtype: object

In [1104]:

def create_scatter(simulations_df, max_sharpe_ratio, min_volatility):
     """
    Plot the simulated portfolios on a Scatter plot.

    Args:
    simulations_df: The expected return of the asset.
    max_sharpe_ratio (float): The maximum sharpe ratio portfolio of the asset.
    min_volatility (float): The minimum volatility portfolio of the asset.
    
    Returns:
    plot: The plot of simulated portfolios
    """
    
plt.scatter(
    y=simulations_df['Returns'],
    x=simulations_df['Volatility'],
    c=simulations_df['Sharpe Ratio'],
    cmap='RdYlBu'
)

# Give the Plot labels, and titles.
plt.title('Portfolio Returns Vs. Risk for {}')
plt.colorbar(label='Sharpe Ratio')
plt.xlabel('Standard Deviation')
plt.ylabel('Returns')

# Plot the Max Sharpe Ratio, using a `Red Star`.
plt.scatter(
    max_sharpe_ratio[1],
    max_sharpe_ratio[0],
    marker=(5, 1, 0),
    color='r',
    s=600
)

# Plot the Min Volatility, using a `Blue Star`.
plt.scatter(
    min_volatility[1],
    min_volatility[0],
    marker=(5, 1, 0),
    color='b',
    s=600
)

# Show the plot.
return plt.show()

NameError: name 'simulations_df' is not defined

In [883]:
crypto_sim_plot = create_scatter(crypto_simulations_df, crypto_max_sharpe_ratio, crypto_min_volatility )

In [54]:
# Monte Carlo simulation
def mc_sim(num_of_portfolios, returns, num_selected_tickers):
    """
    Initializes the Monte Carlo simulation.
    Initializes the arrays used in the Monte Carlo simulation.
    """
    # Declare an array to store the weights for each selected asset for each asset class
    all_weights = np.zeros((num_of_portfolios, num_selected_tickers))
    
    # Define arrays to store the returns, volatilities, and Sharpe ratios for each asset class
    ret_arr = np.zeros(num_of_portfolios)
    vol_arr = np.zeros(num_of_portfolios)
    sharpe_ratio_arr = np.zeros(num_of_portfolios)
    
    # Run the Monte Carlo simulation
    for ind in range(num_of_portfolios):
        
        # Calculate the weights.
        weights = np.array(np.random.random(num_selected_tickers))
        weights = weights / np.sum(weights)

        # Add the weights, add to the `weights_arrays`.
        all_weights[ind, :] = weights

        # Calculate expected log returns, add to the `returns_array`.
        # if ret_arr is crypto_ret_arr:
        #     ret_arr[ind] = np.sum((returns.mean() * weights) * CRYPTO_TRADING_DAYS)
        # else: 
        #     ret_arr[ind] = np.sum((returns.mean() * weights) * ETFS_TRADING_DAYS)

        # Calculate the volatility and add it to the volatility_array. 
        # Apply square root to calculate how each part of the portfolio contributes to the whole portfolio.
        # Take correlation between assets into consideration. 
#         if vol_arr is crypto_vol_arr:
#             vol_arr[ind] = np.dot(weights.T, np.dot(returns.cov() * CRYPTO_TRADING_DAYS, weights))
#         else:
#             vol_arr[ind] = np.dot(weights.T, np.dot(returns.cov() * ETFS_TRADING_DAYS, weights))

#         sharpe_ratio_arr[ind] = ret_arr[ind] / vol_arr[ind]
    
    # return all_weights, ret_arr, vol_arr, sharpe_ratio_arr
    return all_weights

In [55]:
mc_sim(num_of_portfolios,crypto_results,len(crypto_results.columns))

array([[0.00488305, 0.25689092, 0.24101648, 0.49373403, 0.00347552],
       [0.04608826, 0.26296286, 0.27386544, 0.03769482, 0.37938861],
       [0.07096909, 0.15627075, 0.16071097, 0.28882179, 0.32322739],
       ...,
       [0.02549348, 0.03406787, 0.25483701, 0.09237253, 0.59322911],
       [0.29843931, 0.27564481, 0.2609915 , 0.05507633, 0.10984804],
       [0.1898564 , 0.19483502, 0.24092683, 0.15371993, 0.22066182]])

In [56]:
len(crypto_results.columns)

5

In [57]:
crypto_portfolio = df_crypto_data_pivot.loc[best_crypto_ticekrs].T @ weights_crypto
stocks_portfolio = df_stocks_data_pivot.loc[best_stocks_ticekrs].T @ weights_stocks
commodities_portfolio = df_ccommodities_data_pivot.loc[best_commodities_ticekrs].T @ weights_commodities
bonds_portfolio = df_cbonds_data_pivot.loc[best_bonds_ticekrs].T @ weights_bonds

NameError: name 'df_crypto_data_pivot' is not defined

In [None]:
weights_crypto = []
weights_stocks = []
weights_commodities = []
weights_bonds = []

In [None]:
# def calculate_log_returns(crypto_price_df, stocks_price_df, commodities_price_df):
#     """
#     Calculate the log returns for each asset class dataframe.

#     Args:
#         crypto_price_df (DataFrame): DataFrame of cryptocurrency prices.
#         stocks_price_df (DataFrame): DataFrame of stock prices.
#         commodities_price_df (DataFrame): DataFrame of commodity prices.

#     Returns:
#         tuple: A tuple of pandas.DataFrame objects containing the log returns for each asset class.
#     """

#     crypto_log_returns = np.log(1 + crypto_price_df.pct_change())
#     stocks_log_returns = np.log(1 + stocks_price_df.pct_change())
#     commodities_log_returns = np.log(1 + commodities_price_df.pct_change())
#     return crypto_log_returns, stocks_log_returns, commodities_log_returns

In [None]:
def create_price_df(data: dict, api_pull: dict):
    """Creates a dataframe for each asset class with the 'Close' column
    
    Args:
    data (dict): a dictionary containing asset class names as keys and their respective price data as Pandas dataframes
    api_pull (dict): a dictionary containing selected asset classes and their respective tickers
    
    Returns:
    DataFrame:  DataFrames for asset classes and their respective price dataframes with the 'Close' column
    """
    crypto_price_df = pd.DataFrame({ticker: data['crypto'][ticker]['Close'] for ticker in api_pull['crypto']})
    stocks_price_df = pd.DataFrame({ticker: data['stocks'][ticker]['Close'] for ticker in api_pull['stocks']})
    commodities_price_df = pd.DataFrame({ticker: data['commodities'][ticker]['Close'] for ticker in api_pull['commodities']})
    
    return crypto_price_df,stocks_price_df,commodities_price_df

In [None]:
# def calculate_log_returns(crypto_price_df, stocks_price_df, commodities_price_df):
#     """
#     Calculate the log returns for each asset class dataframe.

#     Args:
#         crypto_price_df (DataFrame): DataFrame of cryptocurrency prices.
#         stocks_price_df (DataFrame): DataFrame of stock prices.
#         commodities_price_df (DataFrame): DataFrame of commodity prices.

#     Returns:
#         tuple: A tuple of pandas.DataFrame objects containing the log returns for each asset class.
#     """

#     crypto_log_returns = np.log(1 + crypto_price_df.pct_change())
#     stocks_log_returns = np.log(1 + stocks_price_df.pct_change())
#     commodities_log_returns = np.log(1 + commodities_price_df.pct_change())
#     return crypto_log_returns, stocks_log_returns, commodities_log_returns

In [None]:
# Call function to calculate expected returns for each asset class and annualize them.
        # Annualize crypto 365, stock and commodities 252 days
        crypto_expected_returns = calculate_expected_returns(crypto_log_returns, weights_crypto)
        stocks_expected_returns = calculate_expected_returns(stocks_log_returns, weights_stocks)
        commodities_expected_returns = calculate_expected_returns(commodities_log_returns, weights_commodities)
        
        
        weights_crypto = []
weights_stocks = []
weights_commodities = []
weights_bonds = []

In [None]:
def calculate_expected_returns(log_returns, weights):
    """
    Calculate expected returns for an asset class and annualize them

    param log_returns: numpy array of log returns for an asset class
    param rebalanced_wts: numpy array of weights for each asset in the asset class

    Crypto is annnualized by 365 trading days
    Stocks and commodities are anualized by 252 trading days

    Returns: expected returns for the asset class
    
    """
    if log_returns is crypto_log_returns and weights is weights_crypto:
        trading_days = CRYPTO_TRADING_DAYS
    else:
        trading_days = ETFS_TRADING_DAYS

    expected_returns = np.sum((log_returns.mean() * weights) * trading_days)

    return expected_returns

In [None]:
# Call function to calculate expected returns for each asset class and annualize them.
        # Annualize crypto 365, stock and commodities 252 days
        crypto_expected_returns = calculate_expected_returns(crypto_log_returns, weights_crypto)
        stocks_expected_returns = calculate_expected_returns(stocks_log_returns, weights_stocks)
        commodities_expected_returns = calculate_expected_returns(commodities_log_returns, weights_commodities)

In [None]:
def calculate_expected_volatility(log_returns, rebalanced_wts):
    """
    Calculates the expected volatility based on the logarithmic returns and rebalanced weights.

    Parameters:
    log_returns (pandas.DataFrame): A DataFrame containing the logarithmic returns for the asset.
    rebalanced_wts (numpy.ndarray): An array containing the rebalanced weights for the asset.

    Returns:
    expected_volatility (float): The expected volatility for the asset.
    """

    if log_returns is crypto_log_returns and weights is weights_crypto:
        trading_days = CRYPTO_TRADING_DAYS
    else:
        trading_days = ETFS_TRADING_DAYS

    expected_volatility = np.sqrt(np.dot(weights.T,
                             np.dot(log_returns.cov() * trading_days, weights)))

    return expected_volatility

In [None]:
# Call function to calculate expected volatility for each asset
        crypto_expected_volatility = calculate_expected_volatility(crypto_log_returns, weights_crypto)
        stocks_expected_volatility = calculate_expected_volatility(stocks_log_returns, weights_stocks)
        commodities_expected_volatility = calculate_expected_volatility(commodities_log_returns, weights_commodities)

In [None]:
ef calculate_sharpe_ratio(expected_returns, expected_volatility):
    """
    Calculates the Sharpe Ratio given expected returns and volatility.

    Args:
    expected_returns (float): The expected return of the asset.
    expected_volatility (float): The expected volatility of the asset.

    Returns:
    float: The Sharpe Ratio of the asset.
    """
    sharpe_ratio = expected_returns / expected_volatility
    return sharpe_ratio

In [None]:
 # Call function to calculate the Sharpe Ratio for each asset class using the calculate_sharpe_ratio function
        crypto_sharpe_ratio = calculate_sharpe_ratio(crypto_expected_returns, crypto_expected_volatility)
        stocks_sharpe_ratio = calculate_sharpe_ratio(stocks_expected_returns, stocks_expected_volatility)
        commodities_sharpe_ratio = calculate_sharpe_ratio(commodities_expected_returns, commodities_expected_volatility)

In [254]:
# Monte Carlo simulation
def mc_sim(num_of_portfolios, returns, num_selected_tickers):
    """
    Initializes the Monte Carlo simulation.
    Initializes the arrays used in the Monte Carlo simulation.
    """
    # Declare an array to store the weights for each selected asset for each asset class
    all_weights = np.zeros((num_of_portfolios, num_selected_tickers))
    
    # Define arrays to store the returns, volatilities, and Sharpe ratios for each asset class
    ret_arr = np.zeros(num_of_portfolios)
    vol_arr = np.zeros(num_of_portfolios)
    sharpe_ratio_arr = np.zeros(num_of_portfolios)
    
    # Run the Monte Carlo simulation
    for ind in range(num_of_portfolios):
        
        # Calculate the weights.
        # weights = np.array(np.random.random(num_selected_tickers))
        weights = weights / np.sum(weights)

        # Add the weights, add to the `weights_arrays`.
        all_weights[ind, :] = weights

        # Calculate expected log returns, add to the `returns_array`.
        if ret_arr is crypto_ret_arr:
            ret_arr[ind] = np.sum((returns.mean() * weights) * CRYPTO_TRADING_DAYS)
        else: 
            ret_arr[ind] = np.sum((returns.mean() * weights) * STOCKS_COMMODITIES_TRADING_DAYS)

        # Calculate the volatility and add it to the volatility_array. 
        # Apply square root to calculate how each part of the portfolio contributes to the whole portfolio.
        # # Take correlation between assets into consideration. 
        # if vol_arr is crypto_vol_arr:
        #     vol_arr[ind] = np.dot(weights.T, np.dot(log_returns.cov() * CRYPTO_TRADING_DAYS, weights))
        # else:
        #     vol_arr[ind] = np.dot(weights.T, np.dot(log_returns.cov() * STOCKS_COMMODITIES_TRADING_DAYS, weights))

        # sharpe_ratio_arr[ind] = ret_arr[ind] / vol_arr[ind]
    
    return all_weights, ret_arr, vol_arr, sharpe_ratio_arr

In [None]:
# Monte Carlo simulation
def mc_sim(num_of_portfolios, returns, num_selected_tickers):
    """
    Initializes the Monte Carlo simulation.
    Initializes the arrays used in the Monte Carlo simulation.
    """
    # Declare an array to store the weights for each selected asset for each asset class
    all_weights = np.zeros((num_of_portfolios, num_selected_tickers))
    
    # Define arrays to store the returns, volatilities, and Sharpe ratios for each asset class
    ret_arr = np.zeros(num_of_portfolios)
    vol_arr = np.zeros(num_of_portfolios)
    sharpe_ratio_arr = np.zeros(num_of_portfolios)
    
    # Run the Monte Carlo simulation
    for ind in range(num_of_portfolios):
        
        # Calculate the weights.
        # weights = np.array(np.random.random(num_selected_tickers))
        weights = weights / np.sum(weights)

        # Add the weights, add to the `weights_arrays`.
        all_weights[ind, :] = weights

        # Calculate expected log returns, add to the `returns_array`.
        if ret_arr is crypto_ret_arr:
            ret_arr[ind] = np.sum((returns.mean() * weights) * CRYPTO_TRADING_DAYS)
        else: 
            ret_arr[ind] = np.sum((returns.mean() * weights) * STOCKS_COMMODITIES_TRADING_DAYS)

        # Calculate the volatility and add it to the volatility_array. 
        # Apply square root to calculate how each part of the portfolio contributes to the whole portfolio.
        # Take correlation between assets into consideration. 
        if vol_arr is crypto_vol_arr:
            vol_arr[ind] = np.dot(weights.T, np.dot(log_returns.cov() * CRYPTO_TRADING_DAYS, weights))
        else:
            vol_arr[ind] = np.dot(weights.T, np.dot(log_returns.cov() * STOCKS_COMMODITIES_TRADING_DAYS, weights))

        sharpe_ratio_arr[ind] = ret_arr[ind] / vol_arr[ind]
    
    return all_weights, ret_arr, vol_arr, sharpe_ratio_arr