In [16]:
import numpy as np
import pandas as pd
import yfinance as yf

# List of stock symbols
stocks = {
    'Technology': ['AAPL', 'MSFT', 'GOOGL'],
    'Finance': ['JPM', 'BAC', 'GS'],
    'Healthcare': ['JNJ', 'PFE', 'MRK'],
    'Consumer Discretionary': ['AMZN', 'TSLA', 'DIS'],
    'Energy': ['XOM', 'CVX', 'COP'],
}


# Define date range for historical data
start_date = "2020-01-01"
end_date = "2022-01-01"

# Download historical price data
data = {}
for sector, symbols in stocks.items():
    sector_data = pd.DataFrame()
    for symbol in symbols:
        stock = yf.download(symbol, start=start_date, end=end_date,keepna=False)
        returns = stock['Adj Close'].pct_change().dropna()  # Calculate daily returns
        sector_data[symbol] = returns
    data[sector] = sector_data

# Calculate mean and variance of daily returns
mu_hat = {}
v_hat = {}
for sector, sector_data in data.items():
    mu_hat[sector] = sector_data.mean()
    v_hat[sector] = sector_data.var()

# Portfolio optimization
b = 0.001  # Expected return (assumed to be constant for simplicity)
w_b = {}
for sector, sector_data in data.items():
    cov_matrix = sector_data.cov()  # Covariance matrix
    ones_vector = np.ones(len(stocks[sector]))  # Vector of ones
    inv_cov_matrix = np.linalg.inv(cov_matrix)

    w_b[sector] = np.dot(inv_cov_matrix, mu_hat[sector] - b * ones_vector) / np.sum(np.dot(inv_cov_matrix, ones_vector))


# Simulate portfolio performance and calculate mean squared error
months_to_simulate = [1, 3, 6]
results = {}
for month in months_to_simulate:
    results[month] = []
    for sector, sector_data in data.items():
        try:
            port_returns = np.dot(sector_data.values, w_b[sector])  # Portfolio returns
            overall_port_returns = np.dot(port_returns, np.ones(len(port_returns)))  # Overall portfolio return
            mse = np.mean((overall_port_returns - mu_hat[sector]) ** 2)  # Mean squared error
            results[month].append(mse)
        except ValueError as e:
            print(f"Error processing {sector}: {e}")

# Compare results with initial return
initial_mse = {}
for sector, sector_data in data.items():
    try:
        initial_returns = np.dot(sector_data.iloc[0], w_b[sector])  # Initial portfolio returns
        initial_mse[sector] = (initial_returns - mu_hat[sector][0]) ** 2
    except ValueError as e:
        print(f"Error processing {sector}: {e}")

# Print or analyze the results as needed
print("Results for 1 month:", results[1])
print("Results for 3 months:", results[3])
print("Results for 6 months:", results[6])
print("Initial MSE:", initial_mse)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


1 Failed download:
['BAC']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2020-01-01 -> 2022-01-01)')



[*********************100%%**********************]  1 of 1 completed


1 Failed download:
['GS']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2020-01-01 -> 2022-01-01)')



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


1 Failed download:
['DIS']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2020-01-01 -> 2022-01-01)')



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


1 Failed download:
['CVX']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2020-01-01 -> 2022-01-01)')



[*********************100%%**********************]  1 of 1 completed
Results for 1 month: [1.1378060733805143e-06, nan, 3.498110281684707e-07, nan, nan]
Results for 3 months: [1.1378060733805143e-06, nan, 3.498110281684707e-07, nan, nan]
Results for 6 months: [1.1378060733805143e-06, nan, 3.498110281684707e-07, nan, nan]
Initial MSE: {'Technology': 4.089287512343273e-06, 'Finance': nan, 'Healthcare': 2.719071834956343e-07, 'Consumer Discretionary': nan, 'Energy': nan}


In [5]:
mu_hat

{'Pharma': MEDPLUS.NS   -0.012024
 DRREDDY.NS    0.007464
 dtype: float64,
 'Banking': PNB.NS         0.000607
 ANGELONE.BO    0.005829
 dtype: float64,
 'Technology': INFY.NS     0.001802
 WIPRO.NS    0.002641
 dtype: float64,
 'Agriculture': RALLIS.NS    0.000130
 IPL.BO      -0.000537
 dtype: float64,
 'Energy': BPCL.NS    0.000725
 IOC.BO     0.000954
 dtype: float64}

In [6]:
v_hat

{'Pharma': MEDPLUS.NS    0.002061
 DRREDDY.NS    0.000167
 dtype: float64,
 'Banking': PNB.NS         0.000656
 ANGELONE.BO    0.001532
 dtype: float64,
 'Technology': INFY.NS     0.000173
 WIPRO.NS    0.000311
 dtype: float64,
 'Agriculture': RALLIS.NS    0.000348
 IPL.BO       0.000369
 dtype: float64,
 'Energy': BPCL.NS    0.000271
 IOC.BO     0.000304
 dtype: float64}

In [7]:
w_b

{'Pharma': array([-0.00185841,  0.00860429]),
 'Banking': array([-0.00063997,  0.00166974]),
 'Technology': array([8.01419459e-05, 8.50824419e-04]),
 'Agriculture': array([-0.00029632, -0.00089243]),
 'Energy': array([-0.00041186,  0.0002303 ])}

In [8]:
initial_mse

{'Pharma': 0.00014391055712466668,
 'Banking': 3.4186670854179626e-07,
 'Technology': 3.1744385770120902e-06,
 'Agriculture': nan,
 'Energy': 5.38411228382243e-07}

In [9]:
for month in months_to_simulate:
    print(f"Results for {month} month:")
    for sector, mse in zip(stocks.keys(), results[month]):
        print(f"{sector}: {mse:.8f}")

Results for 1 month:
Pharma: 0.00010279
Banking: 0.00000765
Technology: 0.00000283
Agriculture: nan
Energy: 0.00000075
Results for 3 month:
Pharma: 0.00010279
Banking: 0.00000765
Technology: 0.00000283
Agriculture: nan
Energy: 0.00000075
Results for 6 month:
Pharma: 0.00010279
Banking: 0.00000765
Technology: 0.00000283
Agriculture: nan
Energy: 0.00000075


In [10]:
# Simulate portfolio performance and calculate mean squared error
months_to_simulate = [1, 3, 6]
results = {}
for month in months_to_simulate:
    results[month] = []
    for sector, sector_data in data.items():
        try:
            port_returns = np.dot(sector_data.values, w_b[sector])  # Portfolio returns
            overall_port_returns = np.dot(port_returns, np.ones(len(port_returns)))  # Overall portfolio return
            mse = np.mean((overall_port_returns - mu_hat[sector]) ** 2)  # Mean squared error
            results[month].append(mse)
        except ValueError as e:
            print(f"Error processing {sector}: {e}")

In [13]:
import requests
import io
url="https://pkgstore.datahub.io/core/nasdaq-listings/nasdaq-listed_csv/data/7665719fb51081ba0bd834fde71ce822/nasdaq-listed_csv.csv"
s = requests.get(url).content
companies = pd.read_csv(io.StringIO(s.decode('utf-8')))

In [16]:
companies.to_csv('nasdaq-listed.csv', index=False)