In [None]:
!pip install yfinance




In [None]:
import yfinance as yf
import pandas as pd
import time

# Tickers to download
stocks = [
    'AAPL', 'MSFT', 'NVDA', 'JPM', 'XOM', 'JNJ',
    'PG', 'KO', 'AMZN', 'TSLA', '^GSPC',
]

# Request the last five years of daily bars with auto-adjusted prices
download_options = dict(period='5y', interval='1d', auto_adjust=True)
data = yf.download(stocks, **download_options)

# Keep only the 'Close' field (adjusted, because auto_adjust=True was requested)
prices = data['Close']

print(prices.head())


[*********************100%***********************]  11 of 11 completed

Ticker            AAPL        AMZN         JNJ         JPM         KO  \
Date                                                                    
2021-01-29  128.456757  160.309998  141.529755  113.310226  41.425762   
2021-02-01  130.578918  167.143997  141.165344  114.146812  41.709667   
2021-02-02  131.406342  169.000000  139.898682  117.660515  42.122631   
2021-02-03  130.384216  165.626495  139.248016  119.007866  41.959171   
2021-02-04  133.742599  166.550003  140.540726  121.746620  42.165646   

Ticker            MSFT       NVDA          PG        TSLA        XOM  \
Date                                                                   
2021-01-29  222.574844  12.953324  113.013580  264.510010  36.648190   
2021-02-01  229.953674  13.199879  113.683479  279.936676  36.713573   
2021-02-02  229.819366  13.518733  113.524818  290.929993  37.293861   
2021-02-03  233.168152  13.492558  113.665848  284.896667  38.756847   
2021-02-04  232.218201  13.625933  113.736359  283.32998




In [None]:
# Write a copy of the frame with the Date index turned into a regular column.
# `prices` itself is left unmodified, so re-running this cell always produces
# the same file (an in-place reset would add a stray "index" column the
# second time around).
prices.reset_index().to_csv("stock_prices.csv", index=False)


In [None]:
def fetch_data(tickers, period='5y', retries=3, delay=5):
    """Download daily, auto-adjusted price history with a simple retry loop.

    Parameters
    ----------
    tickers : str or list of str
        Symbols passed straight to ``yf.download``.
    period : str, default '5y'
        Look-back window passed to ``yf.download``.
    retries : int, default 3
        Maximum number of download attempts.
    delay : float, default 5
        Seconds to wait between a failed attempt and the next one.

    Returns
    -------
    The object returned by ``yf.download`` (requested with
    ``group_by='ticker'``).

    Raises
    ------
    RuntimeError
        When every attempt failed; the last underlying error is chained as
        the cause. ``RuntimeError`` is a subclass of ``Exception``, so
        callers catching ``Exception`` keep working.
    """
    last_error = None
    for attempt in range(retries):
        try:
            data = yf.download(
                tickers,
                period=period,
                interval='1d',
                auto_adjust=True,   # Handles splits & dividends
                group_by='ticker'
            )
            # NOTE(review): yfinance appears to signal some failures by
            # returning an empty frame instead of raising; treat that as a
            # failed attempt so it is retried rather than reported as success.
            if data is None or getattr(data, "empty", False):
                raise ValueError("download returned no data")
            print("Data downloaded successfully")
            return data
        except Exception as e:
            last_error = e
            # Surface the actual error instead of silently discarding it.
            print(f" Attempt {attempt+1} failed: {e}")
            # Only wait when another attempt will actually follow.
            if attempt < retries - 1:
                print(" Retrying...")
                time.sleep(delay)
    raise RuntimeError(" Failed to download data after retries") from last_error


In [None]:
# Download the full ticker universe using fetch_data's default period and retry count
raw_data = fetch_data(tickers=stocks)


[*********************100%***********************]  11 of 11 completed

Data downloaded successfully





In [None]:
# One column per ticker, each holding that ticker's 'Close' series
# (raw_data is grouped by ticker, so the first lookup is the symbol)
prices = pd.DataFrame(
    {symbol: raw_data[symbol]['Close'] for symbol in stocks}
)


In [None]:
# Remove rows where all prices are missing, then forward fill the remaining
# gaps (market holidays etc.).
# `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')`, and the
# result is reassigned instead of mutating the frame in place.
prices = prices.dropna(how='all').ffill()



  prices.fillna(method='ffill', inplace=True)


In [None]:
# Row-over-row percentage change per ticker; rows containing any NaN are dropped
price_changes = prices.pct_change()
daily_returns = price_changes.dropna()


In [None]:
# Persist the cleaned prices and the derived returns (index column included)
for csv_name, frame in (
    ("alphapulse_clean_prices.csv", prices),
    ("alphapulse_daily_returns.csv", daily_returns),
):
    frame.to_csv(csv_name)

print(" Clean datasets saved successfully")


 Clean datasets saved successfully


**Week 2: Quantitative Analysis**

In [None]:
import pandas as pd
import numpy as np


In [None]:
# Reload the price table from "stock_prices.csv" and preview the first rows
prices = pd.read_csv(filepath_or_buffer="stock_prices.csv")
prices.head(n=5)


Unnamed: 0,Date,AAPL,AMZN,JNJ,JPM,KO,MSFT,NVDA,PG,TSLA,XOM,^GSPC
0,2021-01-29,128.456757,160.309998,141.52977,113.310219,41.425755,222.574844,12.953323,113.013565,264.51001,36.648186,3714.23999
1,2021-02-01,130.578857,167.143997,141.16539,114.146805,41.709671,229.95369,13.199879,113.683479,279.936676,36.71357,3773.860107
2,2021-02-02,131.406311,169.0,139.898682,117.66053,42.122639,229.819366,13.518734,113.524818,290.929993,37.293858,3826.310059
3,2021-02-03,130.384201,165.626495,139.248032,119.007881,41.959167,233.168167,13.492556,113.665855,284.896667,38.756844,3830.169922
4,2021-02-04,133.742615,166.550003,140.54071,121.746628,42.165657,232.218216,13.625934,113.736366,283.329987,39.500603,3871.73999


In [None]:
# Candidate symbols for the quantitative analysis. Symbols that have no
# matching column in `prices` are filtered out when `valid_tickers` is built.
tickers = [
    'AAPL',
    'MSFT',
    'GOOGL',
    'AMZN',
    'TSLA',
    'JPM',
    'JNJ',
    'NVDA',
    'META',
    'WMT',
]


In [None]:
# Keep only the candidate tickers that actually exist as columns of `prices`
price_columns = prices.columns
valid_tickers = price_columns.intersection(tickers)

In [None]:
# Log return per ticker: ln(P_t / P_{t-1})
selected_prices = prices[valid_tickers]
previous_prices = selected_prices.shift(1)
log_returns = np.log(selected_prices / previous_prices)

# Drop every row that contains a NaN (this includes the first row, which has
# no previous price to compare against)
log_returns = log_returns.dropna()
log_returns.head()


Unnamed: 0,AAPL,AMZN,JNJ,JPM,MSFT,NVDA,TSLA
1,0.016385,0.041746,-0.002578,0.007356,0.032615,0.018855,0.056684
2,0.006317,0.011043,-0.009014,0.030318,-0.000584,0.023869,0.038519
3,-0.007809,-0.020163,-0.004662,0.011386,0.014466,-0.001938,-0.020956
4,0.025432,0.00556,0.00924,0.022752,-0.004082,0.009837,-0.005514
5,-0.003103,0.006329,0.015072,-0.001955,0.000785,-0.005375,0.002632


In [None]:
# Attach the matching date to every return row.
# Dates are looked up through the row labels that survived dropna(), rather
# than positionally with iloc[1:], so the column stays aligned (and has the
# right length) even if rows other than the first were dropped.
# The guard makes the cell safe to re-run: insert() raises if the column
# already exists.
if "Date" not in log_returns.columns:
    log_returns.insert(
        0,
        "Date",
        prices.loc[log_returns.index, "Date"].values
    )


In [None]:
# Write the returns table without the index column
log_returns.to_csv(path_or_buf="log_returns.csv", index=False)
print("log_returns.csv saved")


log_returns.csv saved


In [None]:
# Pairwise Pearson correlation of the numeric return columns
# (numeric_only=True skips non-numeric columns such as Date)
correlation_matrix = log_returns.corr(method="pearson", numeric_only=True)
correlation_matrix

Unnamed: 0,AAPL,AMZN,JNJ,JPM,MSFT,NVDA,TSLA
AAPL,1.0,0.557487,0.152634,0.357415,0.629119,0.520487,0.498301
AMZN,0.557487,1.0,0.010545,0.351866,0.655935,0.558867,0.447946
JNJ,0.152634,0.010545,1.0,0.193949,0.068271,-0.096401,-0.022675
JPM,0.357415,0.351866,0.193949,1.0,0.315454,0.318602,0.308425
MSFT,0.629119,0.655935,0.068271,0.315454,1.0,0.629428,0.421975
NVDA,0.520487,0.558867,-0.096401,0.318602,0.629428,1.0,0.477156
TSLA,0.498301,0.447946,-0.022675,0.308425,0.421975,0.477156,1.0


In [None]:
# Label each row with its date. The Date column already stored on
# log_returns is reused instead of slicing prices["Date"] positionally, so
# the labels cannot drift from the rows or differ in length.
log_returns.index = log_returns["Date"].values
log_returns.head()


Unnamed: 0,Date,AAPL,AMZN,JNJ,JPM,MSFT,NVDA,TSLA
2021-02-01,2021-02-01,0.016385,0.041746,-0.002578,0.007356,0.032615,0.018855,0.056684
2021-02-02,2021-02-02,0.006317,0.011043,-0.009014,0.030318,-0.000584,0.023869,0.038519
2021-02-03,2021-02-03,-0.007809,-0.020163,-0.004662,0.011386,0.014466,-0.001938,-0.020956
2021-02-04,2021-02-04,0.025432,0.00556,0.00924,0.022752,-0.004082,0.009837,-0.005514
2021-02-05,2021-02-05,-0.003103,0.006329,0.015072,-0.001955,0.000785,-0.005375,0.002632


In [None]:
# Save the correlation table (row labels are kept as the first column)
correlation_matrix.to_csv(path_or_buf="correlation_matrix.csv")
print("correlation_matrix.csv saved")


correlation_matrix.csv saved


In [None]:
# Ticker-only view of the returns, coerced to float
log_returns_mc = log_returns[valid_tickers].astype(float)

mean_returns = log_returns_mc.mean().values

# Make covariance matrix stable: add a tiny value (1e-10) to its diagonal
# NOTE(review): mean_returns and cov_matrix are reassigned a few cells below
# from log_returns_numeric, without this diagonal adjustment.
sample_cov = log_returns_mc.cov().values
diagonal_jitter = 1e-10 * np.eye(sample_cov.shape[0])
cov_matrix = sample_cov + diagonal_jitter


In [None]:
# Keep numeric columns only (non-numeric columns such as Date are excluded)
numeric_dtypes = [np.number]
log_returns_numeric = log_returns.select_dtypes(include=numeric_dtypes)

In [None]:
# Per-column mean and covariance of the numeric returns, as plain NumPy arrays
returns_mean = log_returns_numeric.mean()
returns_cov = log_returns_numeric.cov()
mean_returns = returns_mean.to_numpy()
cov_matrix = returns_cov.to_numpy()

In [None]:
# Equal weighting: every asset gets 1 / num_assets of the portfolio
num_assets = log_returns_numeric.shape[1]
weights = np.full(num_assets, 1 / num_assets)

In [None]:
# Simulation settings
num_simulations = 10_000   # or any number you want
time_horizon = 252         # 1 year
initial_value = 100_000    # starting portfolio value

# Equal-weight allocation across the numeric return columns
num_assets = log_returns_numeric.shape[1]
weights = np.ones(num_assets) / num_assets

# One slot per simulated run, filled in by the simulation loop
simulated_portfolios = np.zeros(shape=num_simulations)


In [None]:
# Fixed seed so the simulated distribution (and every statistic derived from
# it) is reproducible across runs.
rng = np.random.default_rng(42)

for i in range(num_simulations):
    # One simulated path of daily log returns: time_horizon draws from a
    # multivariate normal with the historical mean vector and covariance.
    random_returns = rng.multivariate_normal(
        mean_returns,
        cov_matrix,
        time_horizon
    )
    # Both statements below must run once per simulation. Outside the loop
    # they execute a single time, leaving every slot of simulated_portfolios
    # except the last at zero.
    # NOTE(review): weighting asset log returns linearly is an approximation
    # of the portfolio return — confirm this is acceptable.
    portfolio_returns = random_returns @ weights
    simulated_portfolios[i] = initial_value * np.exp(portfolio_returns.sum())

print("Simulation complete!")

Simulation complete!


In [None]:
# Tabulate the simulated end values with a 1-based simulation number
simulation_ids = range(1, num_simulations + 1)
mc_df = pd.DataFrame(
    {"Simulation": simulation_ids, "PortfolioValue": simulated_portfolios}
)

mc_df.to_csv(path_or_buf="monte_carlo_simulation.csv", index=False)
print("monte_carlo_simulation.csv saved")


monte_carlo_simulation.csv saved


In [None]:
# 5th percentile of the simulated portfolio values.
# NOTE(review): this stores the percentile value itself, not a loss relative
# to initial_value — confirm that is the intended definition of VaR.
VaR_95 = np.percentile(simulated_portfolios, 5)

# Single-row summary table
VaR_df = pd.DataFrame(
    [{"Confidence_Level": "95%", "Value_at_Risk": VaR_95}]
)

VaR_df.to_csv(path_or_buf="VaR_summary.csv", index=False)
print("VaR_summary.csv saved")


VaR_summary.csv saved


In [None]:
# Preview the VaR summary table
VaR_df.head(n=5)


Unnamed: 0,Confidence_Level,Value_at_Risk
0,95%,0.0


In [None]:
from scipy.stats import skew, kurtosis

# Shape statistics of the simulated portfolio-value distribution
portfolio_skewness = skew(simulated_portfolios)
portfolio_kurtosis = kurtosis(simulated_portfolios)

print("Skewness:", portfolio_skewness)
print("Kurtosis:", portfolio_kurtosis)


Skewness: 99.98499937495626
Kurtosis: 9995.000100010004


In [None]:
from google.colab import files

# Trigger a browser download for each exported CSV, in the original order
for exported_csv in (
    "stock_prices.csv",
    "log_returns.csv",
    "correlation_matrix.csv",
    "monte_carlo_simulation.csv",
    "VaR_summary.csv",
):
    files.download(exported_csv)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>