In [1]:
!pip install yfinance




In [2]:
import yfinance as yf
import pandas as pd
import time

# Stock list
stocks = [
    'AAPL', 'MSFT', 'NVDA', 'JPM', 'XOM',
    'JNJ', 'PG', 'KO', 'AMZN', 'TSLA', '^GSPC'
]

# Download last 5 years data
data = yf.download(
    stocks,
    period='5y',
    interval='1d',
    auto_adjust=True
)

# Take only Adjusted Close prices
prices = data['Close']

print(prices.head())


[*********************100%***********************]  11 of 11 completed

Ticker            AAPL        AMZN         JNJ         JPM         KO  \
Date                                                                    
2021-02-16  129.726471  163.447495  143.212875  127.382652  43.249702   
2021-02-17  127.437561  165.432007  143.724792  127.778954  43.129246   
2021-02-18  126.336975  166.411499  143.794174  128.210464  43.679867   
2021-02-19  126.492775  162.494995  141.399628  130.350357  43.112041   
2021-02-22  122.723442  159.037003  141.312347  131.574432  43.559418   

Ticker            MSFT       NVDA          PG        TSLA        XOM  \
Date                                                                   
2021-02-16  233.839844  15.287260  112.757935  265.406677  42.968716   
2021-02-17  234.859268  14.864197  113.233948  266.049988  43.637531   
2021-02-18  234.464966  14.787413  114.177101  262.459991  42.952209   
2021-02-19  231.752808  14.884641  112.052773  260.433319  43.241196   
2021-02-22  225.539917  14.315493  111.576752  238.16667




In [3]:
prices.reset_index(inplace=True)
prices.to_csv("stock_prices.csv", index=False)


In [4]:
def fetch_data(tickers, period='5y', retries=3):
    for attempt in range(retries):
        try:
            data = yf.download(
                tickers,
                period=period,
                interval='1d',
                auto_adjust=True,   # Handles splits & dividends
                group_by='ticker'
            )
            print("Data downloaded successfully")
            return data
        except Exception as e:
            print(f" Attempt {attempt+1} failed. Retrying...")
            time.sleep(5)
    raise Exception(" Failed to download data after retries")


In [5]:
raw_data = fetch_data(stocks)


[*********************100%***********************]  11 of 11 completed

Data downloaded successfully





In [6]:
prices = pd.DataFrame()

for stock in stocks:
    prices[stock] = raw_data[stock]['Close']


In [7]:
# Remove rows where all prices are missing
prices.dropna(how='all', inplace=True)

# Forward fill missing values (market holidays etc.)
prices.fillna(method='ffill', inplace=True)



  prices.fillna(method='ffill', inplace=True)


In [8]:
daily_returns = prices.pct_change().dropna()


In [9]:
prices.to_csv("alphapulse_clean_prices.csv")
daily_returns.to_csv("alphapulse_daily_returns.csv")

print(" Clean datasets saved successfully")


 Clean datasets saved successfully


**Week 2: Quantitative Analysis**

In [10]:
import pandas as pd
import numpy as np


In [11]:
prices = pd.read_csv("stock_prices.csv")
prices.head()


Unnamed: 0,Date,AAPL,AMZN,JNJ,JPM,KO,MSFT,NVDA,PG,TSLA,XOM,^GSPC
0,2021-02-16,129.726471,163.447495,143.212875,127.382652,43.249702,233.839844,15.28726,112.757935,265.406677,42.968716,3932.590088
1,2021-02-17,127.437561,165.432007,143.724792,127.778954,43.129246,234.859268,14.864197,113.233948,266.049988,43.637531,3931.330078
2,2021-02-18,126.336975,166.411499,143.794174,128.210464,43.679867,234.464966,14.787413,114.177101,262.459991,42.952209,3913.969971
3,2021-02-19,126.492775,162.494995,141.399628,130.350357,43.112041,231.752808,14.884641,112.052773,260.433319,43.241196,3906.709961
4,2021-02-22,122.723442,159.037003,141.312347,131.574432,43.559418,225.539917,14.315493,111.576752,238.166672,44.83477,3876.5


In [12]:
tickers = [
    'AAPL','MSFT','GOOGL','AMZN','TSLA',
    'JPM','JNJ','NVDA','META','WMT'
]


In [13]:
valid_tickers=prices.columns.intersection(tickers)

In [14]:
log_returns = np.log(
    prices[valid_tickers] / prices[valid_tickers].shift(1)
)

log_returns = log_returns.dropna()
log_returns.head()


Unnamed: 0,AAPL,AMZN,JNJ,JPM,MSFT,NVDA,TSLA
1,-0.017802,0.012068,0.003568,0.003106,0.00435,-0.028064,0.002421
2,-0.008674,0.005903,0.000483,0.003371,-0.00168,-0.005179,-0.013586
3,0.001232,-0.023816,-0.016793,0.016553,-0.011635,0.006554,-0.007752
4,-0.030252,-0.02151,-0.000617,0.009347,-0.027174,-0.038988,-0.089376
5,-0.001112,0.004317,-0.008874,0.007999,-0.005302,-0.015002,-0.022161


In [15]:
log_returns.insert(
    0,
    "Date",
    prices["Date"].iloc[1:].values
)


In [16]:
log_returns.to_csv("log_returns.csv", index=False)
print("log_returns.csv saved")


log_returns.csv saved


In [17]:
correlation_matrix = log_returns.corr(numeric_only=True)
correlation_matrix

Unnamed: 0,AAPL,AMZN,JNJ,JPM,MSFT,NVDA,TSLA
AAPL,1.0,0.551656,0.151475,0.359523,0.607469,0.51642,0.492858
AMZN,0.551656,1.0,0.005027,0.347141,0.641254,0.553637,0.442879
JNJ,0.151475,0.005027,1.0,0.193362,0.061314,-0.096563,-0.023523
JPM,0.359523,0.347141,0.193362,1.0,0.307122,0.318259,0.307797
MSFT,0.607469,0.641254,0.061314,0.307122,1.0,0.619481,0.417531
NVDA,0.51642,0.553637,-0.096563,0.318259,0.619481,1.0,0.479821
TSLA,0.492858,0.442879,-0.023523,0.307797,0.417531,0.479821,1.0


In [19]:
log_returns_numeric = log_returns.drop(columns=['Date'])

corr = log_returns_numeric.corr()


In [20]:
corr = log_returns.select_dtypes(include='number').corr()


In [21]:
corr_long = corr.reset_index().melt(id_vars='index')
corr_long.columns = ['Stock 1', 'Stock 2', 'Correlation']

corr_long.to_csv("correlation_matrix.csv", index=False)


In [22]:
log_returns.index = prices["Date"].iloc[1:].values
log_returns.head()


Unnamed: 0,Date,AAPL,AMZN,JNJ,JPM,MSFT,NVDA,TSLA
2021-02-17,2021-02-17,-0.017802,0.012068,0.003568,0.003106,0.00435,-0.028064,0.002421
2021-02-18,2021-02-18,-0.008674,0.005903,0.000483,0.003371,-0.00168,-0.005179,-0.013586
2021-02-19,2021-02-19,0.001232,-0.023816,-0.016793,0.016553,-0.011635,0.006554,-0.007752
2021-02-22,2021-02-22,-0.030252,-0.02151,-0.000617,0.009347,-0.027174,-0.038988,-0.089376
2021-02-23,2021-02-23,-0.001112,0.004317,-0.008874,0.007999,-0.005302,-0.015002,-0.022161


In [23]:
correlation_matrix.to_csv("correlation_matrix.csv")
print("correlation_matrix.csv saved")


correlation_matrix.csv saved


In [24]:
log_returns_mc = log_returns[valid_tickers].astype(float)

mean_returns = log_returns_mc.mean().values
cov_matrix = log_returns_mc.cov().values

# Make covariance matrix stable
cov_matrix = cov_matrix + np.eye(cov_matrix.shape[0]) * 1e-10


In [25]:
log_returns_numeric = log_returns.select_dtypes(include=[np.number])

In [26]:
mean_returns = log_returns_numeric.mean().values
cov_matrix = log_returns_numeric.cov().values

In [27]:
num_assets = len(log_returns_numeric.columns)
weights = np.array([1/num_assets] * num_assets)

In [28]:
num_simulations = 10000  # or any number you want
time_horizon = 252        # 1 year
initial_value = 100000   # starting portfolio value

num_assets = len(log_returns_numeric.columns)
weights = np.array([1/num_assets] * num_assets)

simulated_portfolios = np.zeros(num_simulations)


In [29]:
for i in range(num_simulations):
  random_returns = np.random.multivariate_normal(
        mean_returns,
        cov_matrix,
        time_horizon
    )
portfolio_returns = random_returns @ weights
simulated_portfolios[i] = initial_value * np.exp(portfolio_returns.sum())

print("Simulation complete!")

Simulation complete!


In [30]:
mc_df = pd.DataFrame({
    "Simulation": range(1, num_simulations + 1),
    "PortfolioValue": simulated_portfolios
})

mc_df.to_csv("monte_carlo_simulation.csv", index=False)
print("monte_carlo_simulation.csv saved")


monte_carlo_simulation.csv saved


In [31]:
VaR_95 = np.percentile(simulated_portfolios, 5)

VaR_df = pd.DataFrame({
    "Confidence_Level": ["95%"],
    "Value_at_Risk": [VaR_95]
})

VaR_df.to_csv("VaR_summary.csv", index=False)
print("VaR_summary.csv saved")


VaR_summary.csv saved


In [32]:
VaR_df.head()


Unnamed: 0,Confidence_Level,Value_at_Risk
0,95%,0.0


In [33]:
from scipy.stats import skew, kurtosis

print("Skewness:", skew(simulated_portfolios))
print("Kurtosis:", kurtosis(simulated_portfolios))


Skewness: 99.98499937495626
Kurtosis: 9995.000100010004
