### Problem 1

In [2]:
import requests
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import yfinance as yf

In [3]:
# ---------------------------------------------------------
# Step 1: Fetch Nifty50 data from NSE
# ---------------------------------------------------------
session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Referer": "https://www.nseindia.com/"
})
session.get("https://www.nseindia.com")

nifty50_url = "https://www.nseindia.com/api/equity-stockIndices?index=NIFTY%2050"
response = session.get(nifty50_url)
response.raise_for_status()
data = response.json()

symbols = [item['symbol'] for item in data['data']]

if "NIFTY 50" in symbols:
    symbols.remove("NIFTY 50")

nse_details = {}
for item in data['data']:
    sym = item['symbol']
    nse_details[sym] = {
        "LTP": item.get('lastPrice'),
        "Volume": item.get('totalTradedVolume'),
        "%Change": item.get('pChange'),
        "52W_High": item.get('yearHigh'),
        "52W_Low": item.get('yearLow'),
        "Day_High": item.get('dayHigh'),
        "Day_Low": item.get('dayLow')
    }

# ---------------------------------------------------------
# Step 2: Fetch PE, EPS, Market Cap from yfinance's stock.info
# ---------------------------------------------------------
info_data = {}
for sym in symbols:
    yahoo_symbol = sym + ".NS"
    try:
        stock = yf.Ticker(yahoo_symbol)
        info = stock.info
    except:
        info = {}
    pe = info.get('trailingPE', np.nan)
    eps = info.get('trailingEps', np.nan)
    market_cap = info.get('marketCap', np.nan)

    info_data[sym] = {
        "PE": pe,
        "EPS": eps,
        "Market_Cap": market_cap
    }


# ---------------------------------------------------------
# Step 3: Compute Returns from Yahoo Finance (6M, 1Y, 5Y)
# ---------------------------------------------------------
def get_returns(symbol):
    yahoo_symbol = symbol + ".NS"
    end = datetime.now()
    start_5y = end - timedelta(days=5 * 365)
    try:
        df_yf = yf.download(yahoo_symbol, start=start_5y, end=end, progress=False)
    except:
        return None, None, None

    if df_yf.empty:
        return None, None, None

    current_price = df_yf['Adj Close'].iloc[-1].item() if hasattr(df_yf['Adj Close'].iloc[-1], 'item') else \
    df_yf['Adj Close'].iloc[-1]

    # 6-month return
    start_6m = end - timedelta(days=182)
    df_6m = df_yf[df_yf.index >= start_6m]
    ret_6m = None
    if not df_6m.empty:
        old_6m = df_6m['Adj Close'].iloc[0].item() if hasattr(df_6m['Adj Close'].iloc[0], 'item') else \
        df_6m['Adj Close'].iloc[0]
        ret_6m = ((current_price - old_6m) / old_6m) * 100

    # 1-year return
    start_1y = end - timedelta(days=365)
    df_1y = df_yf[df_yf.index >= start_1y]
    ret_1y = None
    if not df_1y.empty:
        old_1y = df_1y['Adj Close'].iloc[0].item() if hasattr(df_1y['Adj Close'].iloc[0], 'item') else \
        df_1y['Adj Close'].iloc[0]
        ret_1y = ((current_price - old_1y) / old_1y) * 100

    # 5-year return
    old_5y = df_yf['Adj Close'].iloc[0].item() if hasattr(df_yf['Adj Close'].iloc[0], 'item') else \
    df_yf['Adj Close'].iloc[0]
    ret_5y = ((current_price - old_5y) / old_5y) * 100

    return ret_6m, ret_1y, ret_5y


returns_data = {}
for sym in symbols:
    returns_data[sym] = get_returns(sym)

# ---------------------------------------------------------
# Step 4: Combine all data into a DataFrame
# ---------------------------------------------------------
all_rows = []
for sym in symbols:
    rets = returns_data[sym]

    row = {
        "Symbol": sym,
        "LTP": nse_details[sym].get("LTP"),
        "Volume": nse_details[sym].get("Volume"),
        "%Change": nse_details[sym].get("%Change"),
        "PE": info_data[sym].get("PE"),
        "EPS": info_data[sym].get("EPS"),
        "Market_Cap": info_data[sym].get("Market_Cap"),
        "52W_High": nse_details[sym].get("52W_High"),
        "52W_Low": nse_details[sym].get("52W_Low"),
        "Upper_Circuit": nse_details[sym].get("Day_High"),
        "Lower_Circuit": nse_details[sym].get("Day_Low"),
        "6M_Return%": rets[0] if rets else np.nan,
        "1Y_Return%": rets[1] if rets else np.nan,
        "5Y_Return%": rets[2] if rets else np.nan
    }

    all_rows.append(row)

df = pd.DataFrame(all_rows)
print(df.head())

       Symbol       LTP    Volume  %Change         PE     EPS      Market_Cap  \
0  BHARTIARTL   1682.00  12900707     4.44  81.283226   20.69  10068587184128   
1         ITC    471.00  31753858     2.26  28.658537   16.40   5880076042240   
2   KOTAKBANK   1804.90   3893531     2.05  16.474909  109.60   3589921046528   
3  HINDUNILVR   2391.25   2260902     1.97  54.643350   43.74   5615755198464   
4  ULTRACEMCO  12082.35    408880     1.90  53.190865  227.18   3485226500096   

   52W_High  52W_Low  Upper_Circuit  Lower_Circuit  6M_Return%  1Y_Return%  \
0    1779.0   960.00        1685.00        1606.80   18.396895   70.777290   
1     528.5   399.35         474.40         451.65    9.620991    7.464088   
2    1942.0  1543.85        1809.00        1748.05    5.131385   -2.128963   
3    3035.0  2172.05        2394.55        2333.45   -3.137340   -4.093450   
4   12138.0  9250.00       12118.50       11730.00    9.321947   21.917101   

   5Y_Return%  
0  296.572930  
1  140.53833

### Problem 2

In [4]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm, t
from math import sqrt

# Fetch historical data for a chosen stock, e.g., Apple (AAPL)
ticker = "AAPL"
start_date = "2020-01-01"
end_date = "2023-01-01"
df = yf.download(ticker, start=start_date, end=end_date)

# Compute daily returns
df['Daily_Return'] = df['Adj Close'].pct_change()
df = df.dropna(subset=['Daily_Return'])

# Basic statistics
mean_return = df['Daily_Return'].mean()
std_return = df['Daily_Return'].std()
n = len(df['Daily_Return'])

print("Statistics:")
print(f"Mean Daily Return: {mean_return}")
print(f"Std Dev Daily Return: {std_return}")
print(f"Sample Size (n): {n}")

# Z-scores for daily returns
df['Daily_Return_Z'] = (df['Daily_Return'] - mean_return) / std_return

# For T-scores, we typically use the sample mean and sample std with a t-distribution.
# Confidence Interval for mean daily return (95% CI)
alpha = 0.05
df_degrees = n - 1
t_crit = t.ppf(1 - alpha/2, df_degrees)
margin_of_error = t_crit * std_return / sqrt(n)
ci_lower = mean_return - margin_of_error
ci_upper = mean_return + margin_of_error
print(f"95% Confidence Interval for Mean Daily Return (t-based): [{ci_lower}, {ci_upper}]")

# If we assume population std known (hypothetical), we can use Z-based CI:
z_crit = norm.ppf(1 - alpha/2)
z_margin_of_error = z_crit * std_return / sqrt(n)
z_ci_lower = mean_return - z_margin_of_error
z_ci_upper = mean_return + z_margin_of_error
print(f"95% CI (z-based): [{z_ci_lower}, {z_ci_upper}]")

# Plot Probability Distributions
sns.set_style('whitegrid')

# Distribution of Daily Volume
plt.figure(figsize=(10, 6))
sns.histplot(df['Volume'], kde=True, stat="density")
plt.title(f"Distribution of Daily Volume for {ticker}")
plt.xlabel("Volume")
plt.ylabel("Density")
plt.show()

# Distribution of Daily Close Price
plt.figure(figsize=(10, 6))
sns.histplot(df['Adj Close'], kde=True, stat="density")
plt.title(f"Distribution of Daily Close Price for {ticker}")
plt.xlabel("Adjusted Close Price")
plt.ylabel("Density")
plt.show()

# Distribution of Daily Returns
plt.figure(figsize=(10, 6))
sns.histplot(df['Daily_Return'], kde=True, stat="density")
plt.title(f"Distribution of Daily Returns for {ticker}")
plt.xlabel("Daily Return")
plt.ylabel("Density")
plt.show()

# Additionally, overlay a fitted normal PDF on daily returns
mean_dr = mean_return
std_dr = std_return
x_values = np.linspace(df['Daily_Return'].min(), df['Daily_Return'].max(), 1000)
pdf = norm.pdf(x_values, mean_dr, std_dr)

plt.figure(figsize=(10, 6))
sns.histplot(df['Daily_Return'], kde=False, stat="density", label='Data')
plt.plot(x_values, pdf, 'r', label='Fitted Normal PDF')
plt.title(f"Daily Returns Distribution with Normal Fit - {ticker}")
plt.xlabel("Daily Return")
plt.ylabel("Density")
plt.legend()
plt.show()


[*********************100%***********************]  1 of 1 completed


KeyError: ['Daily_Return']

### Bonus

In [5]:
from statsmodels.tsa.stattools import adfuller, kpss

# Choose any two stocks: e.g., Apple (AAPL), Microsoft (MSFT)
# Adjust the date range as needed. For demonstration, we take data from 2023-01-01 to today.
stock_symbols = ["AAPL", "MSFT"]
start_date = "2023-01-01"
end_date = "2024-01-01"  # Adjust if future date is unrealistic, or use str(datetime.today().date())

# Fetch data from Yahoo Finance
data = yf.download(stock_symbols, start=start_date, end=end_date)['Adj Close'].dropna()

# If multi-column: data is a DataFrame with a MultiIndex (Stock symbol, fields)
# If single column, just handle accordingly.
# Let's assume we have a separate series for each stock:
returns_data = {}
for symbol in stock_symbols:
    # Compute daily returns
    stock_prices = data[symbol].dropna()
    stock_returns = stock_prices.pct_change().dropna()
    returns_data[symbol] = stock_returns

    # Perform ADF test
    adf_result = adfuller(stock_returns, autolag='AIC')
    adf_stat, adf_pvalue = adf_result[0], adf_result[1]

    # Perform KPSS test
    # KPSS requires a trend='c' or 'ct', commonly 'c' is used for level stationarity
    kpss_result = kpss(stock_returns, regression='c')
    kpss_stat, kpss_pvalue = kpss_result[0], kpss_result[1]

    print(f"Results for {symbol}:")
    print("ADF Test:")
    print(f"  Test Statistic: {adf_stat}")
    print(f"  p-value: {adf_pvalue}")
    if adf_pvalue < 0.05:
        print("  The series is likely stationary based on ADF.")
    else:
        print("  The series is likely non-stationary based on ADF.")

    print("KPSS Test:")
    print(f"  Test Statistic: {kpss_stat}")
    print(f"  p-value: {kpss_pvalue}")
    if kpss_pvalue < 0.05:
        print("  The series is likely non-stationary based on KPSS.")
    else:
        print("  The series is likely stationary based on KPSS.")

    print("-" * 50)

# Interpretation:
# ADF null: series is non-stationary; reject null if p < 0.05 => stationary
# KPSS null: series is stationary; reject null if p < 0.05 => non-stationary
# If ADF says stationary and KPSS says stationary, then the series is likely stationary.
# If ADF says non-stationary and KPSS says non-stationary, it suggests the series is likely non-stationary.
# Contradicting results might require further analysis.


[*********************100%***********************]  2 of 2 completed

Results for AAPL:
ADF Test:
  Test Statistic: -14.86434114215758
  p-value: 1.700428345558108e-27
  The series is likely stationary based on ADF.
KPSS Test:
  Test Statistic: 0.36915915418316775
  p-value: 0.09044864043828976
  The series is likely stationary based on KPSS.
--------------------------------------------------
Results for MSFT:
ADF Test:
  Test Statistic: -13.491226513539486
  p-value: 3.103435404614185e-25
  The series is likely stationary based on ADF.
KPSS Test:
  Test Statistic: 0.10853812271998156
  p-value: 0.1
  The series is likely stationary based on KPSS.
--------------------------------------------------



look-up table. The actual p-value is greater than the p-value returned.

  kpss_result = kpss(stock_returns, regression='c')


The warning from the KPSS test indicates that the test statistic is outside the precomputed lookup table range for p-values. This often means the p-value is just at the boundary of the test’s internal tables. However, since the reported p-values are above 0.05, the conclusion remains that we cannot reject stationarity.

Conclusion:
Both the ADF and KPSS tests suggest that the daily returns of AAPL and MSFT over the given 2023–2024 period are stationary time series.