STEP 3

In [None]:
# Upload 15 CSVs for Financial Institutions
from google.colab import files
import pandas as pd
import io

# Prompt for the files, then keep whatever files.upload() returns. The
# processing cell iterates over it by filename and wraps each value in
# io.BytesIO to parse it as CSV.
print("Upload 15 Financial Institution CSV files...")
financial_uploads = files.upload()


Upload 15 Financial Institution CSV files...


In [None]:
# Process the Financial Institution CSVs
# Each uploaded file becomes one date-indexed 'Price' series, keyed by the
# filename with '.csv' removed. Files that fail to load are reported and skipped.
financial_data = {}

for filename in financial_uploads:
    try:
        df = pd.read_csv(io.BytesIO(financial_uploads[filename]))

        # errors='coerce' turns unparseable dates into NaT; drop those rows so
        # they cannot survive as NaT labels in the index.
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.dropna(subset=['Date']).sort_values('Date').set_index('Date')

        # Remove commas and hyphens as literal text (regex=False keeps this
        # independent of the pandas default). A cell holding only '-' becomes
        # '' and is coerced to NaN by to_numeric below.
        df['Price'] = (
            df['Price'].astype(str)
            .str.replace(',', '', regex=False)
            .str.replace('-', '', regex=False)
        )
        df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
        df = df[['Price']].dropna()

        # A repeated date would make the index non-unique, which can make the
        # DataFrame construction after this loop (outside the try) fail.
        df = df[~df.index.duplicated(keep='first')]

        ticker = filename.replace('.csv', '').strip()
        financial_data[ticker] = df['Price']
    except Exception as e:
        print(f"Error loading {filename}: {e}")

# One column per ticker, aligned on the union of all dates.
financial_prices = pd.DataFrame(financial_data)


In [None]:
# Upload 15 CSVs for Non-Financial Institutions
# `files` is the google.colab helper imported in the first upload cell. The
# result is consumed by filename in the processing cell that follows.
print("Upload 15 Non-Financial Institution CSV files...")
non_financial_uploads = files.upload()


In [None]:
# Process the Non-Financial Institution CSVs
# Each uploaded file becomes one date-indexed 'Price' series, keyed by the
# filename with '.csv' removed. Files that fail to load are reported and skipped.
non_financial_data = {}

for filename in non_financial_uploads:
    try:
        df = pd.read_csv(io.BytesIO(non_financial_uploads[filename]))

        # errors='coerce' turns unparseable dates into NaT; drop those rows so
        # they cannot survive as NaT labels in the index.
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df = df.dropna(subset=['Date']).sort_values('Date').set_index('Date')

        # Remove commas and hyphens as literal text (regex=False keeps this
        # independent of the pandas default). A cell holding only '-' becomes
        # '' and is coerced to NaN by to_numeric below.
        df['Price'] = (
            df['Price'].astype(str)
            .str.replace(',', '', regex=False)
            .str.replace('-', '', regex=False)
        )
        df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
        df = df[['Price']].dropna()

        # A repeated date would make the index non-unique, which can make the
        # DataFrame construction after this loop (outside the try) fail.
        df = df[~df.index.duplicated(keep='first')]

        ticker = filename.replace('.csv', '').strip()
        non_financial_data[ticker] = df['Price']
    except Exception as e:
        print(f"Error loading {filename}: {e}")

# One column per ticker, aligned on the union of all dates.
non_financial_prices = pd.DataFrame(non_financial_data)


In [None]:
# Combine all price data
# Place the two groups side by side, then keep only the rows in which every
# column has a value.
all_prices = pd.concat(
    [financial_prices, non_financial_prices],
    axis="columns",
)
all_prices_clean = all_prices.dropna(how="any")

print("First 5 rows of combined price data:")
print(all_prices_clean.head())


In [None]:
# Compute daily returns
# Row-over-row percentage change; the first row has no predecessor and is
# removed by the dropna.
price_changes = all_prices_clean.pct_change()
daily_returns = price_changes.dropna()

print("\nFirst 5 rows of daily returns:")
print(daily_returns.head())


In [None]:
# Plot daily returns for all 30 institutions
import matplotlib.pyplot as plt

# Create the Axes explicitly and hand it to pandas. DataFrame.plot() opens a
# figure of its own when no `ax` is given, so a separate plt.figure() call
# would be left behind as an extra, empty figure.
fig, ax = plt.subplots(figsize=(14, 7))
daily_returns.plot(ax=ax, linewidth=1, alpha=0.7)
ax.set_title('Daily Returns of Financial and Non-Financial Institutions (Sep–Oct 2008)')
ax.set_xlabel('Date')
ax.set_ylabel('Daily Return')
ax.legend(loc='upper right', fontsize='small', ncol=2)
ax.grid(True)
fig.tight_layout()
plt.show()


STEP 4

In [None]:
# Compute the 30x30 correlation matrix
# Pairwise Pearson correlation between the columns of daily_returns.
correlation_matrix = daily_returns.corr(method="pearson")
print("30x30 Correlation Matrix:", correlation_matrix, sep="\n")


In [None]:
# Produce the heatmap for the correlation matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heatmap of the correlation matrix on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(14, 12))
sns.heatmap(
    correlation_matrix,
    cmap="coolwarm",
    annot=True,
    fmt=".2f",
    linewidths=0.5,
    ax=ax,
)
ax.set_title("Heatmap of 30x30 Correlation Matrix", fontsize=16)
plt.xticks(rotation=90)
plt.yticks(rotation=0)
fig.tight_layout()
plt.show()


In [None]:
# Clustered heatmap to group similar correlations
# Hierarchical clustering logic
clustermap_options = dict(
    cmap="coolwarm",
    annot=True,
    fmt=".2f",
    linewidths=0.5,
    figsize=(15, 15),
    xticklabels=True,   # show every label rather than a thinned-out subset
    yticklabels=True,
)
sns.clustermap(correlation_matrix, **clustermap_options)

# y > 1 places the title above the grid's top edge.
plt.suptitle("Clustered Heatmap of Correlation Matrix", y=1.02, fontsize=16)
plt.show()


STEP 6

Team member A



Pseudocode: Upper Confidence Bound (UCB) Algorithm for k-Armed Bandit
Inputs:

k: number of arms (actions)

T: total number of time steps

c: confidence level parameter (controls exploration)

μ: true reward means (used for simulation)

σ: standard deviation of reward noise (assumed 1 here)

Initialize:

For each action a in {1, 2, ..., k}:

Set Q[a] ← 0  # estimated value (average reward)

Set N[a] ← 0  # count of times action a was chosen

For t from 1 to T:

a. For each action a in {1, 2, ..., k}:

If N[a] == 0:

Set UCB[a] ← ∞  # force at least one exploration of each action

Else:

Set UCB[a] ← Q[a] + c * sqrt(ln(t) / N[a])

b. Choose action A ← argmax_a UCB[a]

c. Simulate reward:

Draw R from normal distribution with mean μ[A] and standard deviation σ

d. Update estimates:

N[A] ← N[A] + 1

Q[A] ← Q[A] + (1 / N[A]) * (R - Q[A])  # incremental average



TEAM MEMBER B

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Set seed for reproducibility
np.random.seed(42)

# Problem definition
num_bandits, num_steps = 10, 1000          # arms, time steps
reward_std = 1.0                           # standard deviation of rewards
confidence_level = 2                       # exploration parameter 'c' in UCB
true_means = np.random.normal(0, 1, num_bandits)  # true reward mean of each arm
optimal_action = np.argmax(true_means)

# Running state of the learner
Q_values = np.zeros(num_bandits)       # estimated value of each arm
action_counts = np.zeros(num_bandits)  # number of pulls per arm
rewards = np.zeros(num_steps)          # reward obtained at each step

# UCB algorithm: the per-arm scores are computed for all arms at once.
for step_index in range(num_steps):
    t = step_index + 1

    # Arms with zero pulls divide by zero here; those entries are replaced by
    # +inf in np.where, so the warnings are silenced rather than acted on.
    with np.errstate(divide='ignore', invalid='ignore'):
        bonus = confidence_level * np.sqrt(np.log(t) / action_counts)
    ucb_values = np.where(action_counts == 0, np.inf, Q_values + bonus)

    # np.argmax returns the first maximum, so untried arms are taken in index order.
    chosen_action = np.argmax(ucb_values)

    # Simulate the reward of the chosen arm
    reward = np.random.normal(loc=true_means[chosen_action], scale=reward_std)

    # Incremental sample-average update
    action_counts[chosen_action] += 1
    Q_values[chosen_action] += (reward - Q_values[chosen_action]) / action_counts[chosen_action]

    rewards[step_index] = reward




In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Set seed for reproducibility
np.random.seed(42)

# Environment: each column of daily_returns is an arm, each row one time step.
stocks = daily_returns.columns.tolist()
num_bandits = len(stocks)
num_steps = len(daily_returns)

# The observed return matrix is the reward source (rows = days, cols = stocks).
return_matrix = daily_returns[stocks].values

confidence_level = 2  # UCB 'c' parameter

# Learner state
Q_values = np.zeros(num_bandits)
action_counts = np.zeros(num_bandits)
rewards = np.zeros(num_steps)
chosen_arms = []

# UCB loop across time steps (i.e., trading days)
for day in range(num_steps):
    t = day + 1

    # Untried arms divide by zero below; np.where overwrites those entries
    # with +inf, so the warnings are silenced rather than acted on.
    with np.errstate(divide='ignore', invalid='ignore'):
        bonus = confidence_level * np.sqrt(np.log(t) / action_counts)
    ucb_values = np.where(action_counts == 0, np.inf, Q_values + bonus)

    # First maximum wins, so untried arms are visited in column order.
    chosen_action = np.argmax(ucb_values)
    chosen_arms.append(stocks[chosen_action])

    # Reward = that day's realised return of the chosen stock
    reward = return_matrix[day, chosen_action]

    # Incremental sample-average update
    action_counts[chosen_action] += 1
    Q_values[chosen_action] += (reward - Q_values[chosen_action]) / action_counts[chosen_action]
    rewards[day] = reward

# Plot average reward over time
# Running mean: cumulative reward divided by the number of days elapsed.
days_elapsed = np.arange(num_steps) + 1
average_rewards = np.cumsum(rewards) / days_elapsed

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(average_rewards, label='Average Reward (UCB)', color='blue')
ax.set_xlabel('Day (Time Step)')
ax.set_ylabel('Average Daily Return')
ax.set_title('UCB Algorithm Applied to 30 Stocks')
ax.grid(True)
ax.legend()
fig.tight_layout()
plt.show()


In [None]:
import pandas as pd

# Count how many times each stock was selected
picked_stocks = pd.Series(chosen_arms)
selection_counts = picked_stocks.value_counts().sort_values(ascending=False)

# Plot bar chart
fig, ax = plt.subplots(figsize=(12, 6))
selection_counts.plot(kind='bar', color='skyblue', edgecolor='black', ax=ax)
ax.set_title("Frequency of Each Stock Selected by UCB Algorithm", fontsize=14)
ax.set_xlabel("Stock")
ax.set_ylabel("Number of Selections")
plt.xticks(rotation=45)
ax.grid(axis='y', linestyle='--', alpha=0.7)
fig.tight_layout()
plt.show()


In [None]:
# Average realised return of each stock over the days on which UCB picked it.
chosen_arms_arr = np.asarray(chosen_arms)  # converted once, not once per stock
average_returns = []
for i in range(num_bandits):
    picked_returns = return_matrix[chosen_arms_arr == stocks[i], i]
    # A stock that was never selected has no realised return. Record NaN
    # explicitly instead of letting np.mean warn on an empty slice.
    average_returns.append(picked_returns.mean() if picked_returns.size else np.nan)

# Create a DataFrame for easy viewing.
# 'Selections' is a Series indexed by stock name, so the resulting frame is
# indexed by stock name too; stocks absent from selection_counts get 0.
stock_performance = pd.DataFrame({
    'Stock': stocks,
    'Selections': selection_counts.reindex(stocks).fillna(0).astype(int),
    'Average Return': average_returns
})

# Sort by number of selections (descending) and then by average return (descending)
top_stocks = stock_performance.sort_values(by=['Selections', 'Average Return'], ascending=False).head(5)

# Display the top 5 stocks with their selections and average returns
top_stocks = top_stocks[['Stock', 'Selections', 'Average Return']]
top_stocks


STEP 8

Team member B

Pseudocode: ε-Greedy Algorithm for Non-Stationary k-Armed Bandits
Initialize:
Set number of bandits k

Set number of time steps T

Set exploration rate ε (epsilon)

Set step size α (for constant step-size version)

For each bandit i in 1 to k:

Initialize estimated value Q[i] ← 0

Initialize action count N[i] ← 0

For each episode:
Reset Q and N to zeros

Set current regime r ← 0

For each time step t = 1 to T:
If regime changes at time t, update current regime r

Get true reward means μ[1...k] for regime r

Choose action a using ε-greedy:

With probability ε, choose a random action

With probability 1 - ε, choose action a with highest Q[a]

If multiple actions tie, select randomly among them

Observe reward R by sampling from N(μ[a], 1)

Increment action count: N[a] ← N[a] + 1

Update Q-value:

If using sample averaging:
Q[a] ← Q[a] + (1 / N[a]) * (R - Q[a])

If using constant step-size:
Q[a] ← Q[a] + α * (R - Q[a])

(Optional) Track statistics like average reward and % optimal action

End For (time step loop)
Repeat for all episodes to average performance.

In [None]:
from graphviz import Digraph

# Create a new directed graph
dot = Digraph(comment='Epsilon-Greedy Algorithm Flowchart')

# Nodes: (identifier, label), added in this order.
flow_nodes = [
    ('A', 'Start'),
    ('B', 'Initialize:\nQ[a] = 0\nN[a] = 0\nε, α, T'),
    ('C', 'For each time step t = 1 to T'),
    ('D', 'Generate random number\nr ∈ [0, 1]'),
    ('E', 'r < ε?'),
    ('F', 'Choose random action a_t'),
    ('G', 'Choose greedy action:\na_t = argmax Q[a]'),
    ('H', 'Observe reward r_t\nfrom action a_t'),
    ('I', 'Increment count:\nN[a_t] += 1'),
    ('J', 'Update estimate:\nQ[a_t] ← Q[a_t] + α * (r_t - Q[a_t])'),
    ('K', 't < T?'),
    ('L', 'Next time step'),
    ('M', 'End'),
]
for node_id, node_label in flow_nodes:
    dot.node(node_id, node_label)

# Edges: (tail, head, label); None means an unlabelled edge.
flow_edges = [
    ('A', 'B', None),
    ('B', 'C', None),
    ('C', 'D', None),
    ('D', 'E', None),
    ('E', 'F', 'Yes'),
    ('E', 'G', 'No'),
    ('F', 'H', None),
    ('G', 'H', None),
    ('H', 'I', None),
    ('I', 'J', None),
    ('J', 'K', None),
    ('K', 'L', 'Yes'),
    ('L', 'C', None),
    ('K', 'M', 'No'),
]
for tail, head, edge_label in flow_edges:
    if edge_label is None:
        dot.edge(tail, head)
    else:
        dot.edge(tail, head, label=edge_label)

# Save or render
dot.render('epsilon_greedy_flowchart', format='png', cleanup=False)


TEAM MEMBER C

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# ---------------------------
# Parameters
# ---------------------------
NK, ITEMAX, NEPISODES = 10, 1000, 1000   # bandits (first 10 stocks), time steps, runs for averaging
EPSILON, ALPHA = 0.1, 0.2                # ε for exploration, constant step-size

# ---------------------------
# Prepare Data (from your dataset)
# ---------------------------
# Only the first NK columns of daily_returns are used as arms.
stocks = daily_returns.columns[:NK]
returns = daily_returns.loc[:, stocks]

# ---------------------------
# ε-greedy Selection Function
# ---------------------------
def select_action(qvalue, epsilon):
    """
    Pick an arm index with the ε-greedy rule.

    One uniform draw is compared with `epsilon`. Below it, a uniformly random
    index into `qvalue` is returned (exploration). Otherwise one of the
    indices holding the maximum of `qvalue` is returned, ties being broken
    uniformly at random (exploitation).
    """
    exploring = np.random.rand() < epsilon
    if exploring:
        return np.random.randint(len(qvalue))

    top_estimate = np.max(qvalue)
    greedy_arms = np.nonzero(qvalue == top_estimate)[0]
    return np.random.choice(greedy_arms)

# ---------------------------
# Reward Update Function
# ---------------------------
def update_qvalue(qvalue_old, action, reward, alpha):
    """
    Return a copy of `qvalue_old` in which the entry for `action` has moved a
    fraction `alpha` of the way toward `reward`. The input is left untouched.
    """
    step = alpha * (reward - qvalue_old[action])
    updated = qvalue_old.copy()
    updated[action] += step
    return updated

# Ensure ITEMAX does not exceed the number of rows in `returns`
ITEMAX = len(returns)  # Set ITEMAX to the number of available time steps (rows)

# Pull the rewards into a NumPy array once. Scalar DataFrame lookups inside
# the 2 x NEPISODES x ITEMAX loop below are needlessly slow.
reward_table = returns.values

# Size the estimates by the columns actually present, so select_action can
# never return an index that `returns` does not have.
n_arms = reward_table.shape[1]

# Index of the best-returning stock on each day (first maximum on ties).
best_arm_by_day = reward_table.argmax(axis=1)

# ---------------------------
# Run Simulation
# ---------------------------
reward_avg = np.zeros((ITEMAX, 2))      # [averaging, constant-alpha]
optimal_avg = np.zeros((ITEMAX, 2))     # fraction of episodes that picked the day's best stock

for update_type in range(2):  # 0: averaging, 1: constant-alpha
    for episode in range(NEPISODES):
        qvalue = np.zeros(n_arms)
        action_count = np.zeros(n_arms)
        for t in range(ITEMAX):
            # Choose action
            action = select_action(qvalue, EPSILON)

            # Reward = daily return of the chosen stock
            reward = reward_table[t, action]

            # Update counts and Q-values
            action_count[action] += 1
            if update_type == 0:
                alpha = 1.0 / action_count[action]  # Sample averaging
            else:
                alpha = ALPHA                      # Constant step-size

            qvalue = update_qvalue(qvalue, action, reward, alpha)

            # Logging performance (each episode contributes 1/NEPISODES)
            reward_avg[t, update_type] += reward / NEPISODES
            if action == best_arm_by_day[t]:
                optimal_avg[t, update_type] += 1.0 / NEPISODES

# ---------------------------
# Plotting results
# ---------------------------
# Two panels: mean reward per step, and how often the best arm was chosen.
fig, (ax_reward, ax_optimal) = plt.subplots(1, 2, figsize=(10, 4))

ax_reward.plot(reward_avg[:, 0], label='Sample Averaging')
ax_reward.plot(reward_avg[:, 1], label='Constant Step-Size')
ax_reward.set_xlabel("Steps")
ax_reward.set_ylabel("Average Reward")
ax_reward.set_title("Reward over Time")
ax_reward.legend()
ax_reward.grid(True)

# optimal_avg is accumulated in increments of 1.0 / NEPISODES, i.e. it is a
# fraction in [0, 1]. Scale by 100 so the data matches the "%" axis label.
ax_optimal.plot(100 * optimal_avg[:, 0], label='Sample Averaging')
ax_optimal.plot(100 * optimal_avg[:, 1], label='Constant Step-Size')
ax_optimal.set_xlabel("Steps")
ax_optimal.set_ylabel("Optimal Action %")
ax_optimal.set_title("Optimal Action Over Time")
ax_optimal.legend()
ax_optimal.grid(True)

fig.tight_layout()
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import rand, seed

# ---------------------------
# Parameters
# ---------------------------
NK = 10                    # Number of bandits
ITEMAX = 1000              # Time steps
NEPISODES = 1000           # Runs for averaging
EPSILON = 0.1              # ε for exploration
ALPHA = 0.2                # Constant step-size for non-stationary update
EPSILON_M = [0.0, 0.1]     # For comparison (greedy vs ε-greedy)

seed(1234)

# ---------------------------
# Regime Change Points
# ---------------------------
PCHANGE = 0.01  # Probability of reward distribution change

# One uniform draw per time step, in step order. Collecting the hits in a list
# avoids seeding the array with np.zeros(()), which np.append flattens into a
# spurious leading 0.0 and makes len(tchanges) disagree with ntchanges.
tchanges = np.array([t for t in range(ITEMAX) if rand() < PCHANGE], dtype=float)
ntchanges = len(tchanges)

# Initialize true reward means for all regimes: one column per regime, i.e.
# the initial regime plus one for every change point.
NMEANS = np.random.normal(loc=0.0, scale=1.0, size=(NK, ntchanges + 1))

# ---------------------------
# Plot reward structure across regimes
# ---------------------------
# One thin line per bandit, plus a thick line tracing the best mean in each regime.
fig, ax = plt.subplots()
for bandit in range(NK):
    ax.plot(NMEANS[bandit, :])
ax.plot(np.max(NMEANS, axis=0), linewidth=3.0, label="Max Reward")
ax.set_xlabel("Regime")
ax.set_ylabel("Mean Reward")
ax.set_title("True Bandit Reward Means")
ax.legend()  # without this call the "Max Reward" label is never shown
ax.grid(True)
plt.show()

# ---------------------------
# ε-greedy Selection Function
# ---------------------------
def select_action(qvalue, epsilon):
    """
    ε-greedy choice of an arm index.

    A single uniform draw decides the branch. If it falls below `epsilon`, a
    uniformly random index into `qvalue` is returned. Otherwise the function
    returns an index whose estimate equals the maximum of `qvalue`, chosen
    uniformly at random when several tie.
    """
    if rand() < epsilon:
        # Explore
        return np.random.randint(len(qvalue))

    # Exploit, with random tie-breaking among the maximal estimates
    is_best = qvalue == np.max(qvalue)
    tied_arms = np.nonzero(is_best)[0]
    return np.random.choice(tied_arms)

# ---------------------------
# Reward Update Function
# ---------------------------
def update_qvalue(qvalue_old, action, reward, alpha):
    """
    Apply one step-size update to the estimate of `action`.

    A copy of `qvalue_old` is returned with the entry for `action` increased
    by `alpha * (reward - qvalue_old[action])`; `qvalue_old` is not modified.
    """
    prediction_error = reward - qvalue_old[action]
    qvalue_next = qvalue_old.copy()
    qvalue_next[action] += alpha * prediction_error
    return qvalue_next

# ---------------------------
# Run Simulation
# ---------------------------
# Reward source: the `returns` frame (columns = stocks, rows = time steps),
# converted to a NumPy array once so the inner loop avoids DataFrame lookups.
reward_table = returns.values

# Ensure the action is within the bounds of available stocks (columns in returns)
N_STOCKS = returns.shape[1]  # Number of available stocks (columns)

# Ensure that the loop doesn't exceed the number of available rows in returns
n_rows = returns.shape[0]  # Number of rows in the returns DataFrame

n_arms = min(NK, N_STOCKS)     # arms that exist both as estimates and as data columns
n_steps = min(ITEMAX, n_rows)  # steps that actually have data

# Best-returning arm on each step (first maximum on ties).
best_arm_by_step = reward_table[:n_steps, :n_arms].argmax(axis=1)

# Sized by n_steps rather than ITEMAX, so steps that are never simulated do
# not appear in the plots as a tail of zeros.
reward_avg = np.zeros((n_steps, 2))      # [averaging, constant-alpha]
optimal_avg = np.zeros((n_steps, 2))     # fraction of episodes that picked the best arm

for update_type in range(2):  # 0: averaging, 1: constant-alpha
    for episode in range(NEPISODES):
        qvalue = np.zeros(n_arms)
        action_count = np.zeros(n_arms)
        for t in range(n_steps):
            # ε-greedy choice driven by the current estimates. Drawing the
            # action uniformly at random would ignore qvalue and EPSILON, so
            # both update rules would be evaluated on the same random policy.
            action = select_action(qvalue, EPSILON)

            # Reward = return of the chosen stock at this step
            reward = reward_table[t, action]

            # Update counts and Q-values
            action_count[action] += 1
            if update_type == 0:
                alpha = 1.0 / action_count[action]  # Sample averaging
            else:
                alpha = ALPHA                      # Constant step-size

            qvalue = update_qvalue(qvalue, action, reward, alpha)

            # Logging performance (each episode contributes 1/NEPISODES)
            reward_avg[t, update_type] += reward / NEPISODES
            if action == best_arm_by_step[t]:
                optimal_avg[t, update_type] += 1.0 / NEPISODES

# ---------------------------
# Plotting results
# ---------------------------
# Two panels: mean reward per step, and how often the best arm was chosen.
fig, (ax_reward, ax_optimal) = plt.subplots(1, 2, figsize=(10, 4))

ax_reward.plot(reward_avg[:, 0], label='Sample Averaging')
ax_reward.plot(reward_avg[:, 1], label='Constant Step-Size')
ax_reward.set_xlabel("Steps")
ax_reward.set_ylabel("Average Reward")
ax_reward.set_title("Reward over Time")
ax_reward.legend()
ax_reward.grid(True)

# optimal_avg is accumulated in increments of 1.0 / NEPISODES, i.e. it is a
# fraction in [0, 1]. Scale by 100 so the data matches the "%" axis label.
ax_optimal.plot(100 * optimal_avg[:, 0], label='Sample Averaging')
ax_optimal.plot(100 * optimal_avg[:, 1], label='Constant Step-Size')
ax_optimal.set_xlabel("Steps")
ax_optimal.set_ylabel("Optimal Action %")
ax_optimal.set_title("Optimal Action Over Time")
ax_optimal.legend()
ax_optimal.grid(True)

fig.tight_layout()
plt.show()


TEAM MEMBER A

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from numpy.random import rand, seed

# Assuming you already have 'all_prices_clean' and 'daily_returns' computed

# ---------------------------
# Parameters
# ---------------------------
# Rows of daily_returns are time steps, columns are bandits (stocks).
ITEMAX, NK = daily_returns.shape
NEPISODES = 1000                      # Runs for averaging
EPSILON = 0.1                         # ε for exploration
ALPHA = 0.2                           # Constant step-size for non-stationary update
EPSILON_M = [0.0, 0.1]                # For comparison (greedy vs ε-greedy)

seed(1234)

# ---------------------------
# ε-greedy Selection Function
# ---------------------------
def select_action(qvalue, epsilon):
    """
    Return an arm index chosen ε-greedily from the estimates in `qvalue`.

    If a uniform draw is below `epsilon`, the index is uniformly random.
    Otherwise it is one of the indices attaining the maximum of `qvalue`,
    picked uniformly at random among ties.
    """
    draw = rand()
    if draw < epsilon:
        return np.random.randint(len(qvalue))  # Explore

    best_value = np.max(qvalue)
    best_arms = np.nonzero(qvalue == best_value)[0]
    return np.random.choice(best_arms)  # Exploit with tie-breaking

# ---------------------------
# Reward Update Function
# ---------------------------
def update_qvalue(qvalue_old, action, reward, alpha):
    """
    Produce the estimates after observing `reward` for `action`.

    The returned array is a copy of `qvalue_old` whose `action` entry has been
    shifted by `alpha` times the gap between `reward` and the old estimate.
    """
    adjustment = alpha * (reward - qvalue_old[action])
    refreshed = qvalue_old.copy()
    refreshed[action] += adjustment
    return refreshed

# ---------------------------
# Run Simulation
# ---------------------------
# Convert the returns to a NumPy array once. Scalar DataFrame lookups and
# np.argmax on a pandas row inside the 2 x NEPISODES x ITEMAX loop are slow.
reward_table = daily_returns.values

# Position of the best-returning stock on each day (first maximum on ties).
# It is taken on the array, so the comparison with the integer `action`
# below is always position-based.
best_arm_by_day = reward_table.argmax(axis=1)

reward_avg = np.zeros((ITEMAX, 2))      # [averaging, constant-alpha]
optimal_avg = np.zeros((ITEMAX, 2))     # fraction of episodes that picked the day's best stock

for update_type in range(2):  # 0: averaging, 1: constant-alpha
    for episode in range(NEPISODES):
        qvalue = np.zeros(NK)          # Initial Q-values (estimates)
        action_count = np.zeros(NK)    # Count of actions taken for each bandit
        for t in range(ITEMAX):
            # Select action based on current Q-values and epsilon
            action = select_action(qvalue, EPSILON)

            # Reward = daily return of the chosen stock
            reward = reward_table[t, action]

            # Update counts and Q-values
            action_count[action] += 1
            if update_type == 0:
                alpha = 1.0 / action_count[action]  # Sample averaging
            else:
                alpha = ALPHA                      # Constant step-size

            qvalue = update_qvalue(qvalue, action, reward, alpha)

            # Logging performance (each episode contributes 1/NEPISODES)
            reward_avg[t, update_type] += reward / NEPISODES
            if action == best_arm_by_day[t]:
                optimal_avg[t, update_type] += 1.0 / NEPISODES

# ---------------------------
# Plotting results
# ---------------------------
# Two panels: mean reward per step, and how often the best arm was chosen.
fig, (ax_reward, ax_optimal) = plt.subplots(1, 2, figsize=(10, 4))

ax_reward.plot(reward_avg[:, 0], label='Sample Averaging')
ax_reward.plot(reward_avg[:, 1], label='Constant Step-Size')
ax_reward.set_xlabel("Steps")
ax_reward.set_ylabel("Average Reward")
ax_reward.set_title("Reward over Time")
ax_reward.legend()
ax_reward.grid(True)

# optimal_avg is accumulated in increments of 1.0 / NEPISODES, i.e. it is a
# fraction in [0, 1]. Scale by 100 so the data matches the "%" axis label.
ax_optimal.plot(100 * optimal_avg[:, 0], label='Sample Averaging')
ax_optimal.plot(100 * optimal_avg[:, 1], label='Constant Step-Size')
ax_optimal.set_xlabel("Steps")
ax_optimal.set_ylabel("Optimal Action %")
ax_optimal.set_title("Optimal Action Over Time")
ax_optimal.legend()
ax_optimal.grid(True)

fig.tight_layout()
plt.show()
