## Self-Organizing Map (SOM) for Clustering Dow Jones Stocks

In [1]:
import numpy as np
import pandas as pd
from minisom import MiniSom
import yfinance as yf 
import matplotlib.pyplot as plt

In [2]:
# getting the stock data
def stock_data(ticker, start_date, end_date):
    """Download price history for one ticker and return its closing prices.

    The history is fetched with ``yf.download`` for the given date range;
    only the ``'Close'`` column is kept.

    Args:
        ticker: Symbol passed straight to ``yf.download``.
        start_date: Value forwarded as the ``start`` argument.
        end_date: Value forwarded as the ``end`` argument.

    Returns:
        The closing prices reshaped into a single column, i.e. an array of
        shape ``(n, 1)``.
    """
    history = yf.download(ticker, start=start_date, end=end_date)
    closes = history['Close'].values
    # Force a column vector so several tickers can later be joined
    # side by side.
    return closes.reshape(-1, 1)

In [3]:
# normalizing the data 
def data_normalize(data):
    """Standardize *data* to zero mean and unit standard deviation.

    The mean and standard deviation are computed over every element of
    ``data`` (no axis is passed), so the whole input is shifted and scaled
    by a single pair of statistics.

    Args:
        data: Numeric array-like to standardize.

    Returns:
        ``(data - mean) / std``. When every element of ``data`` is equal
        the standard deviation is zero; in that case an all-zero float
        array of the same shape is returned instead of NaN/inf.
    """
    values = np.asarray(data)
    # A constant series has no spread: dividing by its (zero) standard
    # deviation would yield NaN/inf and poison later distance computations.
    # Comparing min and max detects this exactly, independent of rounding
    # in the mean.
    if values.size > 0 and values.min() == values.max():
        return np.zeros(values.shape, dtype=float)
    return (data - np.mean(data)) / np.std(data)

In [4]:
# function to train the Self Organizing Maps:
def train_som(data, som_size, num_iterations, sigma=0.3, learning_rate=0.5,
              random_seed=None):
    """Build a MiniSom sized for *data* and train it on randomly picked rows.

    Args:
        data: 2-D array of samples; ``data.shape[1]`` is used as the SOM
            input length, so each row is one sample.
        som_size: Pair whose first and second items are the two grid
            dimensions of the map.
        num_iterations: Iteration count passed to ``train_random``.
        sigma: Neighbourhood spread forwarded to ``MiniSom`` (default 0.3,
            the value this function always used).
        learning_rate: Initial learning rate forwarded to ``MiniSom``
            (default 0.5, the value this function always used).
        random_seed: Optional seed forwarded to ``MiniSom`` so that a run
            can be reproduced; ``None`` keeps the unseeded behaviour.

    Returns:
        The trained ``MiniSom`` instance.
    """
    grid_x, grid_y = som_size[0], som_size[1]
    som = MiniSom(
        grid_x,
        grid_y,
        data.shape[1],
        sigma=sigma,
        learning_rate=learning_rate,
        random_seed=random_seed,
    )
    som.train_random(data, num_iterations)
    return som

In [5]:
# function to visualize the SOM clusters:
def visualize_clusters(data, som, tickers):
    """Scatter every row of *data* at its winning SOM cell and label it.

    Each row is mapped to its best-matching unit via ``som.winner``. The
    marker is drawn at that grid coordinate plus a random offset in
    ``[0, 1)`` on both axes so rows sharing a cell do not overlap exactly.
    Marker colour comes from the row's first value, min-max scaled over the
    whole of ``data`` and looked up in the viridis colormap.

    Args:
        data: 2-D array whose rows are the samples to place on the map.
        som: Trained map exposing ``winner(sample)``.
        tickers: Labels; row ``i`` is annotated with ``tickers[i]`` while
            ``i < len(tickers)``, remaining rows are left unlabelled.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(10, 8))

    # The global range is the same for every row, so compute it once.
    lowest = np.min(data)
    span = np.max(data) - lowest

    for i, stock in enumerate(data):
        winner = som.winner(stock)

        # With no spread in the data there is nothing to scale; fall back
        # to the low end of the colormap instead of dividing by zero.
        color_value = (stock[0] - lowest) / span if span > 0 else 0.0

        # Jitter once and reuse it, so the label sits on its own marker
        # rather than on the shared cell corner.
        x = winner[0] + np.random.rand()
        y = winner[1] + np.random.rand()
        plt.scatter(x, y, marker='o', color=plt.cm.viridis(color_value))

        # Rows beyond the label list are plotted without a label.
        if i < len(tickers):
            plt.annotate(tickers[i], (x, y), textcoords="offset points", xytext=(0, 5), ha='center')

    plt.title('DOW Jones Stock Clusters using SOM')
    plt.show()

In [9]:
def main():
    """Download closing prices for a fixed ticker list, cluster them, plot.

    Steps: fetch each ticker's closing prices for 2023-01-01 to 2024-01-01,
    standardize each series on its own, arrange them as one row per ticker,
    train a 5x5 SOM for 500 iterations and show the resulting map.
    """
    dow_stocks = ['AAPL', 'MSFT', 'IBM', 'GOOGL', 'AMZN', 'JPM', 'GS', 'DIS', 'BA', 'CSCO']

    start_date = '2023-01-01'
    end_date = '2024-01-01'

    # Fetch the historical data. The result must not be named `stock_data`:
    # that would make the name local to this function and break the call to
    # the module-level stock_data() helper.
    price_series = [stock_data(ticker, start_date, end_date) for ticker in dow_stocks]

    # Normalize each ticker separately, then join the column vectors side by
    # side (this requires every series to have the same length).
    per_ticker_columns = np.concatenate(
        [data_normalize(series) for series in price_series], axis=1
    )
    # The SOM and the plot treat each ROW as one sample and label row i with
    # dow_stocks[i], so put one ticker per row.
    normalized_data = per_ticker_columns.T

    # Train the SOM
    som_size = (5, 5)
    num_iterations = 500
    som = train_som(normalized_data, som_size, num_iterations)

    # Visualize
    visualize_clusters(normalized_data, som, dow_stocks)
    