<a href="https://colab.research.google.com/github/anirbanghoshsbi/.github.io/blob/master/work/portfolio/temp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [64]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering, AffinityPropagation, KMeans
import yfinance as yf
start_date = "2023-10-01"
end_date = "2023-12-31"
def get_nifty50_tickers():
    #data = pd.read_csv('https://raw.githubusercontent.com/anirbanghoshsbi/data/main/STOCKS100.txt', sep='\t')
    data=pd.read_csv('https://raw.githubusercontent.com/anirbanghoshsbi/data/main/ind_nifty100list.csv')
    nifty100 = data['Symbol'].apply(lambda x: x + ".NS").tolist()
    reject_lst=['MOTHERSUMI.NS', 'CADILAHC.NS', 'ADANITRANS.NS', 'SRTRANSFIN.NS', 'INFRATEL-EQ.NS','HDFC.NS','LTI.NS']
    nifty_final= [item for item in nifty100 if item not in reject_lst]
    return nifty_final
def perform_clustering(stock_returns, method):
    if method == "agglomerative":
        # Agglomerative clustering with pre-determined number of clusters
        clustering = AgglomerativeClustering(n_clusters=15, metric='euclidean', linkage='ward')
        clusters = clustering.fit_predict(stock_returns)
    elif method == "affinity_propagation":
        # Affinity propagation clustering
        clustering = AffinityPropagation()
        clusters = clustering.fit_predict(stock_returns)
    elif method == "kmeans":
        # KMeans clustering (benchmark)
        kmeans = KMeans(n_clusters=10)
        clusters = kmeans.fit_predict(stock_returns)
    else:
        raise ValueError("Invalid clustering method specified.")
    return clusters

def construct_portfolio(clusters, embedding_data):
    # Logic to select stocks based on clusters and distance in embedded space
    portfolio = []
    for cluster_id in np.unique(clusters):
        cluster_indices = np.where(clusters == cluster_id)[0]
        if len(cluster_indices) >= 10:
            # Select top ten stocks with minimum Euclidean distance to cluster centers
            distances = np.linalg.norm(embedding_data[cluster_indices] - np.mean(embedding_data[cluster_indices], axis=0), axis=1)
            top_ten_indices = cluster_indices[np.argsort(distances)[:10]]
            portfolio.extend(top_ten_indices)
    return portfolio

# ----- Main Loop -----
stock_symbols=get_nifty50_tickers()
data = yf.download(stock_symbols, start=start_date, end=end_date)["Adj Close"]
returns = data.pct_change().fillna(0)

# Placeholder for handling quarterly updates
for quarter in range(1, 5):
    # Subset data by quarter (assuming quarterly data)
    # ... Inside your main loop
    for quarter in range(1, 5):
      start_date = pd.to_datetime(f"2023-Q{quarter}")  # Dynamically define quarter start
      end_date = start_date + pd.offsets.QuarterEnd()
    print(start_date)
    print(end_date)
    quaterly_data = returns.loc[start_date:end_date]

    # ... Now perform dimensionality reduction, clustering,
    # and portfolio generation on 'quarterly_data'

    # embedded_data = perform_dimensionality_reduction(data)
    n_components = 2  # Reduce to 2 dimensions in this example
    pca = PCA(n_components=n_components)
    embedding_data = pca.fit_transform(quaterly_data)  # Obtain embeddings

    # correlation_network = calculate_graphical_lasso(data)

    clusters = perform_clustering(quaterly_data, method="agglomerative")  # Or your chosen method
    portfolio = construct_portfolio(clusters, embedding_data)

    # Evaluate portfolio performance
    # performance_metrics = evaluate_portfolio_performance(portfolio)


[*********************100%%**********************]  96 of 96 completed


2023-10-01 00:00:00
2023-12-31 00:00:00
2023-10-01 00:00:00
2023-12-31 00:00:00
2023-10-01 00:00:00
2023-12-31 00:00:00
2023-10-01 00:00:00
2023-12-31 00:00:00


In [65]:
len(portfolio)

20

In [66]:
portfolio

[57, 29, 3, 55, 38, 17, 39, 30, 21, 5, 52, 0, 12, 8, 26, 32, 33, 34, 36, 15]

In [67]:
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import AgglomerativeClustering, AffinityPropagation, KMeans
import yfinance as yf

start_date = "2023-10-01"
end_date = "2023-12-31"

def get_nifty50_tickers():
    data = pd.read_csv('https://raw.githubusercontent.com/anirbanghoshsbi/data/main/ind_nifty100list.csv')
    nifty100 = data['Symbol'].apply(lambda x: x + ".NS").tolist()
    reject_lst = ['MOTHERSUMI.NS', 'CADILAHC.NS', 'ADANITRANS.NS', 'SRTRANSFIN.NS',
                    'INFRATEL-EQ.NS', 'HDFC.NS', 'LTI.NS']
    return [item for item in nifty100 if item not in reject_lst]

def perform_clustering(stock_returns, method):
    if method == "agglomerative":
        # Agglomerative with pre-determined clusters (Euclidean metric has no effect with Ward)
        clustering = AgglomerativeClustering(n_clusters=15, linkage='ward')
        clusters = clustering.fit_predict(stock_returns)
    elif method == "affinity_propagation":
        clustering = AffinityPropagation()
        clusters = clustering.fit_predict(stock_returns)
    elif method == "kmeans":
        kmeans = KMeans(n_clusters=10)
        clusters = kmeans.fit_predict(stock_returns)
    else:
        raise ValueError("Invalid clustering method specified.")
    return clusters

def construct_portfolio(clusters, embedding_data):
    portfolio = []
    for cluster_id in np.unique(clusters):
        cluster_indices = np.where(clusters == cluster_id)[0]
        if len(cluster_indices) >= 10:
            distances = np.linalg.norm(embedding_data[cluster_indices] - np.mean(embedding_data[cluster_indices], axis=0), axis=1)
            top_ten_indices = cluster_indices[np.argsort(distances)[:10]]
            portfolio.extend(top_ten_indices)
    return portfolio

# ----- Main Loop -----
stock_symbols = get_nifty50_tickers()

# Data download with basic error handling
try:
    data = yf.download(stock_symbols, start=start_date, end=end_date)["Adj Close"]
    returns = data.pct_change().fillna(0)
except Exception as e:
    print(f"Error downloading data: {e}")
    exit()  # Or handle it differently



[*********************100%%**********************]  96 of 96 completed


In [68]:
# Quarter handling (assumes focusing on 2023)
for quarter in range(1, 5):
    for quarter in range(1, 5):
      start_date = pd.to_datetime(f"2023-Q{quarter}")  # Dynamically define quarter start
      end_date = start_date + pd.offsets.QuarterEnd()


    quarterly_data = returns.loc[start_date:end_date]
    print(len(quarterly_data))
    # Dimensionality Reduction
    n_components = 2
    pca = PCA(n_components=n_components)
    embedding_data = pca.fit_transform(quarterly_data)

    # Clustering
    clusters = perform_clustering(quarterly_data, method="agglomerative")
    print('Deleted Previous Portfolio')
    # Portfolio construction
    portfolio = construct_portfolio(clusters, embedding_data)
    print('Created Portfolio')

    # Placeholder: Portfolio evaluation would go here


60
Deleted Previous Portfolio
Created Portfolio
60
Deleted Previous Portfolio
Created Portfolio
60
Deleted Previous Portfolio
Created Portfolio
60
Deleted Previous Portfolio
Created Portfolio


In [69]:
#[quarterly_data[i] for i in portfolio]

KeyError: 57

In [70]:
len(portfolio)

20

In [None]:
def evaluate_portfolio_performance(portfolio, quarterly_data):
    """Calculates performance metrics for a portfolio against a benchmark.

    Args:
        portfolio (list): List of stock indices in the portfolio.
        quarterly_data (pandas.DataFrame): Dataframe of stock returns for the quarter.
        benchmark_data (pandas.Series): Returns of a benchmark index for the quarter.

    Returns:
        dict: Dictionary of performance metrics.
    """

    portfolio_returns = quarterly_data.iloc[portfolio].mean(axis=1)  # Average across portfolio
    #benchmark_returns = benchmark_data

    performance_metrics = {}

    # --- Example Metrics ---
    # 1. Absolute Return
    performance_metrics["portfolio_return"] = portfolio_returns.sum() * 100
    #performance_metrics["benchmark_return"] = benchmark_returns.sum() * 100

    # 2. Volatility (Standard Deviation)
    performance_metrics["portfolio_std"] = portfolio_returns.std() * np.sqrt(252) * 100  # Annualized
    #performance_metrics["benchmark_std"] = benchmark_returns.std() * np.sqrt(252) * 100

    # 3. Sharpe Ratio
    rf_rate = 0.05  # Placeholder: Assume a risk-free rate
    performance_metrics["sharpe_ratio"] = (portfolio_returns.mean() - rf_rate) / portfolio_returns.std()

    # ... Calculate other metrics as needed ...

    return performance_metrics


In [None]:
#quarterly_data.T.iloc[portfolio]

In [None]:
evaluate_portfolio_performance(portfolio, quarterly_data)

In [None]:
from sklearn.covariance import GraphicalLassoCV

# Replace this single-value calculation ...
# stock_returns = quarterly_data.iloc[portfolio].mean(axis=1)

# ...with these lines  to keep the selected stock data
stock_returns = quarterly_data.iloc[portfolio]  # No mean calculation here

# Graphical LASSO works with covariance, not direct returns
stock_covariance = np.cov(stock_returns.T)

# Model with cross-validation to  tune regularization
model = GraphicalLassoCV()
model.fit(stock_covariance)


# Estimated precision matrix
precision_matrix = model.precision_

# Convert this to a correlation matrix (often easier to interpret)
correlation_matrix = np.linalg.inv(precision_matrix)


In [None]:
#stock_returns

In [None]:
import matplotlib.pyplot as plt
import networkx as nx
# ... (Your previous code)

# Create a NetworkX graph object
G = nx.Graph()

# Add nodes (stocks)
G.add_nodes_from(stock_returns.columns)

# Add edges based on correlation strength
threshold = 0.05  # Set a threshold to focus on stronger correlations
for i, stock1 in enumerate(stock_returns.columns):
    for j, stock2 in enumerate(stock_returns.columns[i+1:]):
        correlation = correlation_matrix[i, i+j+1]  # Account for symmetric matrix
        if abs(correlation) > threshold:
            G.add_edge(stock1, stock2, weight=correlation)

# Customize visualization (optional)
edge_colors = [edge[2]['weight'] for edge in G.edges(data=True)]  # Color based on weight
pos = nx.spring_layout(G)  # Choose a layout algorithm

# Draw the graph
nx.draw(G, pos, with_labels=True, font_size=8, node_size=500, edge_color=edge_colors, edge_cmap=plt.cm.coolwarm)
plt.title("Correlation Network (Graphical LASSO)")
plt.show()
