# Fund Correlation Analysis

This notebook analyzes correlations between different funds using historical price data.

## Setup
First, let's import required libraries and define our analysis functions.

In [None]:
# Install required packages if needed
!pip install pandas numpy seaborn matplotlib scipy scikit-learn ipywidgets

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.cluster import hierarchy
from scipy.stats import pearsonr
from sklearn.cluster import KMeans
import ipywidgets as widgets
from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

# Set plotting style
plt.style.use('seaborn')
%matplotlib inline

## Data Upload
Upload your CSV file using the widget below. The file must contain a `Date` column plus one column of historical prices per fund:

In [None]:
# File picker limited to a single CSV file; later cells read the upload from `uploader.value`.
uploader = widgets.FileUpload(accept='.csv', multiple=False)
display(uploader)

In [None]:
class CorrelationAnalyzer:
    """Correlation analysis for a table of fund series.

    The input frame must have a ``Date`` column; every numeric column is
    treated as one fund. Non-numeric columns are kept in ``self.df`` but are
    ignored by the analyses.

    Attributes:
        correlation_threshold: Minimum absolute correlation for a pair to be
            reported as "highly correlated".
        df: Copy of the input data indexed by the parsed ``Date`` column.
        correlation_matrix: Pairwise correlation matrix of the numeric
            columns, available as soon as the analyzer is constructed.
    """

    def __init__(self, df, correlation_threshold=0.8):
        """Prepare the data and compute the correlation matrix.

        Args:
            df: DataFrame with a ``Date`` column and one column per fund.
            correlation_threshold: Absolute correlation above which a pair is
                considered highly correlated.

        Raises:
            KeyError: If ``df`` has no ``Date`` column.
        """
        if 'Date' not in df.columns:
            raise KeyError("Input data must contain a 'Date' column")

        self.correlation_threshold = correlation_threshold
        # Work on a copy so the caller's frame is left untouched and the
        # analyzer can be constructed repeatedly from the same DataFrame.
        prepared = df.copy()
        prepared['Date'] = pd.to_datetime(prepared['Date'])
        self.df = prepared.set_index('Date')
        # Computed eagerly so every analysis method can be called in any order.
        self.correlation_matrix = self._numeric_frame().corr()

    def _numeric_frame(self):
        """Return only the numeric columns of ``self.df`` (the fund series)."""
        return self.df.select_dtypes(include='number')

    def _find_high_correlations(self):
        """Return records for pairs whose |correlation| exceeds the threshold.

        Refreshes ``self.correlation_matrix`` and returns a list of dicts with
        keys ``Fund1``, ``Fund2``, ``Correlation`` and ``P-value``, sorted by
        descending absolute correlation.
        """
        numeric = self._numeric_frame()
        self.correlation_matrix = numeric.corr()
        names = list(self.correlation_matrix.columns)

        high_correlations = []
        for i in range(len(names)):
            for j in range(i + 1, len(names)):
                correlation = self.correlation_matrix.iloc[i, j]
                # `not (x > t)` also rejects NaN correlations (e.g. constant series).
                if not abs(correlation) > self.correlation_threshold:
                    continue
                # pearsonr cannot handle NaN, so use the same pairwise-complete
                # observations that DataFrame.corr() used for this pair.
                pair = numeric.iloc[:, [i, j]].dropna()
                if len(pair) >= 2:
                    pvalue = pearsonr(pair.iloc[:, 0], pair.iloc[:, 1])[1]
                else:
                    pvalue = float('nan')
                high_correlations.append({
                    'Fund1': names[i],
                    'Fund2': names[j],
                    'Correlation': correlation,
                    'P-value': pvalue
                })

        high_correlations.sort(key=lambda x: abs(x['Correlation']), reverse=True)
        return high_correlations

    def static_correlation_analysis(self):
        """Display the most correlated fund pairs over the full sample.

        Returns:
            List of dicts (``Fund1``, ``Fund2``, ``Correlation``, ``P-value``)
            for every pair above the threshold, strongest first.
        """
        display(HTML("<h3>Static Correlation Analysis</h3>"))

        high_correlations = self._find_high_correlations()

        display(HTML("<h4>Top 10 highly correlated pairs:</h4>"))
        if not high_correlations:
            print(f"No fund pairs exceed the correlation threshold of {self.correlation_threshold}.")
        for pair in high_correlations[:10]:
            print(f"{pair['Fund1']} - {pair['Fund2']}: {pair['Correlation']:.3f} (p-value: {pair['P-value']:.3e})")

        return high_correlations

    def rolling_correlation_analysis(self, window=60):
        """Plot rolling correlations for the five most correlated pairs.

        Runs the static analysis first (including its output) to select the
        pairs.

        Args:
            window: Number of observations in each rolling window.
        """
        display(HTML(f"<h3>Rolling Correlation Analysis (Window: {window} days)</h3>"))

        # Calculate rolling correlations for highly correlated pairs
        high_correlations = self.static_correlation_analysis()
        top_pairs = high_correlations[:5]  # Analyze top 5 pairs

        if not top_pairs:
            print("No highly correlated pairs to plot.")
            return
        if window > len(self.df):
            # Every rolling value would be NaN and the chart would be empty.
            print(f"Rolling window ({window}) exceeds the number of observations ({len(self.df)}); nothing to plot.")
            return

        fig, ax = plt.subplots(figsize=(15, 8))
        for pair in top_pairs:
            rolling_corr = self.df[pair['Fund1']].rolling(window).corr(self.df[pair['Fund2']])
            ax.plot(rolling_corr.index, rolling_corr.values, label=f"{pair['Fund1']} - {pair['Fund2']}")

        ax.set_title(f'Rolling Correlations (Window: {window} days)')
        ax.set_xlabel('Date')
        ax.set_ylabel('Correlation')
        ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        fig.tight_layout()
        plt.show()

    def cluster_analysis(self, n_clusters=5):
        """Show a hierarchical-clustering dendrogram and K-means cluster members.

        Args:
            n_clusters: Requested number of K-means clusters; reduced to the
                number of funds when fewer funds are available.
        """
        display(HTML(f"<h3>Cluster Analysis (K-means, {n_clusters} clusters)</h3>"))

        # Hierarchical Clustering
        # linkage() rejects non-finite values: drop funds whose correlations
        # are entirely undefined and treat any remaining gaps as uncorrelated.
        corr = self.correlation_matrix
        defined = corr.notna().any(axis=0).to_numpy()
        corr = corr.loc[defined, defined].fillna(0.0)
        if corr.shape[0] >= 2:
            linkage_matrix = hierarchy.linkage(corr.to_numpy(), method='ward')
            plt.figure(figsize=(15, 10))
            # A plain list avoids ambiguous truth-value checks on a pandas Index
            # in some SciPy versions.
            hierarchy.dendrogram(linkage_matrix, labels=corr.columns.tolist())
            plt.title('Hierarchical Clustering Dendrogram')
            plt.xticks(rotation=90)
            plt.tight_layout()
            plt.show()
        else:
            print("At least two funds with defined correlations are needed for hierarchical clustering.")

        # K-means Clustering
        # NOTE(review): this clusters the raw series values, so funds are grouped
        # by level rather than by co-movement; confirm that this is intended.
        features = self._numeric_frame().dropna().T  # one row per fund, NaN dates removed
        if features.shape[0] == 0 or features.shape[1] == 0:
            print("Not enough complete data for K-means clustering.")
            return

        # KMeans requires n_clusters <= number of samples (funds).
        k = min(n_clusters, features.shape[0])
        if k < n_clusters:
            print(f"Only {features.shape[0]} funds available; using {k} clusters.")

        # n_init is pinned so results do not depend on the scikit-learn default.
        kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
        clusters = kmeans.fit_predict(features)

        # Print cluster members
        for i in range(k):
            cluster_members = features.index[clusters == i]
            print(f"\nCluster {i+1} members:")
            print(', '.join(map(str, cluster_members)))

    def create_heatmap(self):
        """Draw a heatmap of the correlation matrix."""
        display(HTML("<h3>Correlation Heatmap</h3>"))

        fig, ax = plt.subplots(figsize=(15, 12))
        sns.heatmap(self.correlation_matrix, cmap='RdBu', center=0, annot=False, ax=ax)
        ax.set_title('Correlation Heatmap')
        fig.tight_layout()
        plt.show()

    def correlation_statistics(self):
        """Print summary statistics of all distinct pairwise correlations."""
        display(HTML("<h3>Correlation Statistics</h3>"))

        # Upper triangle (k=1) holds each distinct pair exactly once.
        values = self.correlation_matrix.values
        correlations_flat = values[np.triu_indices_from(values, k=1)]
        # Undefined correlations would otherwise turn every statistic into NaN.
        correlations_flat = correlations_flat[~np.isnan(correlations_flat)]

        if correlations_flat.size == 0:
            print("No pairwise correlations available.")
            return

        stats = {
            'Mean': np.mean(correlations_flat),
            'Median': np.median(correlations_flat),
            'Std Dev': np.std(correlations_flat),
            'Min': np.min(correlations_flat),
            'Max': np.max(correlations_flat),
            'Skewness': pd.Series(correlations_flat).skew(),
            'Kurtosis': pd.Series(correlations_flat).kurtosis()
        }

        for stat, value in stats.items():
            print(f"{stat}: {value:.3f}")

## Run Analysis
Click the button below to run the analysis after uploading your CSV file:

In [None]:
def run_analysis(correlation_threshold=0.8, rolling_window=60, n_clusters=5):
    """Load the uploaded CSV and run every analysis on it.

    Args:
        correlation_threshold: Absolute correlation above which a fund pair is
            reported as highly correlated.
        rolling_window: Window length for the rolling-correlation plot.
        n_clusters: Number of K-means clusters.

    Prints a reminder and returns without doing anything when no file has been
    uploaded yet.
    """
    import io  # standard library; imported here so this cell stays self-contained

    uploaded = uploader.value
    if not uploaded:
        print("Please upload a CSV file first!")
        return

    # Read uploaded file.
    # ipywidgets 7 exposes FileUpload.value as {filename: {...}}; ipywidgets 8+
    # exposes a tuple of per-file entries. Both carry the raw data under 'content'.
    if isinstance(uploaded, dict):
        first_file = next(iter(uploaded.values()))
    else:
        first_file = uploaded[0]
    content = first_file['content']
    df = pd.read_csv(io.BytesIO(content))

    # Initialize analyzer
    analyzer = CorrelationAnalyzer(df, correlation_threshold=correlation_threshold)

    # Run analyses.
    # The rolling analysis runs first because it performs the static analysis,
    # which computes the correlation matrix used by the remaining steps.
    analyzer.rolling_correlation_analysis(window=rolling_window)
    analyzer.create_heatmap()
    analyzer.cluster_analysis(n_clusters=n_clusters)
    analyzer.correlation_statistics()

# Create interactive widgets.
# The descriptions are longer than the default label width, so let the label
# size itself instead of being truncated.
wide_label = {'description_width': 'initial'}

correlation_threshold = widgets.FloatSlider(
    value=0.8,
    min=0.0,
    max=1.0,
    step=0.05,
    description='Correlation Threshold:',
    style=wide_label
)

rolling_window = widgets.IntSlider(
    value=60,
    min=10,
    max=252,
    step=5,
    description='Rolling Window (days):',
    style=wide_label
)

n_clusters = widgets.IntSlider(
    value=5,
    min=2,
    max=10,
    step=1,
    description='Number of Clusters:',
    style=wide_label
)

run_button = widgets.Button(description='Run Analysis')

# Prints and figures produced inside a widget callback are not attached to the
# cell's output area, so they are captured in a dedicated Output widget.
analysis_output = widgets.Output()

def on_button_clicked(b):
    """Run the analysis with the current slider values, replacing earlier results."""
    analysis_output.clear_output(wait=True)
    with analysis_output:
        run_analysis(
            correlation_threshold=correlation_threshold.value,
            rolling_window=rolling_window.value,
            n_clusters=n_clusters.value
        )

run_button.on_click(on_button_clicked)

# Display widgets, with the results area directly below the controls
display(correlation_threshold, rolling_window, n_clusters, run_button, analysis_output)

## Analysis Results
The results appear in the output of the Run Analysis cell above once you click the button. You can adjust the parameters and run the analysis multiple times to compare results.