In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [None]:
# Data Creation
# Build a synthetic table with one row per investor: an ID plus a simulated
# return and a simulated volatility for each of the stocks in the portfolio.
np.random.seed(42)  # seed the global NumPy RNG so every draw below is repeatable
num_investors = 50
num_stocks = 5

# Column names, in the order they appear in the resulting DataFrame.
return_keys = (
    'Stock1_Return',
    'Stock2_Return',
    'Stock3_Return',
    'Stock4_Return',
    'Stock5_Return',
)
volatility_keys = (
    'Stock1_Volatility',
    'Stock2_Volatility',
    'Stock3_Volatility',
    'Stock4_Volatility',
    'Stock5_Volatility',
)

investor_data = []
for i in range(num_investors):
    # Draw order matters for reproducibility: returns first, then volatilities.
    portfolio_returns = np.random.normal(0.001, 0.02, num_stocks)
    portfolio_volatility = np.random.uniform(0.01, 0.05, num_stocks)

    record = {'Investor_ID': f'INV{i+1}'}
    record.update(zip(return_keys, portfolio_returns))
    record.update(zip(volatility_keys, portfolio_volatility))
    investor_data.append(record)

# One DataFrame construction from the list of records (no row-by-row growth).
df = pd.DataFrame(investor_data)

In [None]:
# Feature Engineering
# Derive per-investor portfolio features and add them to `df` as new columns.
return_cols = [
    'Stock1_Return',
    'Stock2_Return',
    'Stock3_Return',
    'Stock4_Return',
    'Stock5_Return',
]
volatility_cols = [
    'Stock1_Volatility',
    'Stock2_Volatility',
    'Stock3_Volatility',
    'Stock4_Volatility',
    'Stock5_Volatility',
]
n_rows = len(df)

# Row-wise averages across the per-stock columns.
df['Portfolio_Return'] = df[return_cols].mean(axis=1)
df['Portfolio_Volatility'] = df[volatility_cols].mean(axis=1)

# These two features are random draws, not computed from the return columns.
df['Max_Drawdown'] = np.random.uniform(0.01, 0.2, n_rows)
df['Beta'] = np.random.uniform(0.8, 1.5, n_rows)

# Random portfolio weights (each row sums to 1); diversification is one minus
# the sum of squared weights, so more evenly spread weights score higher.
weights = np.random.dirichlet(np.ones(num_stocks), size=n_rows)
df['Diversification'] = 1 - (weights * weights).sum(axis=1)

In [None]:
# Data Preprocessing
# Select the clustering features and standardize them so that no single
# feature dominates the distance computation because of its scale.
features = [
    'Portfolio_Return',
    'Portfolio_Volatility',
    'Max_Drawdown',
    'Beta',
    'Diversification',
]
X = df.loc[:, features]

# Fit the scaler on X, then apply it to the same data.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# K-Means Clustering
# Partition the standardized investor features into three clusters and store
# each investor's cluster id in `df['Risk_Profile_Cluster']`.
#
# n_init is set explicitly: scikit-learn changed its default from 10 to 'auto'
# in version 1.4 (and emits a FutureWarning in 1.2/1.3), so relying on the
# default makes the resulting labels depend on the installed version.
# random_state fixes the centroid initialisation for repeatable runs.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df['Risk_Profile_Cluster'] = kmeans.fit_predict(X_scaled)

In [None]:
# Mapping clusters to risk labels
# Rank the clusters by their mean portfolio volatility (ascending) and name
# them in that order: lowest volatility -> 'Conservative', highest -> 'Aggressive'.
cluster_risk = (
    df.groupby('Risk_Profile_Cluster')['Portfolio_Volatility']
    .mean()
    .sort_values()
)

risk_labels = ('Conservative', 'Moderate', 'Aggressive')
risk_map = {
    cluster_risk.index[rank]: label
    for rank, label in enumerate(risk_labels)
}

# Translate each investor's numeric cluster id into its risk label.
df['Risk_Profile'] = df['Risk_Profile_Cluster'].map(risk_map)

In [None]:
# Visualization
# Scatter plot of portfolio return against portfolio volatility, one point per
# investor, coloured by the assigned risk profile.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    x='Portfolio_Volatility',
    y='Portfolio_Return',
    hue='Risk_Profile',
    # Pin the legend/colour order from lowest to highest risk; without this the
    # order follows the order in which labels happen to appear in `df`.
    hue_order=['Conservative', 'Moderate', 'Aggressive'],
    data=df,
    palette='Set1',
    s=100,
    ax=ax,
)
ax.set_title('Investor Risk Profiles')
ax.set_xlabel('Portfolio Volatility')
ax.set_ylabel('Portfolio Return')
plt.show()

In [None]:
# Save Results
# Write the full table (raw columns, engineered features, cluster id and risk
# label) to a CSV file in the working directory, without the index column.
output_path = 'investor_risk_profiles.csv'
df.to_csv(output_path, index=False)

In [None]:
# Display final dataset
# Show the investor ID, the engineered features and the assigned risk label;
# the per-stock columns and the numeric cluster id are left out.
summary_columns = [
    'Investor_ID',
    'Portfolio_Return',
    'Portfolio_Volatility',
    'Max_Drawdown',
    'Beta',
    'Diversification',
    'Risk_Profile',
]
df.loc[:, summary_columns]