In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [2]:
# Build a synthetic dataset: one row per investor, holding a simulated return
# and a simulated volatility for each of `num_stocks` stocks.
np.random.seed(42)  # seed the global NumPy RNG so the draws below are repeatable
num_investors = 50
num_stocks = 5

investor_data = []
for i in range(num_investors):
    # Keep this draw order (returns first, then volatilities, per investor):
    # it determines which values each investor receives from the seeded stream.
    portfolio_returns = np.random.normal(0.001, 0.02, num_stocks)
    portfolio_volatility = np.random.uniform(0.01, 0.05, num_stocks)

    # Column names are derived from num_stocks rather than written out by hand,
    # so changing num_stocks neither raises IndexError nor silently drops stocks.
    # Insertion order (ID, all returns, all volatilities) fixes the column order.
    record = {'Investor_ID': f'INV{i+1}'}
    record.update({
        f'Stock{j+1}_Return': value
        for j, value in enumerate(portfolio_returns)
    })
    record.update({
        f'Stock{j+1}_Volatility': value
        for j, value in enumerate(portfolio_volatility)
    })
    investor_data.append(record)

# Build the frame once from the list of records (not row-by-row).
df = pd.DataFrame(investor_data)
print("Initial Dataset (first 5 rows):")
print(df.head())

Initial Dataset (first 5 rows):
  Investor_ID  Stock1_Return  Stock2_Return  Stock3_Return  Stock4_Return  Stock5_Return  Stock1_Volatility  Stock2_Volatility  Stock3_Volatility  Stock4_Volatility  Stock5_Volatility
0        INV1      0.010934      -0.001765       0.013954       0.031461      -0.003683            0.038      0.029               0.019               0.043               0.022
1        INV2     -0.003683       0.032584       0.016349      -0.008389       0.012851            0.031      0.015               0.045               0.011               0.027


In [3]:
# Derive portfolio-level features from the per-stock columns.
# The column lists are generated from num_stocks (already used for the
# Dirichlet weights below) so they cannot drift out of step with it.
return_cols = [f'Stock{k}_Return' for k in range(1, num_stocks + 1)]
volatility_cols = [f'Stock{k}_Volatility' for k in range(1, num_stocks + 1)]

# Row-wise unweighted means across the stocks.
df['Portfolio_Return'] = df[return_cols].mean(axis=1)
df['Portfolio_Volatility'] = df[volatility_cols].mean(axis=1)

# Synthetic risk metrics drawn from the global NumPy random stream.
# NOTE: these draws advance the global RNG state, so re-running this cell
# without re-seeding first produces different values.
df['Max_Drawdown'] = np.random.uniform(0.01, 0.2, len(df))
df['Beta'] = np.random.uniform(0.8, 1.5, len(df))

# One random weight vector per investor (each row sums to 1); the score is
# 1 minus the sum of squared weights, so more even weights score higher.
# NOTE: these weights are only used for this score; the return and volatility
# columns above are plain unweighted means.
weights = np.random.dirichlet(np.ones(num_stocks), size=len(df))
df['Diversification'] = 1 - (weights**2).sum(axis=1)

In [4]:
# Cluster investors into three groups based on their portfolio-level features.
features = ['Portfolio_Return','Portfolio_Volatility','Max_Drawdown','Beta','Diversification']
X = df[features]

# Standardize each feature (zero mean, unit variance) before clustering so
# that no feature dominates the distance computation because of its scale.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# n_init is set explicitly: its default differs between scikit-learn releases,
# so leaving it implicit makes the cluster assignments (and any warning output)
# depend on the installed version. random_state fixes the initialisation seed.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df['Risk_Profile_Cluster'] = kmeans.fit_predict(X_scaled)

In [5]:
# Rank the clusters by their mean portfolio volatility, lowest first, and
# attach a human-readable risk label to each rank.
cluster_risk = (
    df.groupby('Risk_Profile_Cluster')['Portfolio_Volatility']
    .mean()
    .sort_values()
)
risk_map = {
    cluster_risk.index[rank]: label
    for rank, label in enumerate(('Conservative', 'Moderate', 'Aggressive'))
}

# Translate each investor's cluster id into its risk label and report counts.
df['Risk_Profile'] = df['Risk_Profile_Cluster'].map(risk_map)
print('Risk Profile Distribution:')
print(df['Risk_Profile'].value_counts())

Risk Profile Distribution:
Moderate        18
Conservative    17
Aggressive      15


In [6]:
# Preview the first rows of the engineered features next to the assigned label.
df.loc[:, [
    'Investor_ID',
    'Portfolio_Return',
    'Portfolio_Volatility',
    'Max_Drawdown',
    'Beta',
    'Diversification',
    'Risk_Profile',
]].head()

  Investor_ID  Portfolio_Return  Portfolio_Volatility  Max_Drawdown  Beta  Diversification   Risk_Profile
0        INV1          0.010980              0.030200      0.134512  1.21          0.782314    Moderate
1        INV2          0.009542              0.026184      0.091224  1.04          0.801224  Conservative
