In [27]:
from faker import Faker
import pandas as pd
import random

# Initialize Faker
fake = Faker()

# Define scoring system
score_mapping = {
    'Short-term': 1,
    'Medium-term': 3,
    'Long-term': 5,
    'Very comfortable': 5,
    'Somewhat comfortable': 4,
    'Neutral': 3,
    'Somewhat uncomfortable': 2,
    'Very uncomfortable': 1,
    'Advanced': 5,
    'Intermediate': 3,
    'Beginner': 1,
    'Capital growth': 5,
    'Wealth preservation': 3,
    'Income generation': 4,
    'Stay the course and hold onto investments': 5,
    'Reevaluate and potentially adjust the investment strategy': 3,
    'Sell investments to limit further losses': 1
}

# Calculate risk capacity based on scores
def calculate_risk_capacity(row):
    total_score = (
        score_mapping[row['investment_time_horizon']] +
        score_mapping[row['comfort_with_fluctuations']] +
        score_mapping[row['investment_experience']] +
        score_mapping[row['financial_goals']] +
        score_mapping[row['reaction_to_losses']]
    )
    return (
        'Conservative' if 10 <= total_score <= 16 else
        'Moderate' if 16 <= total_score <= 25 else
        'Aggressive'
    )

# Generate dummy data
num_rows = 5000
dummy_data = []

for _ in range(num_rows):
    row = {
        'investment_time_horizon': random.choice(investment_time_horizon_choices),
        'comfort_with_fluctuations': random.choice(comfort_with_fluctuations_choices),
        'investment_experience': random.choice(investment_experience_choices),
        'financial_goals': random.choice(financial_goals_choices),
        'reaction_to_losses': random.choice(reaction_to_losses_choices)
    }
    dummy_data.append(row)

# Create DataFrame
df = pd.DataFrame(dummy_data)

# Apply risk capacity function to calculate the target column
df['risk_capacity'] = df.apply(calculate_risk_capacity, axis=1)

# Calculate accuracy
accuracy = df[df['risk_capacity'] == df['risk_capacity'].mode()[0]].shape[0] / num_rows * 100

# Display accuracy
print(f"Generated dataset with {accuracy:.2f}% accuracy for the target column (risk capacity).")




Generated dataset with 53.18% accuracy for the target column (risk capacity).


In [29]:
df.head(50)

Unnamed: 0,investment_time_horizon,comfort_with_fluctuations,investment_experience,financial_goals,reaction_to_losses,risk_capacity
0,Medium-term,Neutral,Beginner,Wealth preservation,Reevaluate and potentially adjust the investme...,Conservative
1,Short-term,Somewhat uncomfortable,Advanced,Income generation,Stay the course and hold onto investments,Moderate
2,Short-term,Somewhat comfortable,Beginner,Income generation,Sell investments to limit further losses,Conservative
3,Long-term,Very comfortable,Beginner,Capital growth,Sell investments to limit further losses,Moderate
4,Short-term,Somewhat comfortable,Intermediate,Capital growth,Stay the course and hold onto investments,Moderate
5,Medium-term,Somewhat comfortable,Beginner,Income generation,Sell investments to limit further losses,Conservative
6,Long-term,Somewhat comfortable,Intermediate,Capital growth,Sell investments to limit further losses,Moderate
7,Medium-term,Very uncomfortable,Beginner,Wealth preservation,Reevaluate and potentially adjust the investme...,Conservative
8,Medium-term,Very comfortable,Advanced,Income generation,Sell investments to limit further losses,Moderate
9,Medium-term,Very comfortable,Beginner,Income generation,Sell investments to limit further losses,Conservative
