In [27]:
import pandas as pd
from faker import Faker
import random

# Set up Faker
fake = Faker()

# Generate dummy data for 5000 individuals
num_individuals = 5000
data = []

for _ in range(num_individuals):
    # Investment Time Horizon
    time_horizon = random.choice(['Short-term', 'Medium-term', 'Long-term'])

    # Comfort Level with Market Fluctuations
    market_comfort = random.choice(['Very comfortable', 'Somewhat comfortable', 'Neutral', 'Somewhat uncomfortable', 'Very uncomfortable'])

    # Previous Investment Experience
    investment_experience = random.choice(['Beginner (limited or no experience)', 'Intermediate (some experience)', 'Advanced (experienced in managing a diverse portfolio)'])

    # Financial Goals
    financial_goals = random.choice(['Wealth preservation (protecting existing capital)', 'Capital growth (maximizing returns)', 'Income generation (receiving regular dividends or interest)'])

    # Reaction to Potential Losses
    potential_losses_reaction = random.choice(['Stay the course and hold onto investments', 'Reevaluate and potentially adjust the investment strategy', 'Sell investments to limit further losses'])

    # Current Financial Situation
    financial_situation = random.choice(['Comfortable (Stable income and savings)', 'Adequate (Meeting basic needs with some room for savings)', 'Strained (Meeting basic needs with little room for savings)'])

    # Knowledge of Cryptocurrency
    cryptocurrency_knowledge = random.choice(['Expert (In-depth understanding)', 'Intermediate (Some understanding)', 'Novice (Limited understanding)'])

    # Emergency Fund
    emergency_fund = random.choice(['Yes, well-prepared', 'Partially prepared', 'Not prepared'])

    # Risk Capacity
    risk_capacity_score = random.randint(10, 25)

    # Liquidity Needs
    liquidity_needs = random.choice(['No immediate need (Long-term investment horizon)', 'Medium-term (Possible need within 3-5 years)', 'Short-term (Possible need within 1-3 years)'])

    # Calculate Total Score
        # Calculate Total Score
    # Calculate Total Score
    def extract_score(string):
        try:
            return int(string.split(": ")[1].split(" ")[0])
        except (IndexError, ValueError):
            return 0

    total_score = sum([
        risk_capacity_score,
        extract_score(emergency_fund),
        extract_score(financial_situation),
        extract_score(cryptocurrency_knowledge),
        extract_score(investment_experience)
    ])



    # Determine Risk Capacity
    risk_capacity = (
        'Conservative' if 8 <= total_score <= 15 else
        'Moderate' if 15 <= total_score <= 24 else
        'Aggressive'
    )

    # Create individual's data
    individual_data = {
        'Name': fake.name(),
        'Investment Time Horizon': time_horizon,
        'Comfort Level with Market Fluctuations': market_comfort,
        'Previous Investment Experience': investment_experience,
        'Financial Goals': financial_goals,
        'Reaction to Potential Losses': potential_losses_reaction,
        'Current Financial Situation': financial_situation,
        'Knowledge of Cryptocurrency': cryptocurrency_knowledge,
        'Emergency Fund': emergency_fund,
        'Risk Capacity Score': risk_capacity_score,
        'Liquidity Needs': liquidity_needs,
        'Risk Capacity': risk_capacity
    }

    data.append(individual_data)

# Create a dataframe
df = pd.DataFrame(data)

# Save the dataframe to a CSV file
df.to_csv('investment_data.csv', index=False)


In [28]:

df.value_counts()

Name           Investment Time Horizon  Comfort Level with Market Fluctuations  Previous Investment Experience                          Financial Goals                                              Reaction to Potential Losses                               Current Financial Situation                                  Knowledge of Cryptocurrency        Emergency Fund      Risk Capacity Score  Liquidity Needs                                   Risk Capacity
Aaron Alvarez  Short-term               Somewhat uncomfortable                  Intermediate (some experience)                          Income generation (receiving regular dividends or interest)  Sell investments to limit further losses                   Adequate (Meeting basic needs with some room for savings)    Intermediate (Some understanding)  Partially prepared  10                   Medium-term (Possible need within 3-5 years)      Conservative     1
Megan Garza    Short-term               Neutral                                 A

In [29]:
df.head(20)

Unnamed: 0,Name,Investment Time Horizon,Comfort Level with Market Fluctuations,Previous Investment Experience,Financial Goals,Reaction to Potential Losses,Current Financial Situation,Knowledge of Cryptocurrency,Emergency Fund,Risk Capacity Score,Liquidity Needs,Risk Capacity
0,Benjamin Stevens,Short-term,Very uncomfortable,Beginner (limited or no experience),Income generation (receiving regular dividends...,Sell investments to limit further losses,Adequate (Meeting basic needs with some room f...,Expert (In-depth understanding),Partially prepared,20,No immediate need (Long-term investment horizon),Moderate
1,Cynthia Richards,Medium-term,Very comfortable,Intermediate (some experience),Income generation (receiving regular dividends...,Sell investments to limit further losses,Strained (Meeting basic needs with little room...,Intermediate (Some understanding),Partially prepared,16,Medium-term (Possible need within 3-5 years),Moderate
2,Kara Blanchard,Short-term,Neutral,Intermediate (some experience),Capital growth (maximizing returns),Sell investments to limit further losses,Adequate (Meeting basic needs with some room f...,Intermediate (Some understanding),"Yes, well-prepared",10,Medium-term (Possible need within 3-5 years),Conservative
3,Timothy Ford,Long-term,Somewhat uncomfortable,Beginner (limited or no experience),Capital growth (maximizing returns),Sell investments to limit further losses,Strained (Meeting basic needs with little room...,Novice (Limited understanding),Not prepared,13,Medium-term (Possible need within 3-5 years),Conservative
4,Wayne Sharp,Long-term,Very comfortable,Intermediate (some experience),Income generation (receiving regular dividends...,Sell investments to limit further losses,Adequate (Meeting basic needs with some room f...,Intermediate (Some understanding),Partially prepared,10,Medium-term (Possible need within 3-5 years),Conservative
5,Tyler Terry,Long-term,Neutral,Intermediate (some experience),Wealth preservation (protecting existing capital),Reevaluate and potentially adjust the investme...,Strained (Meeting basic needs with little room...,Intermediate (Some understanding),"Yes, well-prepared",19,Medium-term (Possible need within 3-5 years),Moderate
6,April King,Short-term,Very uncomfortable,Advanced (experienced in managing a diverse po...,Income generation (receiving regular dividends...,Reevaluate and potentially adjust the investme...,Comfortable (Stable income and savings),Novice (Limited understanding),"Yes, well-prepared",24,Short-term (Possible need within 1-3 years),Moderate
7,Taylor Myers,Long-term,Very comfortable,Advanced (experienced in managing a diverse po...,Wealth preservation (protecting existing capital),Reevaluate and potentially adjust the investme...,Adequate (Meeting basic needs with some room f...,Novice (Limited understanding),Not prepared,21,No immediate need (Long-term investment horizon),Moderate
8,Gary Sanchez,Short-term,Somewhat uncomfortable,Advanced (experienced in managing a diverse po...,Income generation (receiving regular dividends...,Sell investments to limit further losses,Comfortable (Stable income and savings),Intermediate (Some understanding),"Yes, well-prepared",15,Medium-term (Possible need within 3-5 years),Conservative
9,Adrienne Ayala,Short-term,Very uncomfortable,Advanced (experienced in managing a diverse po...,Wealth preservation (protecting existing capital),Stay the course and hold onto investments,Adequate (Meeting basic needs with some room f...,Novice (Limited understanding),Not prepared,14,No immediate need (Long-term investment horizon),Conservative


In [30]:
df.drop('Name',axis=1,inplace=True)

In [31]:
df.head()

Unnamed: 0,Investment Time Horizon,Comfort Level with Market Fluctuations,Previous Investment Experience,Financial Goals,Reaction to Potential Losses,Current Financial Situation,Knowledge of Cryptocurrency,Emergency Fund,Risk Capacity Score,Liquidity Needs,Risk Capacity
0,Short-term,Very uncomfortable,Beginner (limited or no experience),Income generation (receiving regular dividends...,Sell investments to limit further losses,Adequate (Meeting basic needs with some room f...,Expert (In-depth understanding),Partially prepared,20,No immediate need (Long-term investment horizon),Moderate
1,Medium-term,Very comfortable,Intermediate (some experience),Income generation (receiving regular dividends...,Sell investments to limit further losses,Strained (Meeting basic needs with little room...,Intermediate (Some understanding),Partially prepared,16,Medium-term (Possible need within 3-5 years),Moderate
2,Short-term,Neutral,Intermediate (some experience),Capital growth (maximizing returns),Sell investments to limit further losses,Adequate (Meeting basic needs with some room f...,Intermediate (Some understanding),"Yes, well-prepared",10,Medium-term (Possible need within 3-5 years),Conservative
3,Long-term,Somewhat uncomfortable,Beginner (limited or no experience),Capital growth (maximizing returns),Sell investments to limit further losses,Strained (Meeting basic needs with little room...,Novice (Limited understanding),Not prepared,13,Medium-term (Possible need within 3-5 years),Conservative
4,Long-term,Very comfortable,Intermediate (some experience),Income generation (receiving regular dividends...,Sell investments to limit further losses,Adequate (Meeting basic needs with some room f...,Intermediate (Some understanding),Partially prepared,10,Medium-term (Possible need within 3-5 years),Conservative


In [11]:

file_path = 'investment_data.csv'
df = pd.read_csv(file_path)

# Drop the 'Name' column
df = df.drop('Name', axis=1)

# Encode categorical variables using one-hot encoding
encoder = OneHotEncoder(cols=['Investment Time Horizon', 'Comfort Level with Market Fluctuations',
                               'Previous Investment Experience', 'Financial Goals',
                               'Reaction to Potential Losses', 'Current Financial Situation',
                               'Knowledge of Cryptocurrency', 'Emergency Fund', 'Liquidity Needs',
                               'Risk Capacity'])
df_encoded = encoder.fit_transform(df)


In [13]:
df_encoded.shape

(5000, 33)

In [15]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(x,y,test_size=0.15,random_state=2) 

In [21]:
from statistics import LinearRegression
from sklearn.compose import ColumnTransformer

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder


step1= ColumnTransformer(transformers=
        [('col_tnf', OneHotEncoder(sparse=False,drop='first'),[0,1,7,10,11])],
        remainder='passthrough'
        )

step2=LinearRegression()

pipe=Pipeline([
    ('step1',step1),
    ('step2',step2)
])

pipe.fit(X_train,y_train)

TypeError: LinearRegression.__new__() missing 2 required positional arguments: 'slope' and 'intercept'