In [12]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder

# -------------------------
# 1. LOAD DATA AND DROP USELESS COLUMN
# -------------------------
# Read the raw table (replace the path with your own filename) and discard
# User_ID in the same step; the result is bound to `df` for the cells below.
raw_csv_path = "personalized_investment_recommendations.csv"
df = pd.read_csv(raw_csv_path).drop(columns=["User_ID"])

# -------------------------
# 2. LABEL ENCODING
# -------------------------
# Each column gets its own encoder so that both learned mappings stay
# available: a single shared instance is refit on every fit_transform call
# and only remembers the classes_ of the last column it saw.
gender_encoder = LabelEncoder()
df["Gender"] = gender_encoder.fit_transform(df["Gender"])
# LabelEncoder numbers the sorted unique labels, so the codes depend on which
# values the data contains (Female = 0, Male = 1 only when those are the two
# labels present). Inspect gender_encoder.classes_ instead of assuming.

# `le` ends up fitted on the target, exactly as before, so
# le.inverse_transform() maps predicted codes back to portfolio names.
le = LabelEncoder()
df["Recommended_Portfolio"] = le.fit_transform(df["Recommended_Portfolio"])
# This is the target (y).

# -------------------------
# 3. ORDINAL ENCODING (for ordered categories)
# -------------------------

# Levels are listed from lowest to highest; the encoder assigns 0, 1, 2 in
# that listed order.
risk_order = [["Low", "Medium", "High"]]                       # Low < Medium < High
horizon_order = [["Short-term", "Medium-term", "Long-term"]]   # Short < Medium < Long

# Fit a fresh encoder per column, each with that column's explicit ordering.
for ordinal_col, level_order in (
    ("Risk_Tolerance", risk_order),
    ("Investment_Horizon", horizon_order),
):
    df[ordinal_col] = OrdinalEncoder(categories=level_order).fit_transform(df[[ordinal_col]])

# -------------------------
# 4. ONE-HOT ENCODING (for non-ordered categories)
# -------------------------

one_hot_cols = ["Investment_Goal", "Preferred_Sector", "Current_Investments"]

# drop_first=True drops one level per feature (avoids the dummy variable trap);
# dtype=int makes the indicator columns 0/1 integers rather than booleans.
df = pd.get_dummies(df, columns=one_hot_cols, drop_first=True, dtype=int)

# OrdinalEncoder returns floats by default, so cast just those code columns.
# A blanket df.astype(int) would also truncate any fractional numeric feature
# and raise on missing values, so the cast is limited to encoded columns.
df = df.astype({"Risk_Tolerance": int, "Investment_Horizon": int})

# -------------------------
# 5. View final numeric dataset
# -------------------------


# df.dtypes


In [13]:
# Preview the first five rows of the encoded frame.
df.head(n=5)

Unnamed: 0,Age,Gender,Annual_Income,Risk_Tolerance,Investment_Horizon,Recommended_Portfolio,Investment_Goal_Emergency Fund,Investment_Goal_House Purchase,Investment_Goal_Retirement,Investment_Goal_Wealth Growth,Preferred_Sector_Energy,Preferred_Sector_Finance,Preferred_Sector_Healthcare,Preferred_Sector_Real Estate,Preferred_Sector_Tech,Current_Investments_Crypto,Current_Investments_Mixed Portfolio,Current_Investments_Mutual Funds,Current_Investments_Real Estate,Current_Investments_Stocks
0,27,0,98977,0,1,4,0,0,0,1,0,0,0,0,0,1,0,0,0,0
1,58,1,46433,2,1,2,1,0,0,0,1,0,0,0,0,1,0,0,0,0
2,27,0,67265,0,0,3,0,1,0,0,1,0,0,0,0,1,0,0,0,0
3,27,0,145122,1,2,4,0,1,0,0,0,0,0,1,0,0,1,0,0,0
4,28,1,92188,0,0,1,0,0,0,1,1,0,0,0,0,0,0,1,0,0
