In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [6]:
# Toy dataset: two categorical feature columns ('Color', 'Size') and a
# binary target column ('Label'), five rows in total.
data = dict(
    Color=['Red', 'Blue', 'Green', 'Red', 'Blue'],
    Size=['Small', 'Large', 'Medium', 'Large', 'Small'],
    Label=[1, 0, 1, 1, 0],
)

# Column order follows the insertion order of the dict keys.
df = pd.DataFrame(data)

In [7]:
# Show the raw frame under a heading; sep="\n" puts the heading and the
# frame's text representation on separate lines.
print("Original DataFrame:", df, sep="\n")

Original DataFrame:
   Color    Size  Label
0    Red   Small      1
1   Blue   Large      0
2  Green  Medium      1
3    Red   Large      1
4   Blue   Small      0


In [8]:
# Separate the target column from the feature columns. drop() returns a new
# frame, so df itself is left unchanged.
y = df['Label']
X = df.drop(columns='Label')

In [9]:
# One-hot encode every feature column, dropping the first category of each
# feature so the resulting indicator columns are not perfectly collinear.
#
# The `sparse=False` keyword was deprecated in scikit-learn 1.2 (renamed to
# `sparse_output`) and removed in 1.4, so passing it fails on current releases,
# while `sparse_output` does not exist on older ones. Keeping the encoder's
# default sparse output and densifying with .toarray() yields the same dense
# float array on every version.
encoder = OneHotEncoder(drop='first')
X_onehot = encoder.fit_transform(X).toarray()

# Label the encoded columns as "<feature>_<category>" for readability.
columns_after_onehot = encoder.get_feature_names_out(X.columns)
df_onehot = pd.DataFrame(X_onehot, columns=columns_after_onehot)
print("\nDataFrame after One-Hot Encoding:")
print(df_onehot)



DataFrame after One-Hot Encoding:
   Color_Green  Color_Red  Size_Medium  Size_Small
0          0.0        1.0          0.0         1.0
1          0.0        0.0          0.0         0.0
2          1.0        0.0          1.0         0.0
3          0.0        1.0          0.0         0.0
4          0.0        0.0          0.0         1.0




In [12]:
# Integer-code each feature column independently.
# NOTE: the single LabelEncoder is refit on every column in turn, so after
# this cell it only holds the classes of the last column processed.
label_encoder = LabelEncoder()
X_label = pd.DataFrame(
    {name: label_encoder.fit_transform(X[name]) for name in X.columns},
    index=X.index,
)
print("\nDataFrame after Label Encoding:", X_label, sep="\n")



DataFrame after Label Encoding:
   Color  Size
0      2     2
1      0     0
2      1     1
3      2     0
4      0     2


In [13]:

# Hold out 20% of the one-hot encoded rows for evaluation; the fixed
# random_state makes the split repeatable.
# NOTE(review): with the 5-row frame built in this notebook, that leaves a
# single test row, so the reported accuracy can only be 0.0 or 1.0 and says
# little about the model -- treat it as a smoke test, not an evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X_onehot,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator itself, so construction and training chain.
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("\nModel Accuracy:", accuracy)



Model Accuracy: 0.0
