NAME : YAZHINI R R
REG NO : 212224100063

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
from torch.utils.data import TensorDataset, DataLoader


In [2]:
dataset = pd.read_csv("customers.csv")
print("Dataset Preview:\n", dataset.head())


Dataset Preview:
        ID  Gender Ever_Married  Age Graduated     Profession  Work_Experience  \
0  462809    Male           No   22        No     Healthcare              1.0   
1  462643  Female          Yes   38       Yes       Engineer              NaN   
2  466315  Female          Yes   67       Yes       Engineer              1.0   
3  461735    Male          Yes   67       Yes         Lawyer              0.0   
4  462669  Female          Yes   40       Yes  Entertainment              NaN   

  Spending_Score  Family_Size  Var_1 Segmentation  
0            Low          4.0  Cat_4            D  
1        Average          3.0  Cat_4            A  
2            Low          1.0  Cat_6            B  
3           High          2.0  Cat_6            B  
4           High          6.0  Cat_6            A  


In [3]:
X = dataset.drop("Segmentation", axis=1)
y = dataset["Segmentation"]

In [4]:
X["Work_Experience"].fillna(X["Work_Experience"].median(), inplace=True)
X["Family_Size"].fillna(X["Family_Size"].median(), inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X["Work_Experience"].fillna(X["Work_Experience"].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X["Family_Size"].fillna(X["Family_Size"].median(), inplace=True)


In [5]:
cat_cols = X.select_dtypes(include="object").columns
for col in cat_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])


In [6]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [7]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [8]:
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

In [9]:
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=64, shuffle=True)

In [10]:
class PeopleClassifier(nn.Module):
    def __init__(self, input_size, classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 16)
        self.fc2 = nn.Linear(16, 8)
        self.fc3 = nn.Linear(8, classes)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = PeopleClassifier(X.shape[1], len(label_encoder.classes_))


In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [12]:
epochs = 20
for epoch in range(epochs):
    for xb, yb in loader:
        optimizer.zero_grad()
        outputs = model(xb)
        loss = criterion(outputs, yb)
        loss.backward()
        optimizer.step()
    print(f"Epoch {epoch + 1}/{epochs} completed")

print("\nTraining Completed")

Epoch 1/20 completed
Epoch 2/20 completed
Epoch 3/20 completed
Epoch 4/20 completed
Epoch 5/20 completed
Epoch 6/20 completed
Epoch 7/20 completed
Epoch 8/20 completed
Epoch 9/20 completed
Epoch 10/20 completed
Epoch 11/20 completed
Epoch 12/20 completed
Epoch 13/20 completed
Epoch 14/20 completed
Epoch 15/20 completed
Epoch 16/20 completed
Epoch 17/20 completed
Epoch 18/20 completed
Epoch 19/20 completed
Epoch 20/20 completed

Training Completed


In [13]:
model.eval()
with torch.no_grad():
    predictions = torch.argmax(model(X), dim=1)

print("\nConfusion Matrix:")
print(confusion_matrix(y, predictions))

print("\nClassification Report:")
print(classification_report(
    y,
    predictions,
    target_names=label_encoder.classes_,
    zero_division=0
))



Confusion Matrix:
[[1401  435  337  645]
 [ 779  631  663  335]
 [ 455  386 1267  334]
 [ 744  158  158 1967]]

Classification Report:
              precision    recall  f1-score   support

           A       0.41      0.50      0.45      2818
           B       0.39      0.26      0.31      2408
           C       0.52      0.52      0.52      2442
           D       0.60      0.65      0.62      3027

    accuracy                           0.49     10695
   macro avg       0.48      0.48      0.48     10695
weighted avg       0.49      0.49      0.49     10695



In [14]:
sample = X[0].unsqueeze(0)
with torch.no_grad():
    pred = model(sample)
    result = label_encoder.inverse_transform([torch.argmax(pred).item()])

print("\nSample Prediction:", result[0])



Sample Prediction: D
