In [2]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [3]:
# Read the T-shirt size dataset (path is relative to the working directory)
# and print a confirmation followed by the first few rows.
csv_path = "TShirt_size.csv"
df = pd.read_csv(csv_path)
print("Dataset Loaded Successfully")
preview = df.head()
print(preview)


Dataset Loaded Successfully
   Height (in cms)  Weight (in kgs) T Shirt Size
0              158               58            M
1              158               59            M
2              158               63            M
3              160               59            M
4              160               60            M


In [4]:
# Normalise the column headers into snake_case identifiers: trim surrounding
# whitespace, lower-case, turn spaces into underscores and drop parentheses.
# Every replacement is an explicit literal (regex=False) so the result does
# not depend on the installed pandas version's default for `regex`.
df.columns = (
    df.columns
    .str.strip()
    .str.lower()
    .str.replace(" ", "_", regex=False)
    .str.replace("(", "", regex=False)
    .str.replace(")", "", regex=False)
)

# Show the cleaned headers so later cells can be checked against them.
print("\nColumns after fixing:")
print(df.columns)



Columns after fixing:
Index(['height_in_cms', 'weight_in_kgs', 't_shirt_size'], dtype='object')


In [5]:
# Encode the size labels as integer codes, overwriting the text column.
# The fitted encoder stays in `le` so predicted codes can be mapped back to
# the original labels with `le.inverse_transform`.
# NOTE(review): re-running this cell without reloading `df` refits the encoder
# on the integer codes, so `le.inverse_transform` would then return integers
# instead of the size labels.
le = LabelEncoder()
size_codes = le.fit(df["t_shirt_size"]).transform(df["t_shirt_size"])
df["t_shirt_size"] = size_codes

In [6]:
# Feature matrix: the two body-measurement columns. Target: the size column.
feature_columns = ["height_in_cms", "weight_in_kgs"]
X = df[feature_columns]
y = df["t_shirt_size"]

In [7]:
# Hold out a quarter of the rows for evaluation. Stratifying on `y` keeps the
# class proportions the same in both splits, and the fixed seed makes the
# split repeatable.
split_options = {"test_size": 0.25, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [8]:
# Fit a Gaussian Naive Bayes classifier on the training split and predict the
# held-out rows.
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

# Report held-out performance: overall accuracy, the confusion matrix and the
# per-class precision/recall. Classes appear as the integer codes produced by
# the label encoder.
print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Classify one unseen person, given as (height in cm, weight in kg). The sample
# is wrapped in a DataFrame that reuses the training column names: the model
# was fitted on a DataFrame, and passing a bare list makes scikit-learn warn
# that the input has no valid feature names and ties the values to the
# training columns by position only.
new_person = pd.DataFrame([[165, 63]], columns=X_train.columns)
prediction = nb.predict(new_person)
# Map the predicted integer code back to the original size label.
print("\nPredicted T-Shirt Size:", le.inverse_transform(prediction)[0])



Accuracy: 1.0

Confusion Matrix:
[[3 0]
 [0 2]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       1.00      1.00      1.00         2

    accuracy                           1.00         5
   macro avg       1.00      1.00      1.00         5
weighted avg       1.00      1.00      1.00         5


Predicted T-Shirt Size: L


