In [None]:
# -----------------------------------------
# 1. Import Libraries
# -----------------------------------------
from google.colab import files

# Ask the user to pick the CSV files in the browser and keep the result in
# `uploaded`. Colab prints "Saving <name> to <saved name>" for every file; the
# saved name can differ from the original one (e.g. "train_dataset (1).csv")
# when a file with that name already exists in the working directory.
uploaded = files.upload()   # Choose train_dataset.csv and test_dataset.csv

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# -----------------------------------------
# 2. Load Dataset
# (replace with your filename)
# -----------------------------------------
import io  # imported here so this section stays self-contained

# Colab renames an upload when the name is already taken (for example
# "train_dataset (1).csv"), so reading the fixed path "train_dataset.csv"
# can silently pick up a stale copy from an earlier run. Prefer the bytes
# returned by this session's upload and only fall back to the path on disk.
train_upload = next(
    (name for name in uploaded if name.startswith("train_dataset")),
    None,
)
if train_upload is not None:
    df = pd.read_csv(io.BytesIO(uploaded[train_upload]))
else:
    df = pd.read_csv("train_dataset.csv")   # or your file name

# -----------------------------------------
# 3. Inspect
# -----------------------------------------
print(df.head())          # first rows, to eyeball column names and values
print(df.isnull().sum())  # missing-value count per column

# -----------------------------------------
# 4. Separate Features and Target
# -----------------------------------------
# The label lives in the "smoking" column; everything else is a feature.
y = df["smoking"]
X = df.drop(columns="smoking")

# -----------------------------------------
# 5. Train / Test Split
# -----------------------------------------
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# -----------------------------------------
# 6. Preprocessing + SVM Pipeline
# -----------------------------------------
# Every feature column is routed through the numeric branch below.
numeric_features = X.columns

# Fill missing values with the per-column median.
preprocess = ColumnTransformer(
    [("num", SimpleImputer(strategy="median"), numeric_features)]
)

# Full model: impute -> standardize -> RBF-kernel SVM.
# Keeping all three steps in one Pipeline means the imputer and scaler are
# fitted only on the data passed to fit().
svm_clf = Pipeline(
    [
        ("preprocess", preprocess),
        ("scaler", StandardScaler()),
        ("svm", SVC(C=2, kernel="rbf", gamma="scale", probability=True)),
    ]
)

# -----------------------------------------
# 7. Train Model
# -----------------------------------------
# Fit the whole pipeline on the training part only.
svm_clf.fit(X_train, y_train)

# -----------------------------------------
# 8. Predictions
# -----------------------------------------
# Predicted labels for the held-out rows.
y_pred = svm_clf.predict(X_test)

# -----------------------------------------
# 9. Model Evaluation
# -----------------------------------------
# Each metric compares the true held-out labels with the predictions and is
# printed under its own heading, in the order listed.
for heading, metric in (
    ("\nAccuracy:", accuracy_score),
    ("\nClassification Report:\n", classification_report),
    ("\nConfusion Matrix:\n", confusion_matrix),
):
    print(heading, metric(y_test, y_pred))


Saving train_dataset.csv to train_dataset (1).csv
Saving test_dataset.csv to test_dataset.csv
   age  height(cm)  weight(kg)  waist(cm)  eyesight(left)  eyesight(right)  \
0   35         170          85       97.0             0.9              0.9   
1   20         175         110      110.0             0.7              0.9   
2   45         155          65       86.0             0.9              0.9   
3   45         165          80       94.0             0.8              0.7   
4   20         165          60       81.0             1.5              0.1   

   hearing(left)  hearing(right)  systolic  relaxation  ...  HDL  LDL  \
0              1               1       118          78  ...   70  142   
1              1               1       119          79  ...   71  114   
2              1               1       110          80  ...   57  112   
3              1               1       158          88  ...   46   91   
4              1               1       109          64  ...   47   92   