In [None]:
# 1️⃣ Load Libraries & Dataset
import seaborn as sns
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Load tips dataset
df = sns.load_dataset("tips")
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [3]:
# 2️⃣ Feature Engineering (Important Step)
# Create target variable
df['tip_pct'] = df['tip'] / df['total_bill']
df['high_tip'] = (df['tip_pct'] >= 0.2).astype(int)


In [6]:
# 3️⃣ Select Features & Encode Categorical Data
X = df[['total_bill', 'size', 'sex', 'smoker', 'day', 'time']]
y = df['high_tip']

# One-hot encoding
X = pd.get_dummies(X, drop_first=True)


In [7]:
# 4️⃣ Train–Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [8]:
# 5️⃣ Decision Tree Model 
dt = DecisionTreeClassifier(max_depth=4, random_state=42)
dt.fit(X_train, y_train)

y_pred_dt = dt.predict(X_test)

print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
print(classification_report(y_test, y_pred_dt))

Decision Tree Accuracy: 0.8367346938775511
              precision    recall  f1-score   support

           0       0.85      0.98      0.91        41
           1       0.50      0.12      0.20         8

    accuracy                           0.84        49
   macro avg       0.68      0.55      0.55        49
weighted avg       0.79      0.84      0.79        49



In [9]:
# 6️⃣ Logistic Regression Model
lr = LogisticRegression(max_iter=200)
lr.fit(X_train, y_train)

y_pred_lr = lr.predict(X_test)

print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))
print(classification_report(y_test, y_pred_lr))

Logistic Regression Accuracy: 0.8367346938775511
              precision    recall  f1-score   support

           0       0.84      1.00      0.91        41
           1       0.00      0.00      0.00         8

    accuracy                           0.84        49
   macro avg       0.42      0.50      0.46        49
weighted avg       0.70      0.84      0.76        49



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [11]:
# 7️⃣ Performance Comparison
print("Decision Tree vs Logistic Regression Comparison:")
print("Decision Tree Accuracy:", accuracy_score(y_test, y_pred_dt))
print("Logistic Regression Accuracy:", accuracy_score(y_test, y_pred_lr))
# Analyze which model performed better based on accuracy and classification report

Decision Tree vs Logistic Regression Comparison:
Decision Tree Accuracy: 0.8367346938775511
Logistic Regression Accuracy: 0.8367346938775511
