In [7]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import warnings 

# Configure environment settings
warnings.filterwarnings("ignore")
%matplotlib inline

In [8]:
# Load dataset (using seaborn's built-in tips data for demonstration)
df = sns.load_dataset('tips')

# Basic inspection
print(df.head())
print(f"Dataset Shape: {df.shape}")
# DataFrame.info() writes its summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() appended a stray "None" line to the output.
df.info()

   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4
Dataset Shape: (244, 7)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB
None


In [9]:
# Encoding categorical variables
# One {label: integer code} table per categorical column.
# NOTE(review): the 'day' codes are arbitrary integers, which a linear model will treat
# as an ordered quantity — consider one-hot encoding if that matters for the analysis.
CATEGORY_CODES = {
    'sex': {'Male': 1, 'Female': 0},
    'smoker': {'No': 0, 'Yes': 1},
    'day': {'Sun': 0, 'Sat': 1, 'Thur': 2, 'Fri': 3},
    'time': {'Dinner': 1, 'Lunch': 0},
}

for column, codes in CATEGORY_CODES.items():
    # Re-running this cell must be a no-op: pushing an already-encoded column through
    # the label table again would replace every value with NaN.
    if pd.api.types.is_numeric_dtype(df[column]):
        continue

    encoded = df[column].astype(object).map(codes)

    # Fail loudly on labels missing from the table instead of handing NaN to the models.
    unmapped = encoded.isna()
    if unmapped.any():
        unknown = sorted(set(df.loc[unmapped, column].astype(str)))
        raise ValueError(f"Unmapped values in column '{column}': {unknown}")

    # Store plain integer codes rather than keeping a categorical dtype.
    df[column] = encoded.astype('int64')

# Feature selection for both models
X = df[['total_bill', 'size', 'sex', 'smoker', 'day', 'time']]

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Regression target: the tip amount itself
y_reg = df['tip']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train_r, X_test_r, y_train_r, y_test_r = train_test_split(
    X,
    y_reg,
    test_size=0.2,
    random_state=42,
)

# Fit a linear regression on the training portion (fit() returns the estimator itself)
reg_model = LinearRegression().fit(X_train_r, y_train_r)

def predict_tip_amount(customer_data):
    """Predict the tip for one customer using the fitted regression model.

    Parameters
    ----------
    customer_data : pandas.DataFrame or array-like
        Feature values for the customer, with categorical fields already given as
        their integer codes. A DataFrame must contain every column the model was
        trained on; column order and extra columns do not matter. Any other input
        is handed to the model unchanged. Only the first row is scored.

    Returns
    -------
    The model's prediction for the first row of ``customer_data``.

    Raises
    ------
    KeyError
        If ``customer_data`` is a DataFrame that lacks a training feature column.
    """
    if isinstance(customer_data, pd.DataFrame):
        # Pick the training columns in training order so the prediction does not
        # depend on how the caller happened to order (or pad) the frame.
        customer_data = customer_data[list(X_train_r.columns)]
    return reg_model.predict(customer_data)[0]

In [11]:
from sklearn.ensemble import RandomForestClassifier

# Target for Classification: 1 if tip > $3 (Premium), else 0 (Standard)
PREMIUM_TIP_THRESHOLD = 3  # dollars; tips strictly above this are labelled Premium
df['is_premium'] = (df['tip'] > PREMIUM_TIP_THRESHOLD).astype(int)
y_clf = df['is_premium']

# Split data
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X, y_clf, test_size=0.2, random_state=42)

# Train Classification Model
# A random forest is randomized (bootstrap samples, feature sub-sampling); without a
# fixed random_state every run of this cell can produce a different model and
# therefore different predictions. The seed matches the one used for the split.
clf_model = RandomForestClassifier(random_state=42)
clf_model.fit(X_train_c, y_train_c)

def classify_customer(customer_data):
    """Label one customer as "Premium" or "Standard" using the fitted classifier.

    Parameters
    ----------
    customer_data : pandas.DataFrame or array-like
        Feature values for the customer, with categorical fields already given as
        their integer codes. A DataFrame must contain every column the model was
        trained on; column order and extra columns do not matter. Any other input
        is handed to the model unchanged. Only the first row is classified.

    Returns
    -------
    str
        "Premium" when the model predicts class 1 for the first row, otherwise
        "Standard".

    Raises
    ------
    KeyError
        If ``customer_data`` is a DataFrame that lacks a training feature column.
    """
    if isinstance(customer_data, pd.DataFrame):
        # Pick the training columns in training order so the label does not depend
        # on how the caller happened to order (or pad) the frame.
        customer_data = customer_data[list(X_train_c.columns)]
    predicted_class = clf_model.predict(customer_data)[0]
    return "Premium" if predicted_class == 1 else "Standard"

In [12]:
# Build a one-row frame for a hypothetical customer (categoricals given as integer codes)
example_customer = pd.DataFrame([
    {
        'total_bill': 50.00,
        'size': 4,
        'sex': 1,       # Male
        'smoker': 0,    # No
        'day': 1,       # Saturday
        'time': 1,      # Dinner
    }
])

# Score the customer with both models and report the results
print("--- Prediction System Results ---")
tip_estimate = predict_tip_amount(example_customer)
print(f"Predicted Tip Amount: ${tip_estimate:.2f}")
customer_category = classify_customer(example_customer)
print(f"Predicted Customer Category: {customer_category}")

--- Prediction System Results ---
Predicted Tip Amount: $6.29
Predicted Customer Category: Premium
