In [4]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load dataset
df = sns.load_dataset("tips")

# Preprocessing the data before splitting
scaler = StandardScaler()
df[["total_bill", "tip"]] = scaler.fit_transform(df[["total_bill", "tip"]])

# Separate features (inputs or 'X') and targets / labels (outputs or 'Y')
X = df[["total_bill"]]
Y = df[["tip"]]

# Split the data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

# Create Model instance
model = LinearRegression()

# Fit the model or train the model
model.fit(X_train, Y_train)

# Prediction by model
Y_pred = model.predict(X_test)

# Evaluate the model
print("MSE: ", mean_squared_error(Y_test, Y_pred))
print("r2: ", r2_score(Y_test, Y_pred))

# Transform the input value before predicting
original_value = pd.DataFrame({"total_bill": [24.59], "tip": [0]})
scaled_value = scaler.transform(original_value)
predicted_scaled_value = model.predict(scaled_value[:, 0].reshape(-1, 1))

# Inverse transform the predicted value to get it back to the original scale
# Note: Since we are predicting 'tip', we need to inverse transform only the 'tip' column
predicted_original_value = scaler.inverse_transform(
    np.concatenate((scaled_value[:, 0].reshape(-1, 1), predicted_scaled_value), axis=1)
)[:, 1]

print(predicted_original_value)

MSE:  0.38758102511348064
r2:  0.5960035062220497
[3.50046172]


