# Linear Regression implementation

In [69]:
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [70]:
# Load the 'tips' example dataset through seaborn and preview its first rows
dataset_name = 'tips'
df = sns.load_dataset(dataset_name)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [71]:
# Encode the smoker column as integers (Yes -> 1, No -> 0).
# The guard keeps this cell safe to re-run: pushing an already-encoded column
# through the label dictionary again would turn every value into NaN.
smoker_codes = {'Yes': 1, 'No': 0}
if df['smoker'].isin(list(smoker_codes)).any():
    # .astype(int) produces an integer column and raises if any value was
    # left unmapped (NaN) instead of silently handing NaN to the model.
    df['smoker'] = df['smoker'].map(smoker_codes).astype(int)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,0,Sun,Dinner,2
1,10.34,1.66,Male,0,Sun,Dinner,3
2,21.01,3.5,Male,0,Sun,Dinner,3
3,23.68,3.31,Male,0,Sun,Dinner,2
4,24.59,3.61,Female,0,Sun,Dinner,4


In [72]:
# Choose the predictor columns (features) and the column to predict (target)
feature_columns = ["total_bill", "size", "smoker"]
target_column = "tip"

x = df[feature_columns]
y = df[target_column]

In [73]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [74]:
# Create the linear regression estimator and fit it on the training split
# (fit() returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(x_train, y_train)
model

In [None]:
# Predict tips for previously unseen inputs.
# The model was fitted on a DataFrame, so the new rows are wrapped in a
# DataFrame labelled with the fitted feature names. This makes the column
# order explicit and avoids scikit-learn's "X does not have valid feature
# names" warning that bare nested lists trigger.
new_observations = pd.DataFrame(
    [
        [17.54, 4, 1],
        [65.27, 5, 0],
    ],
    columns=model.feature_names_in_,
)

# Print each prediction as its own one-element array, one per input row
for prediction in model.predict(new_observations).reshape(-1, 1):
    print(prediction)

[3.06250706]
[7.95715599]




In [76]:
# Generate tip predictions for the held-out test features
y_pred = model.predict(X=x_test)

In [77]:
# Score the test-set predictions against the true tips
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics rounded to two decimals
print(f"📊 Mean Squared Error (MSE): {mse:.2f}")
print(f"📈 R-squared Score (R²): {r2:.2f}")

📊 Mean Squared Error (MSE): 0.69
📈 R-squared Score (R²): 0.45
