In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Load the tips dataset. The relative path means "tips.csv" must sit in the
# notebook's working directory.
data = pd.read_csv("tips.csv")

# End the cell with the expression itself (instead of print) so the notebook
# renders the first rows as a formatted table rather than plain text.
data.head()

In [None]:
# Scatter of tip against total bill, one colour per value of `day`; marker size
# follows the `size` column and an OLS trendline is requested.
figure = px.scatter(
    data_frame=data,
    x="total_bill", y="tip",
    size="size", color="day",
    trendline="ols",
    opacity=0.5,
    width=1200, height=400,
    title="Total Bill vs Tip by Day",
)
figure.show()

In [None]:
# Scatter of tip against total bill, one colour per value of `sex`; marker size
# follows the `size` column and an OLS trendline is requested.
# A title is set so the figure can be understood on its own when skimming.
figure = px.scatter(
    data_frame=data,
    x="total_bill",
    y="tip",
    size="size",
    color="sex",
    trendline="ols",
    title="Total Bill vs Tip by Sex",
)
figure.show()

In [None]:
# Scatter of tip against total bill, one colour per value of `time`; marker size
# follows the `size` column and an OLS trendline is requested.
# A title is set so the figure can be understood on its own when skimming.
figure = px.scatter(
    data_frame=data,
    x="total_bill",
    y="tip",
    size="size",
    color="time",
    trendline="ols",
    title="Total Bill vs Tip by Time",
)
figure.show()

In [None]:
# Donut chart (hole=0.5) of the `tip` column with one sector per value of `day`.
# A title is set so the figure can be understood on its own when skimming.
figure = px.pie(
    data,
    values="tip",
    names="day",
    hole=0.5,
    height=500,
    width=500,
    title="Tips by Day",
)
figure.show()

In [None]:
# Donut chart (hole=0.5) of the `tip` column with one sector per value of `sex`.
# A title is set so the figure can be understood on its own when skimming.
figure = px.pie(
    data,
    values="tip",
    names="sex",
    hole=0.5,
    height=500,
    width=500,
    title="Tips by Sex",
)
figure.show()

## Transforming categorical values into numerical values

In [None]:
# Replace the text labels of the categorical columns with integer codes.
#
# The cell overwrites columns of `data` in place, so it is guarded to be safe on
# re-run: a column that is already numeric is skipped. Without the guard, a
# second run would push the integer codes through the string-keyed mappings and
# turn every value into NaN.
category_codes = {
    "sex": {"Female": 0, "Male": 1},
    "smoker": {"No": 0, "Yes": 1},
    "day": {"Thur": 0, "Fri": 1, "Sat": 2, "Sun": 3},
    "time": {"Lunch": 0, "Dinner": 1},
}

for column, codes in category_codes.items():
    if pd.api.types.is_numeric_dtype(data[column]):
        continue  # already encoded by an earlier run of this cell
    encoded = data[column].map(codes)
    # Series.map yields NaN for any label that is not a key of `codes`; stop
    # here with a clear message instead of passing NaN to later cells.
    if encoded.isna().any():
        raise ValueError(f"Column {column!r} contains values with no integer code")
    data[column] = encoded

data.head()

## Machine Learning Model

In [None]:
# Build the feature matrix and the target vector, then hold out a test split
from sklearn.model_selection import train_test_split

feature_columns = ["total_bill", "sex", "smoker", "day", "time", "size"]
x = np.array(data[feature_columns])
y = np.array(data["tip"])

# 20% of the rows go to the test split; the fixed random_state keeps the
# partition the same from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, random_state=42, test_size=0.2
)

In [None]:
# Fit a linear regression on the training split to predict the tip amount
from sklearn.linear_model import LinearRegression

model = LinearRegression()
model.fit(X=xtrain, y=ytrain)

In [64]:
# Evaluate the fitted model, then predict the tip for one hypothetical table.
# The R^2 score on the held-out split is the actual performance check; the
# single prediction below is only an illustration.
print(f"R^2 on the test split: {model.score(xtest, ytest):.3f}")

# One row in the same column order used to build the training matrix:
# [total_bill, sex, smoker, day, time, size], with the categorical values
# given as their integer codes (sex 1 = Male, smoker 0 = No, day 3 = Sun,
# time 1 = Dinner).
features = np.array([[34.50, 1, 0, 3, 1, 5]])
model.predict(features)

array([5.1051877])

The model predicts a tip of about $5.11 for a scenario with the following attributes:

- a bill of $34.50 (with tax)
- a male waiter
- a non-smoking table
- on a Sunday
- for dinner
- a group of 5 patrons
