In [None]:
import pandas as pd
import seaborn as sns 
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import warnings
# Silence library warnings so they do not clutter the rendered notebook.
# NOTE(review): this also hides deprecation notices; consider narrowing the filter.
warnings.filterwarnings("ignore")
sns.set_style("whitegrid")
import plotly.io as pio
# Single Plotly renderer setting. The value was previously assigned twice in a
# row, so only the last assignment ever took effect ('notebook' is an alternative).
pio.renderers.default = 'iframe_connected'

# Project -
## Customer Tipping Patterns Analysis 

In [None]:
# Location of the tips CSV inside the Kaggle input directory.
DATA_PATH = "/kaggle/input/tip-prediction-dataset/tip.csv"
df = pd.read_csv(DATA_PATH)

# Data Info

In [None]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

In [None]:
# Column names, non-null counts and dtypes of the loaded frame.
df.info()

In [None]:
# Bar chart of the summary statistics (count, mean, std, quartiles, ...) of the numeric columns.
summary_stats = df.describe()
summary_stats.plot.bar(figsize=(12, 6))

In [None]:
# Preview the first rows of the loaded data.
df.head()

# Data Cleaning & Handling

In [None]:
# A notebook cell only renders its last expression, so the first two summaries
# are printed explicitly instead of being silently discarded.
print("Missing values per column:")
print(df.isna().sum())
print("Number of duplicated rows:", df.duplicated().sum())
# Last expression: rendered as a table of the duplicated rows themselves.
df.loc[df.duplicated()]


# EDA & Visualization

In [None]:
# Express each tip relative to its bill, because context matters: a $10 tip on
# a $200 bill is not the same as a $10 tip on a $40 meal.
# The result is a percentage rounded to two decimals.
df["tip_percentage"] = df["tip"].div(df["total_bill"]).mul(100).round(2)

In [None]:
# Preview the first rows to confirm the tip_percentage column is present.
df.head()

In [None]:
# Histogram: how tip percentages are distributed across all bills.
fig1 = px.histogram(
    data_frame=df,
    x="tip_percentage",
    nbins=30,
    labels={"tip_percentage": "Tip Percentage"},
    title="Distribution of Tip Percentage",
    width=1000,
    height=500,
)
fig1.show()

In [None]:
# Box plot: spread of tip percentage for each gender.
fig2 = px.box(
    data_frame=df,
    x="sex",
    y="tip_percentage",
    color="sex",
    labels={"tip_percentage": "Tip Percentage", "sex": "Gender"},
    title="Tip Percentage by Gender",
    width=1000,
    height=500,
)
fig2.show()

In [None]:
# Violin plot (with an embedded box plot): tip percentage split by time value.
fig3 = px.violin(
    data_frame=df,
    x="time",
    y="tip_percentage",
    color="time",
    box=True,
    labels={"tip_percentage": "Tip Percentage", "time": "Time"},
    title="Tip Percentage Distribution by Time of Day",
    width=1000,
    height=500,
)
fig3.show()

In [None]:
def high_low(col, data=None):
    """Print the maximum and minimum value of one column.

    Args:
        col: Name of the column to summarise.
        data: DataFrame to read the column from. When omitted, the
            notebook-level ``df`` is used, so existing ``high_low("tip")``
            calls behave exactly as before.

    Returns:
        None. The two values are written to standard output.
    """
    frame = df if data is None else data
    values = frame[col]
    print(f"{col} Max value:", values.max())
    print(f"{col} Min value:", values.min())

# Report the extremes of tip, total_bill and size, in that order.
for column_name in ("tip", "total_bill", "size"):
    high_low(column_name)



In [None]:
# One histogram (with a KDE overlay) per numeric column.
numeric_columns = df.select_dtypes(include="number").columns
for column_name in numeric_columns:
    sns.histplot(x=column_name, data=df, color="darkred", kde=True)
    plt.title(f"The distribution of {column_name}")
    plt.show()


In [None]:
# Preview the first rows of the frame again.
df.head()

In [None]:
# One count plot per object-dtype column, each drawn on its own 10x6 figure.
categorical_columns = df.select_dtypes(include="object").columns
for column_name in categorical_columns:
    _, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(x=column_name, data=df, palette="Set1", edgecolor="black", ax=ax)
    ax.set_title(f'The counts of {column_name}')
    plt.show()

# Which gender tips the most, by tip value and time of day

In [None]:
# Mean tip per gender, split by time. Error bars are disabled with
# `errorbar=None`, the replacement for the deprecated `ci=None` argument.
plt.figure(figsize=(12, 6))
sns.barplot(
    data=df,
    x="sex",
    y="tip",
    hue="time",
    errorbar=None,
    edgecolor="black",
    palette="Set2",
)
plt.title("Most tips given by which gender,the tip value and the time")
plt.show()

In [None]:
# Static scatter of tip against total bill, coloured by gender.
_, ax = plt.subplots(figsize=(12, 6))
sns.scatterplot(x="total_bill", y="tip", hue="sex", s=30, data=df, ax=ax)
plt.show()

In [None]:
# Interactive version of the tip-vs-bill scatter, coloured by gender.
# `px` comes from the notebook's import cell, so it is not re-imported here.
fig = px.scatter(
    df,
    x="total_bill",
    y="tip",
    color="sex",
    title="Interactive Scatter Plot: Tip vs Total Bill by Gender",
    # Map "sex" to "Gender" so the legend title matches the chart title.
    labels={"total_bill": "Total Bill", "tip": "Tip", "sex": "Gender"},
    width=1000,
    height=500
)

fig.update_traces(marker=dict(size=6))  # Adjust point size
fig.show()


In [None]:
# Preview the first rows of the frame again.
df.head()

In [None]:
# Re-check column dtypes and non-null counts.
df.info()

In [None]:
# Mean tip percentage per smoker status, split by gender.
# Columns are referenced by name because `data=df` is already supplied, and
# `errorbar=None` replaces the deprecated `ci=None` to hide the error bars.
sns.barplot(
    data=df,
    x="smoker",
    y="tip_percentage",
    hue="sex",
    errorbar=None,
    edgecolor="black",
    palette="Set3",
)
plt.title("Tip percentage by smoker and gender")
plt.show()

In [None]:
# px.bar draws one bar per input row and does not aggregate, so the mean tip
# percentage of every (smoker, sex) group is computed before plotting.
# Without this step the bars do not represent a per-group average.
avg_tip_pct = df.groupby(["smoker", "sex"], as_index=False)["tip_percentage"].mean()

fig1 = px.bar(
    avg_tip_pct,
    x="smoker",
    y="tip_percentage",
    color="sex",
    barmode="group",
    title="Average Tip Percentage by Smoker and Gender",
    labels={"smoker": "Smoker Status", "tip_percentage": "Average Tip Percentage", "sex": "Gender"},
    width=1000,
    height=500
)
fig1.show()


In [None]:
# Bar chart of the mean tip for each value of the "time" column.
avg_tip_by_time = df.groupby("time")["tip"].mean().reset_index()
fig3 = px.bar(
    data_frame=avg_tip_by_time,
    x="time",
    y="tip",
    color="time",
    labels={"tip": "Average Tip", "time": "Time"},
    title="Average Tip by Time of Day",
    width=800,
    height=500,
)
fig3.show()



# Histogram of total_bill, with one colour per gender.
fig5 = px.histogram(
    df, x="total_bill", color="sex",
    title="Distribution of Total Bill by Gender",
    # Map "sex" to "Gender" so the legend title matches the chart title.
    labels={"total_bill": "Total Bill", "sex": "Gender"},
    nbins=30,
    width=1000, height=500
)
fig5.show()


In [None]:
# Tip against party size; the total bill of each point is shown on hover.
# `px` comes from the notebook's import cell, so it is not re-imported here.
fig = px.scatter(
    df,
    x="size",
    y="tip",
    hover_data=["total_bill"],
    # Title typos fixed ("Correlatiion Bteween" -> "Correlation Between").
    title="Interactive Scatter Plot: Correlation Between Tip, Number of Guests and Total Bill",
    # "size" gets a readable axis label like the other plotted columns.
    labels={"total_bill": "Total Bill", "tip": "Tip", "size": "Number of Guests"},
    width=1000,
    height=500
)

fig.update_traces(marker=dict(size=8))  # Optional: adjust point size
fig.show()


In [None]:
# Pairwise correlation of every numeric column in the frame. This covers any
# derived numeric column such as tip_percentage as well, so the title names
# the matrix generically instead of listing only three of the columns.
cor = df.select_dtypes(include="number").corr()
plt.figure(figsize=(10, 6))
sns.heatmap(cor, annot=True, fmt=".2f")
plt.title("Correlation between numeric columns")
plt.show()