In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
%matplotlib inline

In [2]:
# Read the bakery CSV from the Kaggle input directory into `df`.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/bakery/Bakery.csv')

In [3]:
# Preview the first five rows to check the columns and some sample values.
df.head(n=5)

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend


In [4]:
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20507 entries, 0 to 20506
Data columns (total 5 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   TransactionNo  20507 non-null  int64 
 1   Items          20507 non-null  object
 2   DateTime       20507 non-null  object
 3   Daypart        20507 non-null  object
 4   DayType        20507 non-null  object
dtypes: int64(1), object(4)
memory usage: 801.2+ KB


In [5]:
# Parse the raw `DateTime` strings into real timestamps.
df['DateTime'] = pd.to_datetime(df['DateTime'])

# Calendar day of each row. `.dt.normalize()` resets the time-of-day to
# midnight but keeps the column as datetime64, so the `.dt` accessor keeps
# working on it; `.dt.date` would instead yield Python `date` objects in an
# object-dtype column that has to be parsed again before use.
df['Date'] = df['DateTime'].dt.normalize()

# Time-of-day of each row, stored as Python `time` objects (object dtype).
df['Time'] = df['DateTime'].dt.time
df

Unnamed: 0,TransactionNo,Items,DateTime,Daypart,DayType,Date,Time
0,1,Bread,2016-10-30 09:58:11,Morning,Weekend,2016-10-30,09:58:11
1,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend,2016-10-30,10:05:34
2,2,Scandinavian,2016-10-30 10:05:34,Morning,Weekend,2016-10-30,10:05:34
3,3,Hot chocolate,2016-10-30 10:07:57,Morning,Weekend,2016-10-30,10:07:57
4,3,Jam,2016-10-30 10:07:57,Morning,Weekend,2016-10-30,10:07:57
...,...,...,...,...,...,...,...
20502,9682,Coffee,2017-09-04 14:32:58,Afternoon,Weekend,2017-09-04,14:32:58
20503,9682,Tea,2017-09-04 14:32:58,Afternoon,Weekend,2017-09-04,14:32:58
20504,9683,Coffee,2017-09-04 14:57:06,Afternoon,Weekend,2017-09-04,14:57:06
20505,9683,Pastry,2017-09-04 14:57:06,Afternoon,Weekend,2017-09-04,14:57:06


In [6]:
# Remove both the full timestamp and the time-of-day column in a single call.
df = df.drop(columns=['DateTime', 'Time'])
df

Unnamed: 0,TransactionNo,Items,Daypart,DayType,Date
0,1,Bread,Morning,Weekend,2016-10-30
1,2,Scandinavian,Morning,Weekend,2016-10-30
2,2,Scandinavian,Morning,Weekend,2016-10-30
3,3,Hot chocolate,Morning,Weekend,2016-10-30
4,3,Jam,Morning,Weekend,2016-10-30
...,...,...,...,...,...
20502,9682,Coffee,Afternoon,Weekend,2017-09-04
20503,9682,Tea,Afternoon,Weekend,2017-09-04
20504,9683,Coffee,Afternoon,Weekend,2017-09-04
20505,9683,Pastry,Afternoon,Weekend,2017-09-04


In [7]:
# Make sure `Date` is datetime64 so the `.dt` accessor can be used on it
# (this is a no-op when the column already has that dtype).
df = df.assign(Date=pd.to_datetime(df['Date']))

In [8]:
# Derive the calendar month and year from `Date` as two new columns.
df = df.assign(
    Month=df['Date'].dt.month,
    Year=df['Date'].dt.year,
)
df

Unnamed: 0,TransactionNo,Items,Daypart,DayType,Date,Month,Year
0,1,Bread,Morning,Weekend,2016-10-30,10,2016
1,2,Scandinavian,Morning,Weekend,2016-10-30,10,2016
2,2,Scandinavian,Morning,Weekend,2016-10-30,10,2016
3,3,Hot chocolate,Morning,Weekend,2016-10-30,10,2016
4,3,Jam,Morning,Weekend,2016-10-30,10,2016
...,...,...,...,...,...,...,...
20502,9682,Coffee,Afternoon,Weekend,2017-09-04,9,2017
20503,9682,Tea,Afternoon,Weekend,2017-09-04,9,2017
20504,9683,Coffee,Afternoon,Weekend,2017-09-04,9,2017
20505,9683,Pastry,Afternoon,Weekend,2017-09-04,9,2017


In [9]:
# Cast both derived calendar columns to `int` in one call.
df = df.astype({'Year': int, 'Month': int})

In [10]:
# Print the current columns with their dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20507 entries, 0 to 20506
Data columns (total 7 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   TransactionNo  20507 non-null  int64         
 1   Items          20507 non-null  object        
 2   Daypart        20507 non-null  object        
 3   DayType        20507 non-null  object        
 4   Date           20507 non-null  datetime64[ns]
 5   Month          20507 non-null  int64         
 6   Year           20507 non-null  int64         
dtypes: datetime64[ns](1), int64(3), object(3)
memory usage: 1.1+ MB


In [11]:
# Number of non-null `Items` entries per year, as a flat two-column frame.
Yearly_items = (
    df.groupby('Year')['Items']
    .count()
    .reset_index()
)
Yearly_items

Unnamed: 0,Year,Items
0,2016,8144
1,2017,12363


In [12]:
# Number of non-null `Items` entries for every (DayType, Daypart) combination.
DayPartType_items = (
    df.groupby(['DayType', 'Daypart'])['Items']
    .count()
    .reset_index()
)
DayPartType_items

Unnamed: 0,DayType,Daypart,Items
0,Weekday,Afternoon,7273
1,Weekday,Evening,356
2,Weekday,Morning,5174
3,Weekday,Night,4
4,Weekend,Afternoon,4296
5,Weekend,Evening,164
6,Weekend,Morning,3230
7,Weekend,Night,10


In [13]:
# Rows per item. The resulting count column keeps the name 'TransactionNo'.
count_items = (
    df.groupby('Items')['TransactionNo']
    .count()
    .reset_index(name='TransactionNo')
)

# Most frequent items first (sorted on the count held in 'TransactionNo').
sorted_items = count_items.sort_values('TransactionNo', ascending=False)

# Keep the ten most frequent items.
top_10_items = sorted_items.iloc[:10]

top_10_items

Unnamed: 0,Items,TransactionNo
23,Coffee,5471
11,Bread,3325
83,Tea,1435
15,Cake,1025
65,Pastry,856
73,Sandwich,771
55,Medialuna,616
48,Hot chocolate,590
26,Cookies,540
14,Brownie,379


In [14]:
# Bar chart of the ten most frequent items; bar height is the per-item count
# stored in the 'TransactionNo' column of `top_10_items`.
fig = px.bar(
    top_10_items,
    x='Items',
    y='TransactionNo',
    title='Top 10 Items',
    height=400,
)
fig.show()

In [15]:
# Row counts per Daypart as a two-column frame: Daypart, Total_Transactions.
# The index and the value column are named explicitly instead of renaming a
# 'count' column afterwards: `value_counts().reset_index()` only produces a
# 'count' column on pandas >= 2.0, so the rename silently did nothing on
# older versions and left the frame without a 'Total_Transactions' column.
# NOTE: this counts rows of `df`, not distinct TransactionNo values.
daypartTran = (
    df['Daypart']
    .value_counts()
    .rename_axis('Daypart')
    .reset_index(name='Total_Transactions')
)
daypartTran


Unnamed: 0,Daypart,Total_Transactions
0,Afternoon,11569
1,Morning,8404
2,Evening,520
3,Night,14


In [16]:
# Pie chart: each Daypart's share of the totals held in `daypartTran`.
fig = px.pie(
    daypartTran,
    names='Daypart',
    values='Total_Transactions',
    title='Percentage of Transactions',
)
fig.show()

In [17]:
# Row counts per DayType as a two-column frame: DayType, Total_Transactions.
# The index and the value column are named explicitly instead of renaming a
# 'count' column afterwards: `value_counts().reset_index()` only produces a
# 'count' column on pandas >= 2.0, so the rename silently did nothing on
# older versions and left the frame without a 'Total_Transactions' column.
# NOTE: this counts rows of `df`, not distinct TransactionNo values.
daytypeTran = (
    df['DayType']
    .value_counts()
    .rename_axis('DayType')
    .reset_index(name='Total_Transactions')
)
daytypeTran

Unnamed: 0,DayType,Total_Transactions
0,Weekday,12807
1,Weekend,7700


In [18]:
# Pie chart: each DayType's share of the totals held in `daytypeTran`.
fig = px.pie(
    daytypeTran,
    names='DayType',
    values='Total_Transactions',
    title='Percentage of Transactions',
)
fig.show()