In [1]:
import numpy as np
import pandas as pd

# Load the tab-separated Chipotle orders file from GitHub and preview the first rows.
df = pd.read_csv(
    'https://raw.githubusercontent.com/bipulshahi/Dataset/main/chipotle.tsv',
    sep='\t',
)
df.head(n=5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [2]:
# Show the dtype of every column; item_price is still `object` (text) at this point.
df.dtypes

order_id               int64
quantity               int64
item_name             object
choice_description    object
item_price            object
dtype: object

In [3]:
# Sum of the quantity column over all rows of df.
print("Total Quantity Sold" , df['quantity'].sum())

Total Quantity Sold 4972


In [4]:
# Strip the currency symbol so the column can later be cast to float.
# regex=False is passed explicitly: '$' is a regex metacharacter (end-of-string
# anchor) and the default of `regex` differs between pandas versions, so relying
# on the default can silently leave the '$' in place.
df['item_price'] = df['item_price'].str.replace('$', '', regex=False)

In [5]:
# Convert the cleaned price strings to floating-point numbers.
df['item_price'] = df['item_price'].astype('float64')

In [6]:
# Sum and mean of item_price over all rows of df.
# Note: the mean is taken per row (one row per item line), not per order_id.
print("Total Price (Sales value)" , df['item_price'].sum())
print("Average Price (Average Sales value)" , df['item_price'].mean())

Total Price (Sales value) 34500.16
Average Price (Average Sales value) 7.464335785374297


In [7]:
# Preview the first five rows to confirm item_price is now numeric.
df.head(n=5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


In [8]:
# Total quantity sold per item, highest first.
(
    df.groupby('item_name')['quantity']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)

Unnamed: 0,item_name,quantity
0,Chicken Bowl,761
1,Chicken Burrito,591
2,Chips and Guacamole,506
3,Steak Burrito,386
4,Canned Soft Drink,351
5,Chips,230
6,Steak Bowl,221
7,Bottled Water,211
8,Chips and Fresh Tomato Salsa,130
9,Canned Soda,126


In [9]:
# Total sales value (summed item_price) per item, highest first.
(
    df.groupby('item_name')['item_price']
    .sum()
    .sort_values(ascending=False)
    .reset_index()
)

Unnamed: 0,item_name,item_price
0,Chicken Bowl,7342.73
1,Chicken Burrito,5575.82
2,Steak Burrito,3851.43
3,Steak Bowl,2260.19
4,Chips and Guacamole,2201.04
5,Chicken Salad Bowl,1228.75
6,Chicken Soft Tacos,1108.09
7,Veggie Burrito,934.77
8,Barbacoa Burrito,894.75
9,Veggie Bowl,867.99


In [14]:
# Per-item totals of quantity and item_price, ordered by quantity (descending).
df_item_info = (
    df.groupby('item_name')[['quantity', 'item_price']]
    .sum()
    .sort_values(by='quantity', ascending=False)
    .reset_index()
)

df_item_info

Unnamed: 0,item_name,quantity,item_price
0,Chicken Bowl,761,7342.73
1,Chicken Burrito,591,5575.82
2,Chips and Guacamole,506,2201.04
3,Steak Burrito,386,3851.43
4,Canned Soft Drink,351,438.75
5,Chips,230,494.34
6,Steak Bowl,221,2260.19
7,Bottled Water,211,302.56
8,Chips and Fresh Tomato Salsa,130,361.36
9,Canned Soda,126,137.34


In [15]:
# List the column labels of the per-item summary frame.
df_item_info.columns


Index(['item_name', 'quantity', 'item_price'], dtype='object')

In [16]:
# Average price per unit for each item: summed item_price divided by summed quantity.
df_item_info['per_item_price'] = df_item_info['item_price'].div(df_item_info['quantity'])

In [18]:
# Preview the per-item summary, now including per_item_price.
df_item_info.head(n=5)

Unnamed: 0,item_name,quantity,item_price,per_item_price
0,Chicken Bowl,761,7342.73,9.648791
1,Chicken Burrito,591,5575.82,9.434552
2,Chips and Guacamole,506,2201.04,4.349881
3,Steak Burrito,386,3851.43,9.977798
4,Canned Soft Drink,351,438.75,1.25


In [19]:
# Number of rows in df on which each item appears.
# NOTE: value_counts() counts rows, not distinct order_id values, so an item
# listed on two rows of the same order contributes 2 to total_orders.
# The columns are named directly via rename_axis / reset_index(name=...)
# instead of overwriting .columns positionally.
df_item_order_numbers = (
    df['item_name']
    .value_counts()
    .rename_axis('item_name')
    .reset_index(name='total_orders')
)
df_item_order_numbers.head()

Unnamed: 0,item_name,total_orders
0,Chicken Bowl,726
1,Chicken Burrito,553
2,Chips and Guacamole,479
3,Steak Burrito,368
4,Canned Soft Drink,301


In [20]:
# Attach total_orders to the per-item summary (default inner join on item_name).
df_each_item_info = df_item_info.merge(df_item_order_numbers, on='item_name')
df_each_item_info.head(n=5)

Unnamed: 0,item_name,quantity,item_price,per_item_price,total_orders
0,Chicken Bowl,761,7342.73,9.648791,726
1,Chicken Burrito,591,5575.82,9.434552,553
2,Chips and Guacamole,506,2201.04,4.349881,479
3,Steak Burrito,386,3851.43,9.977798,368
4,Canned Soft Drink,351,438.75,1.25,301


In [21]:
# Look at the raw order rows again before filtering by item name.
df.head(n=5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


In [22]:
# Exact-equality filter: only rows whose item_name is precisely 'Chicken'.
df.loc[df['item_name'].eq('Chicken')]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price


In [23]:
# Substring filter: keep every row whose item_name contains 'Chicken'.
dfc = df.loc[df['item_name'].str.contains('Chicken')]
dfc.head(n=5)


Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",10.98
11,6,1,Chicken Crispy Tacos,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",8.75
12,6,1,Chicken Soft Tacos,"[Roasted Chili Corn Salsa, [Rice, Black Beans,...",8.75
13,7,1,Chicken Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",11.25


In [24]:
# Substring filter: every row whose item_name contains 'Chips'.
df.loc[df['item_name'].str.contains('Chips')]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
6,3,1,Side of Chips,,1.69
10,5,1,Chips and Guacamole,,4.45
14,7,1,Chips and Guacamole,,4.45
...,...,...,...,...,...
4596,1826,1,Chips and Guacamole,,4.45
4600,1827,1,Chips and Guacamole,,4.45
4605,1828,1,Chips and Guacamole,,4.45
4613,1831,1,Chips,,2.15


In [25]:
# Count missing values in each column.
df.isna().sum()


order_id                 0
quantity                 0
item_name                0
choice_description    1246
item_price               0
dtype: int64

In [26]:
# Replace missing choice descriptions with the placeholder text 'Unknown'.
df['choice_description'] = df['choice_description'].fillna(value='Unknown')

In [27]:
# Preview the first five rows to confirm the placeholder was applied.
df.head(n=5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,Unknown,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,Unknown,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


In [28]:
# Number of distinct choice_description values (missing values are not counted).
df['choice_description'].nunique(dropna=True)

1044