# Pizza Analysis - ETL

### Order Details CSV

In [72]:
# Load the order line items (indexed by order_details_id) and preview the first and last five rows.
import pandas as pd
order_details = pd.read_csv('order_details.csv', index_col='order_details_id')
print(order_details.head(), order_details.tail(), sep='\n')

                  order_id       pizza_id  quantity
order_details_id                                   
1                        1     hawaiian_m         1
2                        2  classic_dlx_m         1
3                        2  five_cheese_l         1
4                        2    ital_supr_l         1
5                        2     mexicana_m         1
                  order_id       pizza_id  quantity
order_details_id                                   
48616                21348  ckn_alfredo_m         1
48617                21348  four_cheese_l         1
48618                21348   napolitana_s         1
48619                21349     mexicana_l         1
48620                21350      bbq_ckn_s         1


In [73]:
# Confirming that read_csv returned a pandas DataFrame.
type(order_details)

pandas.core.frame.DataFrame

In [74]:
# Listing the column names (order_details_id is the index, so it is not among them).
order_details.columns

Index(['order_id', 'pizza_id', 'quantity'], dtype='object')

In [75]:
# Inspecting the dtype pandas inferred for each column.
order_details.dtypes

order_id     int64
pizza_id    object
quantity     int64
dtype: object

In [76]:
# Counting missing values across the whole frame (per-column counts, then their total).
order_details.isna().sum().sum()

0

### Orders CSV

In [128]:
# Reading orders.csv into a DataFrame, using the file's first column as the index, and printing it.
orders = pd.read_csv('orders.csv', index_col=0)
print(orders)

                date      time
order_id                      
1         2015-01-01  11:38:36
2         2015-01-01  11:57:40
3         2015-01-01  12:12:28
4         2015-01-01  12:16:31
5         2015-01-01  12:21:30
...              ...       ...
21346     2015-12-31  20:51:07
21347     2015-12-31  21:14:37
21348     2015-12-31  21:23:10
21349     2015-12-31  22:09:54
21350     2015-12-31  23:02:05

[21350 rows x 2 columns]


In [129]:
# Verifying that orders is a pandas DataFrame.
type(orders)

pandas.core.frame.DataFrame

In [130]:
# Checking the dtype pandas inferred for each column.
orders.dtypes

date    object
time    object
dtype: object

##### The date and time columns were read as strings (object dtype), so they need to be converted.

In [137]:
# Converting the date column to datetime64 and the time column to datetime.time objects.
# Explicit formats avoid format inference and make unexpected values fail loudly.
orders['date'] = pd.to_datetime(orders['date'], format='%Y-%m-%d')
# Casting to str first keeps this cell re-runnable: after the first run the column
# holds datetime.time objects, which pd.to_datetime cannot parse directly.
orders['time'] = pd.to_datetime(orders['time'].astype(str), format='%H:%M:%S').dt.time
# The time column still reports as object because it stores Python datetime.time values.
orders.dtypes

date    datetime64[ns]
time            object
dtype: object

In [138]:
# Previewing the first five orders after the type conversion.
print(orders.head())

               date      time
order_id                     
1        2015-01-01  11:38:36
2        2015-01-01  11:57:40
3        2015-01-01  12:12:28
4        2015-01-01  12:16:31
5        2015-01-01  12:21:30


In [85]:
# Counting missing values across the whole frame (per-column counts, then their total).
orders.isna().sum().sum()

0

### Pizzas CSV

In [148]:
# Loading pizzas.csv with the default integer index and printing the frame
# (pandas abbreviates the printout to the first and last rows).
pizzas = pd.read_csv('pizzas.csv')
print(pizzas)

         pizza_id pizza_type_id size  price
0       bbq_ckn_s       bbq_ckn    S  12.75
1       bbq_ckn_m       bbq_ckn    M  16.75
2       bbq_ckn_l       bbq_ckn    L  20.75
3      cali_ckn_s      cali_ckn    S  12.75
4      cali_ckn_m      cali_ckn    M  16.75
..            ...           ...  ...    ...
91  spinach_fet_m   spinach_fet    M  16.00
92  spinach_fet_l   spinach_fet    L  20.25
93   veggie_veg_s    veggie_veg    S  12.00
94   veggie_veg_m    veggie_veg    M  16.00
95   veggie_veg_l    veggie_veg    L  20.25

[96 rows x 4 columns]


In [149]:
# Verifying that pizzas is a pandas DataFrame.
type(pizzas)

pandas.core.frame.DataFrame

In [150]:
# Verifying the dtype pandas inferred for each column.
pizzas.dtypes

pizza_id          object
pizza_type_id     object
size              object
price            float64
dtype: object

In [151]:
# Counting missing values across the whole frame (per-column counts, then their total).
pizzas.isna().sum().sum()

0

### Pizza Types CSV

In [107]:
# Reading pizza_types.csv with its first column (pizza_type_id) as the index, then previewing five rows.
# NOTE(review): 'unicode_escape' also interprets backslash sequences found in the data; if the file is
# simply Latin-1 / Windows-1252 encoded, naming that codec would be safer - confirm against the file.
pizza_types = pd.read_csv('pizza_types.csv', index_col=0, encoding='unicode_escape')
print(pizza_types.head())

                                       name category  \
pizza_type_id                                          
bbq_ckn          The Barbecue Chicken Pizza  Chicken   
cali_ckn       The California Chicken Pizza  Chicken   
ckn_alfredo       The Chicken Alfredo Pizza  Chicken   
ckn_pesto           The Chicken Pesto Pizza  Chicken   
southw_ckn      The Southwest Chicken Pizza  Chicken   

                                                     ingredients  
pizza_type_id                                                     
bbq_ckn        Barbecued Chicken, Red Peppers, Green Peppers,...  
cali_ckn       Chicken, Artichoke, Spinach, Garlic, Jalapeno ...  
ckn_alfredo    Chicken, Red Onions, Red Peppers, Mushrooms, A...  
ckn_pesto      Chicken, Tomatoes, Red Peppers, Spinach, Garli...  
southw_ckn     Chicken, Tomatoes, Red Peppers, Red Onions, Ja...  


In [97]:
# Verifying that pizza_types is a pandas DataFrame.
type(pizza_types)

pandas.core.frame.DataFrame

In [99]:
# Ensuring every column holds strings (pandas reports string columns as object dtype).
pizza_types.dtypes

name           object
category       object
ingredients    object
dtype: object

In [100]:
# Counting missing values across the whole frame (per-column counts, then their total).
pizza_types.isna().sum().sum()

0

In [108]:
# Splitting the ingredients column into one column per ingredient to aid in analysis.
# The separator pattern consumes the whitespace around each comma, so values come out
# clean ('Red Peppers' rather than ' Red Peppers') and the same ingredient compares
# equal regardless of its position in the list.
ingredient_parts = pizza_types['ingredients'].str.strip().str.split(r'\s*,\s*', expand=True)
# Naming the columns from the actual split width instead of assuming exactly eight,
# so a longer or shorter ingredient list does not raise a length-mismatch error.
ingredient_names = [f'ing_{i}' for i in range(1, ingredient_parts.shape[1] + 1)]
ingredient_parts.columns = ingredient_names
# Rows with fewer ingredients than the widest row are padded with missing values.
pizza_types[ingredient_names] = ingredient_parts
print(pizza_types.head(5))

                                       name category  \
pizza_type_id                                          
bbq_ckn          The Barbecue Chicken Pizza  Chicken   
cali_ckn       The California Chicken Pizza  Chicken   
ckn_alfredo       The Chicken Alfredo Pizza  Chicken   
ckn_pesto           The Chicken Pesto Pizza  Chicken   
southw_ckn      The Southwest Chicken Pizza  Chicken   

                                                     ingredients  \
pizza_type_id                                                      
bbq_ckn        Barbecued Chicken, Red Peppers, Green Peppers,...   
cali_ckn       Chicken, Artichoke, Spinach, Garlic, Jalapeno ...   
ckn_alfredo    Chicken, Red Onions, Red Peppers, Mushrooms, A...   
ckn_pesto      Chicken, Tomatoes, Red Peppers, Spinach, Garli...   
southw_ckn     Chicken, Tomatoes, Red Peppers, Red Onions, Ja...   

                           ing_1         ing_2           ing_3        ing_4  \
pizza_type_id                                      

In [109]:
# Dropping the original ingredients text now that each ingredient has its own column,
# then previewing both ends of the result.
pizza_types_exp = pizza_types.drop(columns='ingredients')
print(pizza_types_exp.head(), pizza_types_exp.tail(), sep='\n')

                                       name category              ing_1  \
pizza_type_id                                                             
bbq_ckn          The Barbecue Chicken Pizza  Chicken  Barbecued Chicken   
cali_ckn       The California Chicken Pizza  Chicken            Chicken   
ckn_alfredo       The Chicken Alfredo Pizza  Chicken            Chicken   
ckn_pesto           The Chicken Pesto Pizza  Chicken            Chicken   
southw_ckn      The Southwest Chicken Pizza  Chicken            Chicken   

                      ing_2           ing_3        ing_4              ing_5  \
pizza_type_id                                                                 
bbq_ckn         Red Peppers   Green Peppers     Tomatoes         Red Onions   
cali_ckn          Artichoke         Spinach       Garlic   Jalapeno Peppers   
ckn_alfredo      Red Onions     Red Peppers    Mushrooms      Asiago Cheese   
ckn_pesto          Tomatoes     Red Peppers      Spinach             Garlic   


### Joining Dataframes

In [141]:
# Attaching each order's date and time to its line items through the shared order_id key.
orders_comb_df = order_details.merge(orders, on='order_id')
print(orders_comb_df)

       order_id       pizza_id  quantity       date      time
0             1     hawaiian_m         1 2015-01-01  11:38:36
1             2  classic_dlx_m         1 2015-01-01  11:57:40
2             2  five_cheese_l         1 2015-01-01  11:57:40
3             2    ital_supr_l         1 2015-01-01  11:57:40
4             2     mexicana_m         1 2015-01-01  11:57:40
...         ...            ...       ...        ...       ...
48615     21348  ckn_alfredo_m         1 2015-12-31  21:23:10
48616     21348  four_cheese_l         1 2015-12-31  21:23:10
48617     21348   napolitana_s         1 2015-12-31  21:23:10
48618     21349     mexicana_l         1 2015-12-31  22:09:54
48619     21350      bbq_ckn_s         1 2015-12-31  23:02:05

[48620 rows x 5 columns]


In [152]:
# Attaching the pizza type attributes (name, category, ingredients) to every pizza
# through the shared pizza_type_id key.
pizzas_comb_df = pizzas.merge(pizza_types_exp, on='pizza_type_id')
print(pizzas_comb_df)

         pizza_id pizza_type_id size  price  \
0       bbq_ckn_s       bbq_ckn    S  12.75   
1       bbq_ckn_m       bbq_ckn    M  16.75   
2       bbq_ckn_l       bbq_ckn    L  20.75   
3      cali_ckn_s      cali_ckn    S  12.75   
4      cali_ckn_m      cali_ckn    M  16.75   
..            ...           ...  ...    ...   
91  spinach_fet_m   spinach_fet    M  16.00   
92  spinach_fet_l   spinach_fet    L  20.25   
93   veggie_veg_s    veggie_veg    S  12.00   
94   veggie_veg_m    veggie_veg    M  16.00   
95   veggie_veg_l    veggie_veg    L  20.25   

                                 name category              ing_1  \
0          The Barbecue Chicken Pizza  Chicken  Barbecued Chicken   
1          The Barbecue Chicken Pizza  Chicken  Barbecued Chicken   
2          The Barbecue Chicken Pizza  Chicken  Barbecued Chicken   
3        The California Chicken Pizza  Chicken            Chicken   
4        The California Chicken Pizza  Chicken            Chicken   
..                   

In [144]:
# Checking the column dtypes of the combined pizzas frame.
pizzas_comb_df.dtypes

pizza_type_id     object
size              object
price            float64
name              object
category          object
ing_1             object
ing_2             object
ing_3             object
ing_4             object
ing_5             object
ing_6             object
ing_7             object
ing_8             object
dtype: object

In [None]:
# Rebuilding both intermediate tables from their source frames.
pizzas_comb_df = pd.merge(pizzas, pizza_types_exp, on='pizza_type_id')
print(pizzas_comb_df)
orders_comb_df = pd.merge(order_details, orders, on='order_id')
print(orders_comb_df)

# Joining the pizza attributes onto every order line. pizza_id is the one column the
# two frames share; the previous empty key (on='') raised a KeyError.
# validate='one_to_many' raises a MergeError if pizza_id is duplicated on the pizza
# side, which would otherwise silently multiply order lines.
pizza_order_comb_df = pd.merge(pizzas_comb_df, orders_comb_df, on='pizza_id', validate='one_to_many')