# Ex2 - Getting and Knowing your Data

This time we are going to pull data directly from the internet.
Special thanks to: https://github.com/justmarkham for sharing the dataset and materials.

### Step 1. Import the necessary libraries

In [49]:
import pandas as pd
import numpy as np

### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). 

### Step 3. Assign it to a variable called chipo.

In [60]:
chipo = pd.read_csv('data/chipotle.txt', sep='\t')

### Step 4. See the first 10 entries

In [5]:
chipo.head(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",$10.98
6,3,1,Side of Chips,,$1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",$11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",$9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",$9.25


### Step 5. What is the number of observations in the dataset?

In [6]:
# Solution 1
len(chipo)

4622

In [7]:
# Solution 2
chipo.shape[0]

4622

### Step 6. What is the number of columns in the dataset?

In [13]:
len(chipo.columns)

5

### Step 7. Print the name of all the columns.

In [14]:
chipo.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

### Step 8. How is the dataset indexed?

In [15]:
chipo.index

RangeIndex(start=0, stop=4622, step=1)

### Step 9. Which was the most-ordered item? 

In [61]:
chipo['item_price'] = chipo['item_price'].str.replace(r'\$', '')
chipo = chipo.replace({'\$':'', '\[':'', '\]':''}, regex=True)
chipo['item_price'] = chipo['item_price'].astype(np.float64)
chipo['total'] = chipo.quantity * chipo.item_price
chipo

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,total
0,1,1,Chips and Fresh Tomato Salsa,,2.39,2.39
1,1,1,Izze,Clementine,3.39,3.39
2,1,1,Nantucket Nectar,Apple,3.39,3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39,2.39
4,2,2,Chicken Bowl,"Tomatillo-Red Chili Salsa (Hot), Black Beans, ...",16.98,33.96
...,...,...,...,...,...,...
4617,1833,1,Steak Burrito,"Fresh Tomato Salsa, Rice, Black Beans, Sour Cr...",11.75,11.75
4618,1833,1,Steak Burrito,"Fresh Tomato Salsa, Rice, Sour Cream, Cheese, ...",11.75,11.75
4619,1834,1,Chicken Salad Bowl,"Fresh Tomato Salsa, Fajita Vegetables, Pinto B...",11.25,11.25
4620,1834,1,Chicken Salad Bowl,"Fresh Tomato Salsa, Fajita Vegetables, Lettuce",8.75,8.75


In [78]:
chipo[chipo['total'] == chipo['total'].max()]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,total
3598,1443,15,Chips and Fresh Tomato Salsa,,44.25,663.75


In [122]:
chipo.value_counts('item_name')

item_name
Chicken Bowl                             726
Chicken Burrito                          553
Chips and Guacamole                      479
Steak Burrito                            368
Canned Soft Drink                        301
Steak Bowl                               211
Chips                                    211
Bottled Water                            162
Chicken Soft Tacos                       115
Chicken Salad Bowl                       110
Chips and Fresh Tomato Salsa             110
Canned Soda                              104
Side of Chips                            101
Veggie Burrito                            95
Barbacoa Burrito                          91
Veggie Bowl                               85
Carnitas Bowl                             68
Barbacoa Bowl                             66
Carnitas Burrito                          59
Steak Soft Tacos                          55
6 Pack Soft Drink                         54
Chips and Tomatillo Red Chili Salsa       48


### Step 10. For the most-ordered item, how many items were ordered?

In [79]:
chipo[chipo['total'] == chipo['total'].max()]['quantity']

3598    15
Name: quantity, dtype: int64

In [112]:
chipo

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price,total
0,1,1,Chips and Fresh Tomato Salsa,,2.39,2.39
1,1,1,Izze,Clementine,3.39,3.39
2,1,1,Nantucket Nectar,Apple,3.39,3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39,2.39
4,2,2,Chicken Bowl,"Tomatillo-Red Chili Salsa (Hot), Black Beans, ...",16.98,33.96
...,...,...,...,...,...,...
4617,1833,1,Steak Burrito,"Fresh Tomato Salsa, Rice, Black Beans, Sour Cr...",11.75,11.75
4618,1833,1,Steak Burrito,"Fresh Tomato Salsa, Rice, Sour Cream, Cheese, ...",11.75,11.75
4619,1834,1,Chicken Salad Bowl,"Fresh Tomato Salsa, Fajita Vegetables, Pinto B...",11.25,11.25
4620,1834,1,Chicken Salad Bowl,"Fresh Tomato Salsa, Fajita Vegetables, Lettuce",8.75,8.75


In [115]:
chipo.groupby('item_name')['quantity'].sum().sort_values(ascending=False)

item_name
Chicken Bowl                             761
Chicken Burrito                          591
Chips and Guacamole                      506
Steak Burrito                            386
Canned Soft Drink                        351
Chips                                    230
Steak Bowl                               221
Bottled Water                            211
Chips and Fresh Tomato Salsa             130
Canned Soda                              126
Chicken Salad Bowl                       123
Chicken Soft Tacos                       120
Side of Chips                            110
Veggie Burrito                            97
Barbacoa Burrito                          91
Veggie Bowl                               87
Carnitas Bowl                             71
Barbacoa Bowl                             66
Carnitas Burrito                          60
Steak Soft Tacos                          56
6 Pack Soft Drink                         55
Chips and Tomatillo Red Chili Salsa       50


In [114]:
chipo['item_name'].value_counts('quantity')

Chicken Bowl                             0.157075
Chicken Burrito                          0.119645
Chips and Guacamole                      0.103635
Steak Burrito                            0.079619
Canned Soft Drink                        0.065123
Steak Bowl                               0.045651
Chips                                    0.045651
Bottled Water                            0.035050
Chicken Soft Tacos                       0.024881
Chips and Fresh Tomato Salsa             0.023799
Chicken Salad Bowl                       0.023799
Canned Soda                              0.022501
Side of Chips                            0.021852
Veggie Burrito                           0.020554
Barbacoa Burrito                         0.019688
Veggie Bowl                              0.018390
Carnitas Bowl                            0.014712
Barbacoa Bowl                            0.014280
Carnitas Burrito                         0.012765
Steak Soft Tacos                         0.011900


### Step 11. What was the most ordered item in the choice_description column?

In [80]:
chipo['choice_description'].value_counts()

Diet Coke                                                                                            134
Coke                                                                                                 123
Sprite                                                                                                77
Fresh Tomato Salsa, Rice, Black Beans, Cheese, Sour Cream, Lettuce                                    42
Fresh Tomato Salsa, Rice, Black Beans, Cheese, Sour Cream, Guacamole, Lettuce                         40
                                                                                                    ... 
Fresh Tomato Salsa, Fajita Vegetables, Black Beans, Cheese, Guacamole                                  1
Fresh Tomato Salsa, Lettuce, Rice, Black Beans, Cheese                                                 1
Roasted Chili Corn Salsa (Medium), Tomatillo-Red Chili Salsa (Hot), Cheese, Sour Cream, Guacamole      1
Fresh Tomato Salsa, Cheese, Rice, Black Beans, Lettuce 

### Step 12. How many items were orderd in total?

In [82]:
chipo['quantity'].sum()

4972

### Step 13. Turn the item price into a float

#### Step 13.a. Check the item price type

In [89]:
chipo.item_price.dtypes

dtype('float64')

#### Step 13.b. Create a lambda function and change the type of item price

In [91]:
chipo['item_price'].apply(lambda x: float(x))

0        2.39
1        3.39
2        3.39
3        2.39
4       16.98
        ...  
4617    11.75
4618    11.75
4619    11.25
4620     8.75
4621     8.75
Name: item_price, Length: 4622, dtype: float64

#### Step 13.c. Check the item price type

In [92]:
chipo.item_price.dtypes

dtype('float64')

### Step 14. How much was the revenue for the period in the dataset?

In [137]:
revenue = chipo['total'].sum()
revenue

39237.02

### Step 15. How many orders were made in the period?

In [138]:
orders = chipo.groupby('order_id', as_index=False).count().shape[0]
orders

1834

### Step 16. What is the average revenue amount per order?

In [140]:
# Solution 1
revenue/orders

21.39423118865867

In [152]:
# Solution 2
# chipo.total.sum()/chipo.shape[0]

### Step 17. How many different items are sold?

In [141]:
chipo['item_name'].value_counts().count()

50