# 1. 데이터의 기본 정보 요약

## 1.1 필수 라이브러리 로딩

In [4]:
import numpy as np
import pandas as pd

In [5]:
# Mount Google Drive into the Colab runtime at /content/gdrive so the dataset
# stored on Drive can be read through the local filesystem.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


## 1.2 데이터셋 로딩

In [6]:
# Path of the Chipotle order data (tab-separated) on the mounted Google Drive.
file = "/content/gdrive/My Drive/머신러닝/chipotle.tsv"
# Parse the file into a DataFrame; fields are separated by tab characters.
chipo = pd.read_csv(file, delimiter='\t')

## 1.3 요약정보 확인

In [7]:
# (number of rows, number of columns) of the loaded frame.
chipo.shape

(4622, 5)

In [8]:
# Per-column non-null counts and dtypes; shows which columns contain missing values.
chipo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4622 entries, 0 to 4621
Data columns (total 5 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   order_id            4622 non-null   int64 
 1   quantity            4622 non-null   int64 
 2   item_name           4622 non-null   object
 3   choice_description  3376 non-null   object
 4   item_price          4622 non-null   object
dtypes: int64(2), object(3)
memory usage: 180.7+ KB


In [9]:
# Preview the first 10 rows to see what the raw values look like.
chipo.head(10)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",$10.98
6,3,1,Side of Chips,,$1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",$11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",$9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",$9.25


In [10]:
# Column labels of the frame.
chipo.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

- order_id: 주문 번호
- quantity: 주문 수량
- item_name: 주문한 메뉴 이름
- choice_description: 선택한 옵션(토핑 등), 값이 없는 행도 있음
- item_price: 가격 ('$' 기호가 붙은 문자열)

In [12]:
# Row index of the frame.
chipo.index

RangeIndex(start=0, stop=4622, step=1)

In [17]:
# Basic summary statistics for the numeric columns.
# order_id is an identifier rather than a measurement, so it is excluded
# explicitly. This keeps the table the same whether or not the cell that
# converts order_id to str has already been executed (the notebook must give
# the same result on Restart & Run All as it did interactively).
chipo.drop(columns=['order_id']).describe()

Unnamed: 0,quantity
count,4622.0
mean,1.075725
std,0.410186
min,1.0
25%,1.0
50%,1.0
75%,1.0
max,15.0


In [15]:
# order_id is a label for an order, not a quantity with numeric meaning,
# so store it as a string to keep it out of numeric summaries.
chipo['order_id'] = chipo['order_id'].astype(str)

In [20]:
# Array of the distinct values found in item_name.
chipo['item_name'].unique()

array(['Chips and Fresh Tomato Salsa', 'Izze', 'Nantucket Nectar',
       'Chips and Tomatillo-Green Chili Salsa', 'Chicken Bowl',
       'Side of Chips', 'Steak Burrito', 'Steak Soft Tacos',
       'Chips and Guacamole', 'Chicken Crispy Tacos',
       'Chicken Soft Tacos', 'Chicken Burrito', 'Canned Soda',
       'Barbacoa Burrito', 'Carnitas Burrito', 'Carnitas Bowl',
       'Bottled Water', 'Chips and Tomatillo Green Chili Salsa',
       'Barbacoa Bowl', 'Chips', 'Chicken Salad Bowl', 'Steak Bowl',
       'Barbacoa Soft Tacos', 'Veggie Burrito', 'Veggie Bowl',
       'Steak Crispy Tacos', 'Chips and Tomatillo Red Chili Salsa',
       'Barbacoa Crispy Tacos', 'Veggie Salad Bowl',
       'Chips and Roasted Chili-Corn Salsa',
       'Chips and Roasted Chili Corn Salsa', 'Carnitas Soft Tacos',
       'Chicken Salad', 'Canned Soft Drink', 'Steak Salad Bowl',
       '6 Pack Soft Drink', 'Chips and Tomatillo-Red Chili Salsa', 'Bowl',
       'Burrito', 'Crispy Tacos', 'Carnitas Crispy Tacos

In [21]:
# How many distinct menu items appear (a missing value, if any, counts as one).
chipo['item_name'].nunique(dropna=False)

50

In [22]:
# How many distinct orders appear (a missing value, if any, counts as one).
chipo['order_id'].nunique(dropna=False)

1834

# 2. 인사이트의 발견 : 탐색적 데이터 분석

## 2.1 가장 많이 주문한 메뉴는 무엇일까?

In [26]:
# Row count per item_name, most frequent first, limited to the top ten entries.
item_count = chipo['item_name'].value_counts().head(10)
item_count

Chicken Bowl                    726
Chicken Burrito                 553
Chips and Guacamole             479
Steak Burrito                   368
Canned Soft Drink               301
Steak Bowl                      211
Chips                           211
Bottled Water                   162
Chicken Soft Tacos              115
Chips and Fresh Tomato Salsa    110
Name: item_name, dtype: int64

In [28]:
# Print the top-10 items together with their rank.
# Series.items() yields (index label, value) pairs; it replaces iteritems(),
# which is deprecated and no longer exists in pandas 2.0+.
# enumerate(..., 1) attaches a running rank starting at 1.
for idx, (val, cnt) in enumerate(item_count.items(), 1):
  print("Top", idx, ":", val, cnt)

Top 1 : Chicken Bowl 726
Top 2 : Chicken Burrito 553
Top 3 : Chips and Guacamole 479
Top 4 : Steak Burrito 368
Top 5 : Canned Soft Drink 301
Top 6 : Steak Bowl 211
Top 7 : Chips 211
Top 8 : Bottled Water 162
Top 9 : Chicken Soft Tacos 115
Top 10 : Chips and Fresh Tomato Salsa 110


  for idx, (val, cnt) in enumerate(item_count.iteritems(),1):


In [30]:
# Label of the first entry of value_counts(), i.e. the most frequently ordered item.
chipo['item_name'].value_counts().index[0]

'Chicken Bowl'

## 2.2 메뉴당 주문 총수량은 얼마일까?

In [34]:
# Count of non-null order_id entries for each item_name, i.e. how many order
# rows mention each menu item.
# NOTE(review): this counts rows, not units. If the section is meant to report
# total units ordered, summing the 'quantity' column would be needed — confirm.
order_count = chipo['order_id'].groupby(chipo['item_name']).count()
order_count.head(10)

item_name
6 Pack Soft Drink         54
Barbacoa Bowl             66
Barbacoa Burrito          91
Barbacoa Crispy Tacos     11
Barbacoa Salad Bowl       10
Barbacoa Soft Tacos       25
Bottled Water            162
Bowl                       2
Burrito                    6
Canned Soda              104
Name: order_id, dtype: int64

In [37]:
# Convert item_price from text such as "$2.39" into a float.
# The values are first viewed as strings and an optional leading "$" is removed,
# which keeps this cell re-runnable: on a second run the column already holds
# floats, and slicing them with x[1:] (as the previous lambda did) would raise.
# Surrounding whitespace is stripped so the "$" is found at the very start.
chipo['item_price'] = (
    chipo['item_price']
    .astype(str)
    .str.strip()
    .str.lstrip('$')
    .astype(float)
)
# item_price is numeric now, so it appears in the summary statistics.
chipo.describe()

Unnamed: 0,quantity,item_price
count,4622.0,4622.0
mean,1.075725,7.464336
std,0.410186,4.245557
min,1.0,1.09
25%,1.0,3.39
50%,1.0,8.75
75%,1.0,9.25
max,15.0,44.25
