# 라이브러리 로드

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#파이썬과 씨본의 버전 확인하는 과정
print(pd.__version__)
print(sns.__version__)

1.2.4
0.11.1


In [3]:
# 0.11.0 버전에서 변화가 많으니 이 버전 이상을 사용하기
!pip install seaborn --upgrade



# seaborn의 종류

![image.png](attachment:image.png)

- replot~ 수치형
- displot~ 수치형 분포
- catplot~ 범주형

# 데이터셋 불러오기

In [4]:
df= sns.load_dataset("mpg")
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790,15.6,82,usa,ford mustang gl
394,44.0,4,97.0,52.0,2130,24.6,82,europe,vw pickup
395,32.0,4,135.0,84.0,2295,11.6,82,usa,dodge rampage
396,28.0,4,120.0,79.0,2625,18.6,82,usa,ford ranger


In [5]:
print(df.shape)
print(df.index)
print(df.columns)
print(df.values)
print(df.dtypes)

(398, 9)
RangeIndex(start=0, stop=398, step=1)
Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model_year', 'origin', 'name'],
      dtype='object')
[[18.0 8 307.0 ... 70 'usa' 'chevrolet chevelle malibu']
 [15.0 8 350.0 ... 70 'usa' 'buick skylark 320']
 [18.0 8 318.0 ... 70 'usa' 'plymouth satellite']
 ...
 [32.0 4 135.0 ... 82 'usa' 'dodge rampage']
 [28.0 4 120.0 ... 82 'usa' 'ford ranger']
 [31.0 4 119.0 ... 82 'usa' 'chevy s-10']]
mpg             float64
cylinders         int64
displacement    float64
horsepower      float64
weight            int64
acceleration    float64
model_year        int64
origin           object
name             object
dtype: object


## 데이터셋 일부만 가져오기

In [6]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino


In [7]:
df.tail()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
393,27.0,4,140.0,86.0,2790,15.6,82,usa,ford mustang gl
394,44.0,4,97.0,52.0,2130,24.6,82,europe,vw pickup
395,32.0,4,135.0,84.0,2295,11.6,82,usa,dodge rampage
396,28.0,4,120.0,79.0,2625,18.6,82,usa,ford ranger
397,31.0,4,119.0,82.0,2720,19.4,82,usa,chevy s-10


In [8]:
df.sample()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
214,13.0,8,302.0,130.0,3870,15.0,76,usa,ford f108


In [9]:
df.sample(frac=0.1,random_state=42) #10퍼센트 비율로 가지고오기

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
198,33.0,4,91.0,53.0,1795,17.4,76,japan,honda civic
396,28.0,4,120.0,79.0,2625,18.6,82,usa,ford ranger
33,19.0,6,232.0,100.0,2634,13.0,71,usa,amc gremlin
208,13.0,8,318.0,150.0,3940,13.2,76,usa,plymouth volare premier v8
93,14.0,8,318.0,150.0,4237,14.5,73,usa,plymouth fury gran sedan
84,27.0,4,97.0,88.0,2100,16.5,72,japan,toyota corolla 1600 (sw)
373,24.0,4,140.0,92.0,2865,16.4,82,usa,ford fairmont futura
94,13.0,8,440.0,215.0,4735,11.0,73,usa,chrysler new yorker brougham
222,17.0,8,260.0,110.0,4060,19.0,77,usa,oldsmobile cutlass supreme
126,21.0,6,200.0,,2875,17.0,74,usa,ford maverick


In [10]:
df.columns

Index(['mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model_year', 'origin', 'name'],
      dtype='object')

In [11]:
#위치 변경해주고 싶을때
df['name','mpg', 'cylinders', 'displacement', 'horsepower', 'weight',
       'acceleration', 'model_year', 'origin']

KeyError: ('name', 'mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model_year', 'origin')

## 요약하기

In [None]:
df.info()

## 결측치 확인

In [None]:
df.isnull().sum()

In [None]:
df.isnull().mean()*100

## 기술통계

In [None]:
df.describe()

In [None]:
df.describe(include="object")

## series

In [12]:
type(df["mpg"])

pandas.core.series.Series

## dataframe

In [13]:
type(df[["mpg"]])

pandas.core.frame.DataFrame

## loc 
- index이름

In [15]:
df.loc[0]

mpg                                  18.0
cylinders                               8
displacement                        307.0
horsepower                          130.0
weight                               3504
acceleration                         12.0
model_year                             70
origin                                usa
name            chevrolet chevelle malibu
Name: 0, dtype: object

In [17]:
df.loc[0:1]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320


In [18]:
df.loc[[0,1]]

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320


In [19]:
df.loc[0,"mpg"]

18.0

In [20]:
df.loc[[0,1],["mpg","origin"]]

Unnamed: 0,mpg,origin
0,18.0,usa
1,15.0,usa
