In [None]:
!pip install seaborn

In [None]:
import seaborn as sns

# 한글 깨짐 설정

In [None]:
!sudo apt-get install -y fonts-nanum
!sudo fc-cache -fv
!rm ~/.cache/matplotlib -rf

In [None]:
import matplotlib.pyplot as plt

# Use the Nanum Barun Gothic family for all matplotlib text.
plt.rcParams['font.family'] = 'NanumBarunGothic'

In [None]:
import numpy as np
import pandas as pd

# 테마 설정

In [None]:
# Apply the seaborn theme. `font` is passed explicitly because the theme sets
# matplotlib's font.family itself (default 'sans-serif'), which would otherwise
# replace the Korean font configured earlier in the notebook.
sns.set_theme(
    context='notebook',
    style='darkgrid',
    palette='deep',
    font='NanumBarunGothic',
    font_scale=1,
    color_codes=True,
    # Render minus signs on tick labels with the ASCII hyphen; presumably the
    # Nanum font lacks the Unicode minus glyph (TODO confirm).
    rc={'axes.unicode_minus': False},
)

# 샘플 데이터셋 로드

In [None]:
# Load seaborn's bundled "penguins" example dataset.
penguins = sns.load_dataset(name='penguins')

In [None]:
# Bare expression so Jupyter renders the rich table, as the tips.head() cells do.
penguins.head()

In [None]:
from pprint import pprint

# Pretty-print the names of the example datasets seaborn can load.
pprint(
    sns.get_dataset_names(),
)

In [None]:
# Load seaborn's bundled "tips" example dataset.
tips = sns.load_dataset(name='tips')

In [None]:
# Bare expression so Jupyter renders the rich table, as the later tips.head() cells do.
tips.head()

# Seaborn plot 종류
```
sns.histplot()
sns.countplot()
sns.kdeplot()
sns.scatterplot()
sns.lineplot()
sns.relplot()
sns.boxplot()
sns.violinplot()
sns.stripplot()
sns.heatmap()
```

## Histogram()

In [None]:
# Histogram of the `tip` column, given the DataFrame plus a column name.
fig = plt.figure()
# Titles, axes and other extras are still configured through matplotlib.
fig.suptitle('TIP Histogram')

sns.histplot(data=tips, x='tip')
plt.show()

In [None]:
fig = plt.figure()
fig.suptitle('TIP Histogram')

# Same histogram, this time passing the column as a Series.
sns.histplot(tips['tip'])
plt.show()

In [None]:
fig = plt.figure()
fig.suptitle('TIP Histogram')

# Use more bins (30) for a finer-grained histogram.
sns.histplot(tips['tip'], bins=30)
plt.show()

In [None]:
a = tips['tip'].to_numpy()
print(a.shape)

# A bare NumPy array carries no column name, so the `tip` label is lost.
sns.histplot(data=a)
plt.show()

In [None]:
# matplotlib histogram of the array with 30 bins ...
plt.hist(x=a, bins=30)
plt.show()

# ... followed by seaborn's histogram of the same array.
sns.histplot(data=a)
plt.show()

In [None]:
# Preview the first five rows of the tips data.
tips.head(n=5)

In [None]:
# Histogram of the total_bill column, passed as a Series.
sns.histplot(tips['total_bill'])
plt.show()

### subplot
- 창 분할해서 그리기

In [None]:
x = np.linspace(0, 10, 100)

# One row, two panels: sine on the left, cosine on the right.
fig, (ax_sin, ax_cos) = plt.subplots(1, 2)
ax_sin.plot(np.sin(x))
ax_cos.plot(np.cos(x))

plt.show()

In [None]:
# Side-by-side histograms of total_bill and tip.
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.histplot(data=tips, x='total_bill', ax=ax_left)
sns.histplot(data=tips, x='tip', ax=ax_right)

plt.show()

### y=
- 히스토그램 가로로 그리기

In [None]:
# Assigning the variable to `y` draws the histogram bars horizontally.
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.histplot(data=tips, y='total_bill', ax=ax_left)
sns.histplot(data=tips, y='tip', ax=ax_right)

plt.show()

### kde

In [None]:
# kde=True overlays a density-estimate curve on each histogram.
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.histplot(data=tips, x='total_bill', kde=True, ax=ax_left)
sns.histplot(data=tips, x='tip', kde=True, ax=ax_right)

plt.show()

### hue

In [None]:
# Split each histogram by the `sex` column via hue.
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.histplot(data=tips, x="total_bill", hue="sex", ax=ax_left)
sns.histplot(data=tips, x="tip", hue="sex", ax=ax_right)

plt.show()

### element
- element = ["bar", "step", "poly"]

In [None]:
# Compare two `element` choices: 'poly' (left) and 'step' (right).
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.histplot(
    data=tips, x="total_bill", hue="sex",
    element='poly', linewidth=3, ax=ax_left,
)
sns.histplot(data=tips, x="tip", hue="sex", element='step', ax=ax_right)

plt.show()

### shrink

In [None]:
# Left: bars drawn at 0.8 of the bin width; right: step outline for comparison.
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.histplot(data=tips, x="total_bill", hue="sex", shrink=0.8, ax=ax_left)
sns.histplot(data=tips, x="tip", hue="sex", element='step', ax=ax_right)

plt.show()

### stat
- `stat` must be one of ['count', 'frequency', 'density', 'probability', 'proportion', 'percent']


In [None]:
# Change the bar statistic: 'probability' on the left, 'density' on the right.
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.histplot(data=tips, x="total_bill", hue="sex", stat='probability', ax=ax_left)
sns.histplot(data=tips, x="tip", hue="sex", stat='density', ax=ax_right)

plt.show()

### multiple
- `multiple` must be one of ['layer', 'stack', 'fill', 'dodge']

In [None]:
# How hue groups are combined: 'stack' on the left, 'fill' on the right.
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.histplot(data=tips, x="total_bill", hue="sex", multiple='stack', ax=ax_left)
sns.histplot(data=tips, x="tip", hue="sex", multiple='fill', ax=ax_right)

plt.show()

In [None]:
# 'stack' on the left versus 'dodge' (with slightly narrower bars) on the right.
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.histplot(data=tips, x="total_bill", hue="sex", multiple='stack', ax=ax_left)
sns.histplot(
    data=tips, x="tip", hue="sex",
    multiple='dodge', shrink=0.8, ax=ax_right,
)

plt.show()

### log_scale

In [None]:
# Load the "planets" example dataset and preview it with Jupyter's rich table
# display (a bare expression instead of print), like the tips.head() cells.
planets = sns.load_dataset("planets")
planets.head()

In [None]:
# Histogram of planet distance on a linear axis.
sns.histplot(data=planets, x='distance')
plt.show()

In [None]:
# Same histogram with a logarithmic axis.
sns.histplot(data=planets, x='distance', log_scale=True)
plt.show()

### fill

In [None]:
# fill=False draws only the bar outlines.
sns.histplot(
    data=planets,
    x='distance',
    log_scale=True,
    fill=False,
)
plt.show()

### 누적

In [None]:
# Cumulative, per-method normalised step histograms of distance (log axis),
# limited to the two listed detection methods.
sns.histplot(
    data=planets,
    x="distance",
    hue="method",
    hue_order=["Radial Velocity", "Transit"],
    stat="density",
    common_norm=False,
    cumulative=True,
    element="step",
    fill=False,
    log_scale=True,
)
plt.show()

In [None]:
# Bivariate histogram of total_bill against tip, with a colour bar.
sns.histplot(data=tips, x='total_bill', y='tip', cbar=True)
plt.show()

In [None]:
# The same two variables drawn as a scatter plot.
sns.scatterplot(x='total_bill', y='tip', data=tips)
plt.show()

## Countplot()

In [None]:
# Preview the first five rows of the tips data.
tips.head(n=5)

In [None]:
# Row counts per category: by day (left) and by smoker (right).
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.countplot(data=tips, x='day', ax=ax_left)
sns.countplot(data=tips, x='smoker', ax=ax_right)

plt.show()

## Kdeplot()
- kde만 그리는 plot

In [None]:
# Density-estimate curves only, for total_bill (left) and tip (right).
fig, (ax_left, ax_right) = plt.subplots(1, 2, figsize=(10, 3))
sns.kdeplot(data=tips, x='total_bill', ax=ax_left)
sns.kdeplot(data=tips, x='tip', ax=ax_right)

plt.show()

## Scatterplot()

In [None]:
# Basic scatter plot of tip against total_bill.
sns.scatterplot(x='total_bill', y='tip', data=tips)
plt.show()

### hue

In [None]:
# Colour the points by the `time` column.
sns.scatterplot(
    data=tips,
    x='total_bill',
    y='tip',
    hue='time',
)
plt.show()

### style

In [None]:
# Vary both colour and marker style by the `time` column.
sns.scatterplot(
    data=tips,
    x='total_bill',
    y='tip',
    hue='time',
    style='time',
)
plt.show()

### size

In [None]:
# Colour by `time` and scale the marker size by the `size` column.
sns.scatterplot(
    data=tips,
    x='total_bill',
    y='tip',
    hue='time',
    size='size',
)
plt.show()

## Lineplot()

In [None]:
# Load the "flights" example dataset and preview it with Jupyter's rich table
# display (a bare expression instead of print), like the tips.head() cells.
flights = sns.load_dataset('flights')
flights.head()

### Query

In [None]:
# Keep only the rows whose month is May, then display them.
may_flights = flights.query('month == "May"')
may_flights

In [None]:
# Passengers per year, May rows only.
sns.lineplot(x='year', y='passengers', data=may_flights)
plt.show()

### hue

In [None]:
# One line per month, distinguished by colour.
sns.lineplot(
    data=flights,
    x='year',
    y='passengers',
    hue='month',
)
plt.show()

### hue X : 95% 신뢰구간
- hue 없이 그리면 같은 x(year)에 여러 y 값(월별 승객 수)이 있으므로, 평균선과 그 주변의 95% 신뢰구간(음영)이 함께 그려진다. (신뢰구간의 의미는 추가로 공부하기)

In [None]:
# Without hue, all months of each year are plotted as a single line.
sns.lineplot(x='year', y='passengers', data=flights)
plt.show()

In [None]:
# Load seaborn's bundled "fmri" example dataset and display it.
fmri = sns.load_dataset(name='fmri')
fmri

In [None]:
# Signal over timepoint for the whole dataset.
sns.lineplot(x='timepoint', y='signal', data=fmri)
plt.show()

In [None]:
# Signal over timepoint, one line per event.
sns.lineplot(
    data=fmri,
    x='timepoint',
    y='signal',
    hue='event',
)
plt.show()

In [None]:
# Left: single line for all data; right: one line per event.
fig, (ax_left, ax_right) = plt.subplots(1, 2)
sns.lineplot(data=fmri, x='timepoint', y='signal', ax=ax_left)
sns.lineplot(data=fmri, x='timepoint', y='signal', hue='event', ax=ax_right)
plt.show()

### units
- `units`: 샘플링 단위(여기서는 `subject`)별로 선을 하나씩 따로 그린다. 집계를 하지 않도록 `estimator=None`과 함께 사용한다. (강의에서는 설명 없이 넘어감 — 추가로 찾아보기)

In [None]:
# Plot with units='subject' and no estimator, using thin (lw=1) lines.
sns.lineplot(
    data=fmri,
    x='timepoint',
    y='signal',
    hue='event',
    units='subject',
    estimator=None,
    lw=1,
)
plt.show()

## Boxplot()

In [None]:
# Horizontal box plot of total_bill, passed as a Series.
sns.boxplot(x=tips['total_bill'])
plt.show()

### 세로로 그리기 

In [None]:
# Assigning the variable to `y` draws the box vertically.
sns.boxplot(y='total_bill', data=tips)
plt.show()

In [None]:
# One box of total_bill per day.
sns.boxplot(x='day', y='total_bill', data=tips)
plt.show()

### hue

In [None]:
# Split each day's box by the `smoker` column.
sns.boxplot(
    data=tips,
    x='day',
    y='total_bill',
    hue='smoker',
)
plt.show()

## swarmplot()

In [None]:
# Box plot split by smoker, with the individual observations drawn on the
# same axes as dark-grey swarm points.
sns.boxplot(
    data=tips,
    x='day',
    y='total_bill',
    hue='smoker',
)
sns.swarmplot(
    data=tips,
    x='day',
    y='total_bill',
    color="0.25",
)
plt.show()

# Titanic 분석


In [None]:
# Load seaborn's bundled "titanic" example dataset and display it.
titanic = sns.load_dataset(name='titanic')
titanic

```
성별에 따라 얼마나 살아남았는지
나이별
PClass(객실 등급)
Fare
```

## 성별

In [None]:
# sns.countplot(data=titanic, x='sex')
sns.histplot(data=titanic, x='sex', shrink=0.5)
plt.show()

In [None]:
# Counts per sex, with survived / not survived drawn side by side.
sns.histplot(
    data=titanic,
    x='sex',
    hue='survived',
    multiple='dodge',
    shrink=0.5,
)
plt.show()

In [None]:
# Same split, normalised so each bar shows the survived proportions.
sns.histplot(
    data=titanic,
    x='sex',
    hue='survived',
    multiple='fill',
    shrink=0.5,
)
plt.show()

## 객실등급

In [None]:
# Counts per cabin class, survived / not survived side by side.
# pclass is an integer code, so discrete=True gives each class its own
# unit-width bin centred on the class value instead of automatic continuous bins.
sns.histplot(
    data=titanic,
    x='pclass',
    hue='survived',
    multiple='dodge',
    shrink=0.5,
    discrete=True,
)
plt.show()

In [None]:
# Survived proportions per cabin class.
# pclass is an integer code, so discrete=True gives each class its own
# unit-width bin centred on the class value instead of automatic continuous bins.
sns.histplot(
    data=titanic,
    x='pclass',
    hue='survived',
    multiple='fill',
    shrink=0.5,
    discrete=True,
)
plt.show()

## 요금

In [None]:
# Histogram of ticket fare.
sns.histplot(data=titanic, x='fare')
# Render the figure; plt.plot() with no arguments draws nothing and only
# echoes an empty list as the cell output.
plt.show()

In [None]:
# Fare histogram split by survival.
sns.histplot(data=titanic, x='fare', hue='survived')
# Render the figure; plt.plot() with no arguments draws nothing and only
# echoes an empty list as the cell output.
plt.show()

In [None]:
# Fare histogram split by survival, drawn as polygons.
sns.histplot(data=titanic, x='fare', hue='survived', element='poly')
# Render the figure; plt.plot() with no arguments draws nothing and only
# echoes an empty list as the cell output.
plt.show()

In [None]:
# Keep only rows with fare > 0 before plotting on a logarithmic axis.
not_zero_fare = titanic.query("fare>0")
sns.histplot(
    data=not_zero_fare,
    x='fare',
    hue='survived',
    element='poly',
    log_scale=True,
)
# Render the figure; plt.plot() with no arguments draws nothing and only
# echoes an empty list as the cell output.
plt.show()

## 성별 + 객실등급

### Catplot()

In [None]:
# Count plot of cabin class coloured by survival, one facet column per sex.
sns.catplot(
    data=titanic,
    x='pclass',
    col='sex',
    hue='survived',
    kind='count',
)
# Render the figure; plt.plot() with no arguments draws nothing and only
# echoes an empty list as the cell output.
plt.show()