# Quick Data Analysis with the Python `seaborn` Package

In [None]:
import seaborn
from matplotlib import pyplot as plt

In [None]:
# Load the Palmer penguins example dataset through seaborn.
# Newer copies of the dataset name the bill measurements `bill_*`, while this
# notebook (and older copies of the data) use `culmen_*`. Normalise to the
# `culmen_*` names so every later cell works with either version; the rename
# silently skips keys that are not present, so it is a no-op on old copies.
penguins = seaborn.load_dataset('penguins').rename(
    columns={
        'bill_length_mm': 'culmen_length_mm',
        'bill_depth_mm': 'culmen_depth_mm',
    }
)

In [None]:
# Peek at the first five rows to see which columns are available.
penguins.head(n=5)

In [None]:
# Discard every row that has at least one missing value.
penguins = penguins.dropna(axis=0, how='any')

## Questions about our Data

### 1. What's a culmen?
The culmen is the upper ridge of a bird's bill. See [these image search results for "penguin culmen"](https://www.google.com/search?q=penguin+culmen&tbm=isch&source=iu&ictx=1&fir=zwyRKgn-Tm4YtM%253A%252CVE_2OlS4JwXQyM%252C_&vet=1&usg=AI4_-kQAyUSMvu5r_sWMrB99Q7YPzWdKkg&sa=X&ved=2ahUKEwiq04_rnIDqAhWXIDQIHfAKDRMQ9QEwAHoECAUQEQ&biw=1440&bih=821#imgrc=zwyRKgn-Tm4YtM:) for examples.
<br/>
<br/>
### 2. What species are represented?

In [None]:
# Number of penguins recorded for each species.
penguins['species'].value_counts()

### 3. What islands are represented?

In [None]:
# Number of penguins recorded on each island.
penguins['island'].value_counts()

All three islands are in the Palmer Archipelago, off the coast of the Antarctic Peninsula.

### 4. How many penguins do we have in our dataset?

In [None]:
# Row count of the cleaned frame (one row per penguin).
penguins.shape[0]

## Plotting

In [None]:
# Use seaborn's grey-background grid theme for every figure below.
seaborn.set_style(style='darkgrid')

In [None]:
# Horizontal bars: flipper length (numeric, x) summarised per species (y).
# The trailing semicolon hides the Axes repr in the cell output.
seaborn.barplot(
    x='flipper_length_mm',
    y='species',
    data=penguins,
);

In [None]:
# Body mass of every individual penguin, grouped by species.
fig, ax = plt.subplots(figsize=(10, 10))
# Give both (still empty) axis labels a larger font before plotting --
# presumably so the labels seaborn writes afterwards keep this size;
# confirm on your seaborn version.
for set_axis_label in (ax.set_xlabel, ax.set_ylabel):
    set_axis_label('', fontsize=20)
seaborn.swarmplot(
    x='body_mass_g',
    y='species',
    data=penguins,
    ax=ax,
);

In [None]:
# Culmen depth of every individual penguin, grouped by island.
fig, ax = plt.subplots(figsize=(10, 10))
# Give both (still empty) axis labels a larger font before plotting --
# presumably so the labels seaborn writes afterwards keep this size;
# confirm on your seaborn version.
for set_axis_label in (ax.set_xlabel, ax.set_ylabel):
    set_axis_label('', fontsize=20)
seaborn.swarmplot(
    x='culmen_depth_mm',
    y='island',
    data=penguins,
    ax=ax,
);

In [None]:
# Kernel density estimate of the flipper-length distribution.
flipper_lengths = penguins['flipper_length_mm']
seaborn.kdeplot(data=flipper_lengths);

In [None]:
# Flipper length vs. culmen depth, one panel per sex. Colour encodes species,
# marker shape encodes island and marker size encodes body mass.
seaborn.relplot(
    data=penguins,
    x='flipper_length_mm',
    y='culmen_depth_mm',
    hue='species',
    style='island',
    size='body_mass_g',
    col='sex',
);

## Modeling

Judging by the plots above, penguins' culmen depth appears to be related to the island they live on, so let's see how well a simple model can predict the island from culmen depth alone.

In [None]:
from sklearn.model_selection import train_test_split
from sklearn import tree

In [None]:
# Single input feature (culmen depth) and the label to predict (island).
feature_column, target_column = 'culmen_depth_mm', 'island'
X = penguins[feature_column]
y = penguins[target_column]

In [None]:
# Hold out part of the data for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [None]:
# Decision-tree classifier with a fixed seed. scikit-learn randomly permutes
# candidate features at each split, so without `random_state` the fitted tree
# is not guaranteed to be identical across Restart & Run All.
model = tree.DecisionTreeClassifier(random_state=42)

In [None]:
# The training feature is a 1-D Series; reshape it into a single-column 2-D
# array before handing it to the estimator.
train_features = X_train.to_numpy().reshape(-1, 1)
model.fit(train_features, y_train)

In [None]:
# Predict the island for three hypothetical culmen depths (12, 19 and 24 mm)
# in one batched call instead of three copy-pasted cells. Each inner list is
# one sample with a single feature; the result array follows the input order.
model.predict([[12], [19], [24]])

In [None]:
# Evaluate on the held-out split, reshaping the 1-D test feature into a
# single-column 2-D array exactly as was done for training.
test_features = X_test.to_numpy().reshape(-1, 1)
model.score(test_features, y_test)