# Visualisierung mit Seaborn
* https://elitedatascience.com/python-seaborn-tutorial

In [1]:
# blab init
# Ask the blab package for the path of its startup notebook and execute that
# notebook inside this kernel via the %run magic.
# NOTE(review): what the startup notebook defines is not visible from here —
# presumably shared helpers/paths (e.g. `dropbox_path`); confirm before relying on it.
import blab
startup_notebook = blab.blab_startup()
%run $startup_notebook

dropbox_path = C:\Users\me\Dropbox


In [2]:
import pandas  as pd
import numpy   as np
import keras
import matplotlib.pyplot as plt
import seaborn
from bj_helper.pandas_content   import *
from bj_helper.pandas           import *
from bj_helper.pandas_analyse   import *
from bj_helper.keras            import see

ModuleNotFoundError: No module named 'keras'

time: 2.5 s


## Pokemon-Tabelle

In [None]:
# Load the Pokemon table; the first CSV column is used as the row index.
df = pd.read_csv(
    'pokemon.csv',
    index_col=0,
)

In [None]:
# Hand the Pokemon DataFrame to the project's `grid` helper.
# NOTE(review): `grid` is not defined in this notebook — presumably it comes from
# one of the bj_helper star imports and renders a table view; confirm.
grid(df)

In [None]:
# Scatter plot of Attack against Defense.
# lmplot also draws a regression line unless fit_reg=False is passed.
seaborn.lmplot(data=df, x='Attack', y='Defense')

In [None]:
# Setting the size through plt.figure only works with regplot, which draws
# into the current matplotlib figure; lmplot creates its own figure instead.
plt.figure(figsize=(16, 4))
seaborn.regplot(data=df, x='Attack', y='Defense')

In [None]:
# Scatter plot with extra arguments:
#   hue='Stage'   -> color the points by evolution stage
#   fit_reg=False -> suppress the regression line
seaborn.lmplot(x='Attack', y='Defense', hue='Stage', fit_reg=False, data=df)

In [None]:
# Boxplot
# The whole DataFrame is passed as `data` without x/y, i.e. in wide form.
seaborn.boxplot(data=df)

In [None]:
# Optional theme (currently disabled):
#seaborn.set_style('whitegrid')

# Wide figure so that all 'Type 1' categories fit side by side.
plt.figure(figsize=(16, 4))

# Violin plot: distribution of Attack for each primary type.
seaborn.violinplot(data=df, x='Type 1', y='Attack')

In [None]:
# Set the figure size before drawing.
plt.figure(figsize=(14, 4))

# Swarm plot of Attack per primary type.
# Note: no `palette` argument is given here, so seaborn's default colors are used.
seaborn.swarmplot(data=df, x='Type 1', y='Attack')

In [None]:
# Distribution plot (histogram plus kernel density estimate) of Attack.
# `distplot` is deprecated and scheduled for removal; `histplot` with
# kde=True and stat='density' is its documented replacement (seaborn >= 0.11).
seaborn.histplot(x=df.Attack, kde=True, stat='density')

In [None]:
# Count how often each Attack value occurs.
# The vector is passed explicitly as `x`: current seaborn versions interpret the
# first positional argument as `data`, which would not give per-value counts.
seaborn.countplot(x=df.Attack)

In [None]:
# Count plot (bar plot): number of Pokemon per primary type.
seaborn.countplot(data=df, x='Type 1')

# Tilt the x tick labels so that the type names do not overlap.
plt.xticks(rotation=-45)

In [None]:
# Categorical plot: one swarm plot of Attack per primary type for every
# evolution stage. 'Stage' is used both for the color (hue) and for the
# column facet (col).
g = seaborn.catplot(
    data=df,
    kind='swarm',
    x='Type 1',
    y='Attack',
    hue='Stage',
    col='Stage',
)

# Rotate the x tick labels of every facet through the returned grid object.
g.set_xticklabels(rotation=-45)

# The pyplot variant below is not used because it only rotates the labels
# of the last subplot:
# plt.xticks(rotation=-45)

In [None]:
# Density plot
# Shows the joint distribution of two variables (here Attack and Defense).
# Both vectors are passed by keyword: current seaborn versions accept at most
# one positional argument and raise a TypeError for the positional form.
seaborn.kdeplot(x=df.Attack, y=df.Defense)

In [None]:
# Joint distribution plot of Attack and Defense.
seaborn.jointplot(data=df, x='Attack', y='Defense')

## Keras-Dataset

In [None]:
# Load the data set shipped with Keras.
from keras.datasets import boston_housing

# load_data() returns two (features, targets) pairs: training first, test second.
(
    (x_train0, y_train0),
    (x_test0, y_test0),
) = boston_housing.load_data()

In [None]:
# Inspect the raw training targets with the `see` helper imported from bj_helper.keras.
# NOTE(review): what exactly `see` prints is not visible from this notebook — confirm.
see(y_train0)

In [None]:
# Wrap the training targets in a single-column DataFrame named 'Preis' (price).
y_train_df = pd.DataFrame(data=y_train0, columns=['Preis'])

# Scale the prices by a factor of 1000
# (presumably the raw targets are given in thousands — confirm).
y_train_df['Preis'] = y_train_df['Preis'] * 1000

grid(y_train_df)

In [None]:
# Distribution (histogram plus kernel density estimate) of the training prices.
# `distplot` is deprecated and scheduled for removal; `histplot` with kde=True
# and stat='density' is its documented replacement (seaborn >= 0.11).
# The column is selected explicitly so the frame is not treated as wide-form data.
seaborn.histplot(x=y_train_df['Preis'], kde=True, stat='density')


In [None]:
# Put the raw training features into a DataFrame; the 13 feature columns
# are simply labelled 'A' through 'M'.
x_train_df = pd.DataFrame(
    data=x_train0,
    columns=list('ABCDEFGHIJKLM'),
)
grid(x_train_df)

In [None]:
# Set the figure size before drawing.
plt.figure(figsize=(10, 4))

# One box per feature column of the (not yet normalized) training data.
seaborn.boxplot(data=x_train_df)

In [None]:
# Normalize the features

# Mean and standard deviation along axis 0 (each result is an array).
mean = x_train0.mean(axis=0)
std = x_train0.std(axis=0)

# Training data: subtract the mean, then divide by the standard deviation.
x_train1 = (x_train0 - mean) / std

# The test data is normalized too. Using the statistics computed from the
# TRAINING data for this is intentional and correct.
x_test1 = (x_test0 - mean) / std

In [None]:
# Visualize the normalized training features.
plt.figure(figsize=(16, 8))
x_train_df = pd.DataFrame(
    data=x_train1,
    columns=list('ABCDEFGHIJKLM'),
)
seaborn.boxplot(showmeans=True, data=x_train_df)

# Also possible: plot the NumPy array directly
# seaborn.boxplot(data=x_train1)

## Trainingskurve mit Rauschen
* Quelle: https://stackoverflow.com/questions/48254090/numpy-how-to-generate-a-random-noisy-curve-resembling-a-training-curve

In [None]:
# A bare `import scipy` does not guarantee that the `stats` submodule is loaded,
# so import it explicitly (this still binds the top-level name `scipy`).
import scipy.stats

# Parameters
rauschen = 10   # noise parameter: shape argument of the gamma distribution
seed = 42       # fixed seed so that Restart & Run All reproduces the same curve

# Build the curve: a constant offset plus an exponentially decaying term, with
# gamma-distributed noise whose weight (1 - exp(-x/100)) grows as the curve decays.
x = np.arange(2000)
noise = scipy.stats.gamma(rauschen).rvs(len(x), random_state=seed)
y = 0.00025 + 0.001 * np.exp(-x/100.) + noise * (1-np.exp(-x/100)) * 2e-5

# Convert to a DataFrame with the columns 'mae' and 'i'.
# Always keep the index as an explicit column, otherwise seaborn gets confused.
training = pd.DataFrame({'mae': y})
training['i'] = training.index

# Visualize
plt.figure(figsize=(16, 4))
seaborn.lineplot(data=training, x='i', y='mae')

In [None]:
#grid(training)

In [None]:
# Visualize the training curve together with a fitted trend.
# `order` fits a polynomial of that degree (here 5) through the points.
plt.figure(figsize=(16, 4))
seaborn.regplot(
    x='i',
    y='mae',
    data=training,
    order=5,
    scatter_kws={"color": "black"},
    line_kws={"color": "red"},
)