## Introduction
This notebook covers loading a dataset, splitting it into training and test sets, training a model with scikit-learn, and building neural networks with Keras.

In [None]:
# package import
import seaborn as sns
import pandas as pd

# sklearn packages
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# deep learning packages


In [None]:
# Load the example "penguins" dataset through seaborn's dataset loader.
penguins = sns.load_dataset(name='penguins')

In [None]:
# dataset exploration
# A bare expression on the last line of a cell is rendered as the cell's output,
# so this shows the loaded DataFrame.
penguins

In [None]:
# Dataset Visualization: Pairplot
# Draw pairwise relationships between the dataset's variables, coloring the
# points by species.
# The trailing semicolon stops the notebook from echoing the returned
# PairGrid object's repr underneath the figure.
sns.pairplot(penguins, hue="species");


In [None]:
# Coding Task: Create a similar pairplot, but with hue="sex"
# ---------- Your Code Here------------------

# -------------------------------------------

In [None]:
# pre-process data
# Remove the categorical columns 'island' and 'sex'; the result is a new
# DataFrame, so `penguins` itself is left untouched.
penguins_filtered = penguins.drop(['island', 'sex'], axis="columns")
penguins_filtered

In [None]:
# Count the missing (N/A) entries in each remaining column.
(
    penguins_filtered
    .isna()
    .sum()
)

In [None]:
# Discard every row that has at least one missing value.
penguins_filtered = penguins_filtered.dropna(axis="index", how="any")

In [None]:
# Extract the feature columns (the model inputs): every remaining column
# except 'species', which is what we want to predict.
features = penguins_filtered.drop(columns=['species'])

In [None]:
# Encode the species labels as integer codes.
# `target` holds one integer code per row; `cat_indices` holds the unique
# species labels, where position i is the label for code i.
target, cat_indices = pd.factorize(penguins_filtered['species'])
print(target, cat_indices, sep="\n")

In [None]:
# dataset split
# Hold out 20% of the rows as a test set. Stratifying on the labels keeps the
# class proportions similar in both parts, and the fixed random_state makes
# the shuffle repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    shuffle=True,
    stratify=target,
    random_state=0,
)

In [None]:
# Baseline model: multinomial logistic regression on standardized features.
# The solver is sensitive to feature scale, so the features are standardized
# first. Putting the scaler in a Pipeline means it is fitted on the training
# split only and then re-applied automatically whenever the classifier
# scores or predicts on new data (no leakage from the test split).
classifier = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("logreg", LogisticRegression(random_state=0, max_iter=200)),
    ]
).fit(X_train, y_train)

# Mean accuracy on the training split, then on the held-out test split.
print(classifier.score(X_train, y_train))
print(classifier.score(X_test, y_test))

## One-hot Encoding for Target Data

In [None]:
# One-hot encode the species labels: one indicator column per species.
# Note: this rebinds `target`, which until now held the integer codes from
# pd.factorize. The y_train / y_test arrays produced by the earlier split are
# not affected by this reassignment.
target = pd.get_dummies(data=penguins_filtered['species'])
target.head(n=5)  # show the first 5 rows to see what the encoding looks like

## Intro to Neural Networks

In [None]:
# Run once per runtime, and only if TensorFlow is not installed yet
# (uncomment the next line). %pip installs into the environment of the running kernel.
# %pip install tensorflow

In [None]:
from tensorflow import keras

In [None]:
# Fix the random seed so that weight initialization (and any other random
# operations) are repeatable between runs.
keras.utils.set_random_seed(seed=2)

In [None]:
# Input layer: one value per feature column, i.e. every dimension of X_train
# except the leading row dimension.
inputs = keras.Input(shape=X_train.shape[1:])

In [None]:
# Hidden layer: 10 fully connected neurons with ReLU activation, applied to
# the input tensor.
hidden_layer = keras.layers.Dense(units=10, activation="relu")(inputs)

In [None]:
# Output layer: 3 neurons with softmax activation, so the three outputs form
# a probability distribution over the classes.
output_layer = keras.layers.Dense(units=3, activation="softmax")(hidden_layer)

In [None]:
# Assemble the model from its input tensor and its output tensor, then print
# a layer-by-layer summary of the resulting network.
model = keras.Model(
    inputs=inputs,
    outputs=output_layer,
)
model.summary()

In [None]:
# Challenge
# 1. How many parameters does the resulting model have?
# 2. What happens to the number of parameters if we increase or decrease the number of neurons in the hidden layer?