# <font color="#49699E" size=40>Developing Neural Network Models with Keras and Tensorflow</font>

# LEARNING OBJECTIVES


# INTRODUCTION


## Imports


In [None]:
import pandas as pd
# Show DataFrames with their plain-text repr instead of HTML tables in notebook output.
pd.set_option("display.notebook_repr_html", False)

import seaborn as sns
import matplotlib.pyplot as plt
from dcss.plotting import custom_seaborn
# Helper imported from dcss.plotting; presumably applies the shared plot styling
# (its body is not visible here -- confirm against that module).
custom_seaborn()

from numpy.random import seed
from tensorflow.random import set_seed
# Seed TensorFlow's and NumPy's global random generators with the same fixed value.
set_seed(42)
seed(42)


# Only these five columns are read from the Hansard speeches file.
columns = ['speech', 'speakername', 'party', 'constituency', 'year']

# Load the speeches, then discard rows missing the party, speaker name, or speech text.
uk_df = pd.read_csv(
    "../data/british_hansards/hansard-speeches-v301.csv",
    usecols=columns,
)
uk_df = uk_df.dropna(subset=['party', 'speakername', 'speech'])

In [None]:
# Print a summary of the loaded frame (columns, dtypes, non-null counts).
uk_df.info()

In [None]:
# Keep only the speeches dated 2015 through 2019 (range's upper bound is exclusive).
uk_df = uk_df[uk_df['year'].isin(range(2015, 2020))]

### Filtering The Data


In [None]:
# Count the speeches per party; the result is shown as the cell's output.
uk_df['party'].value_counts()

In [None]:
# Remove every row whose party is recorded as 'Speaker', then re-check the party counts.
uk_df = uk_df.drop(index=uk_df.loc[uk_df['party'].eq('Speaker')].index)
uk_df['party'].value_counts()

In [None]:
# Inspect the 11th-20th shortest speeches (sorted by character count).
sorted(uk_df['speech'], key=len)[10:20]

In [None]:
# Drop, in place, every speech shorter than 200 characters.
uk_df.drop(index=uk_df[uk_df['speech'].map(len) < 200].index, inplace=True)

### Categorizing Affiliation


In [None]:
# Hand-assigned groupings of party labels; every party not named here is bucketed as "other".
right = ['Conservative']
centre = ['Liberal Democrat']
left = ['Labour', 'Labour (Co-op)']
national = ['Scottish National Party']

# Build "other" by filtering instead of Index.drop(), which raises KeyError as soon as
# one of the named parties is missing from the (already filtered) data.
named_parties = {*right, *centre, *left, *national}
other = [
    party
    for party in uk_df['party'].value_counts().index
    if party not in named_parties
]

# Write one affiliation label per party group into a new 'affiliation' column.
parties_by_affiliation = {
    "centre-right": right,
    "centre": centre,
    "centre-left": left,
    "national": national,
    "other": other,
}
for affiliation_label, member_parties in parties_by_affiliation.items():
    uk_df.loc[uk_df['party'].isin(member_parties), 'affiliation'] = affiliation_label

In [None]:
# Count the speeches in each affiliation group.
uk_df['affiliation'].value_counts()

### Taking a Stratified Sample


In [None]:
# Draw 3,000 speeches from each affiliation so every class is equally represented.
# DataFrameGroupBy.sample does this directly and keeps the 'affiliation' column in the
# result; the groupby(...).apply(lambda ...) form depends on the grouping column being
# passed to the function, which newer pandas releases deprecate.
# Like the per-group DataFrame.sample call, this raises if a group has fewer than 3,000 rows.
uk_df_strat = uk_df.groupby("affiliation").sample(n=3000)

uk_df_strat.affiliation.value_counts()

### Lemmatizing Speech


In [None]:
import spacy
from tqdm import tqdm

# Load the small English pipeline without the components named in `disable`.
nlp = spacy.load('en_core_web_sm', disable=['ner', 'textcat', 'parser'])

# Lemmatize every sampled speech, skipping punctuation tokens. The progress-bar total is
# taken from the sample itself rather than hard-coded, so it stays correct if the
# sample size changes.
lem_speeches = [
    [tok.lemma_ for tok in doc if not tok.is_punct]
    for doc in tqdm(nlp.pipe(uk_df_strat['speech']), total=len(uk_df_strat))
]

# Re-join each list of lemmas into a single space-separated string per speech.
lem_speeches_joined = [" ".join(speech) for speech in lem_speeches]
    

# GETTING STARTED WITH `KERAS`


## Pre-processing / Prep Work


### Encoding the 'Affiliation' Column


In [None]:
# Preview five rows of the label column before encoding (fixed seed for a stable display).
uk_df_strat[['affiliation']].sample(5, random_state=1)

In [None]:
from sklearn.preprocessing import LabelBinarizer

# Binarize the affiliation labels (one indicator column per class); the encoder is
# fitted and applied in a single call.
affiliation_encoder = LabelBinarizer()
aff_transformed = affiliation_encoder.fit_transform(uk_df_strat['affiliation'])

# Show five encoded rows, drawn with the same sample size and seed as the raw-label preview.
pd.DataFrame(aff_transformed).sample(5, random_state=1)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF features: accents stripped, English stop words removed, and terms that occur
# in fewer than 1% of the speeches ignored.
speech_vectorizer = TfidfVectorizer(
    strip_accents='unicode',
    stop_words='english',
    min_df=0.01,
)
speech_transformed = speech_vectorizer.fit_transform(lem_speeches_joined)

In [None]:
# Peek at ten of the vocabulary terms. get_feature_names() was removed in
# scikit-learn 1.2; get_feature_names_out() is its replacement (it returns an array).
speech_vectorizer.get_feature_names_out()[40:50]

In [None]:
# Shape of the encoded label matrix (one row per sampled speech).
aff_transformed.shape

In [None]:
# Shape of the TF-IDF matrix (one row per sampled speech, one column per retained term).
speech_transformed.shape

## Training and Validation Sets


In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow import keras


# First split: hold out 10% of the speeches as the test set, stratified on the labels.
# NOTE(review): no random_state is passed, so the split depends on NumPy's global seed
# and on the cells being run in order.
X_t, X_test, y_t, y_test = train_test_split(
    speech_transformed,
    aff_transformed,
    test_size = 0.1,
    shuffle = True,
    stratify=aff_transformed
)

# Second split: 20% of the remaining 90% becomes the validation set, leaving roughly
# 72% / 18% / 10% of the sample for training / validation / testing.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_t,
    y_t,
    test_size = 0.2,
    shuffle = True,
    stratify=y_t
)

## You don't need to pay much attention to the following chunk of code - it's just
## something we have to do to make SciPy sparse matrices compatible with Keras

def convert_sparse_matrix_to_sparse_tensor(X):
    """Convert a SciPy-style sparse matrix into a reordered ``tf.SparseTensor``.

    Args:
        X: A sparse matrix exposing ``tocoo()`` (for example the output of a
            scikit-learn vectorizer).

    Returns:
        A ``tf.SparseTensor`` holding the non-zero values and shape of ``X``,
        passed through ``tf.sparse.reorder``.
    """
    coo = X.tocoo()
    # Pair up the row/column coordinates as an (nnz, 2) int64 array.
    # ``np.mat`` is avoided: it is a legacy matrix alias that NumPy 2.0 removed.
    indices = np.column_stack((coo.row, coo.col)).astype(np.int64)
    return tf.sparse.reorder(tf.SparseTensor(indices, coo.data, coo.shape))

# Convert the training and validation features to sparse tensors before fitting.
X_train, X_valid = (
    convert_sparse_matrix_to_sparse_tensor(split) for split in (X_train, X_valid)
)

In [None]:
# Shape of the training feature tensor.
X_train.shape

In [None]:
# Shape of the training label matrix.
y_train.shape

In [None]:
# Number of feature columns (TF-IDF terms); used below as the network's input width.
words = X_train.shape[1]

# END-TO-END NEURAL NETWORK MODELLING

### Building the Sequential Model


In [None]:
# Start with an empty Sequential model; layers are added one cell at a time below.
uk_model = keras.models.Sequential()

In [None]:
# Declare the input width as an explicit one-element shape tuple. A bare positional int
# is not a valid shape for newer Keras releases, which expect a tuple.
# NOTE(review): Keras 3 names this argument `shape` and treats `input_shape` as a
# deprecated alias -- confirm against the installed version.
uk_model.add(keras.layers.InputLayer(input_shape=(words,)))

In [None]:
# First hidden layer: 400 fully connected units with ReLU activation.
uk_model.add(keras.layers.Dense(400, activation = "relu"))

In [None]:
# Print the layers added so far.
uk_model.summary()

In [None]:
# Stack four more identical hidden layers (400 ReLU units each).
for _ in range(4):
    uk_model.add(keras.layers.Dense(400, activation="relu"))

In [None]:
# Output layer: five softmax units, one per affiliation label, then print the full model.
uk_model.add(keras.layers.Dense(5, activation='softmax'))
uk_model.summary()

### Compiling a Keras ANN


In [None]:
# Categorical cross-entropy matches the binarized (one column per class) targets and
# the softmax output; "sgd" selects stochastic gradient descent, and accuracy is
# recorded alongside the loss.
uk_model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer="sgd",
    metrics=["accuracy"]
)

### Care, Feeding, and Training of your ANN


In [None]:
# Train for 50 epochs, scoring the validation split as well; verbose=0 suppresses the
# per-epoch log. `history.history` is plotted in the next cell.
history = uk_model.fit(X_train, y_train, epochs=50, validation_data = (X_valid, y_valid), verbose=0)

In [None]:
# One line style per recorded series. Compiling with metrics=["accuracy"] and fitting
# with validation data records four series (loss, accuracy, val_loss, val_accuracy),
# so four styles are listed; with only three, the last curve is drawn without a marker.
pd.DataFrame(history.history).plot(
    style=['*-', 'o-', '^-', 's-'],
    linewidth=.5,
    markersize=3,
    figsize=(8, 8),
)
plt.grid(True)
# Loss and accuracy share one axis, so clamp it to a range that shows both.
plt.gca().set_ylim(0, 2)
plt.show()

## Overfitting


In [None]:
# A smaller network: one 400-unit hidden layer, one 10-unit hidden layer, and the same
# five-way softmax output.
# The input width is given as an explicit shape tuple; a bare positional int is not a
# valid shape for newer Keras releases.
# NOTE(review): Keras 3 names this argument `shape` and treats `input_shape` as a
# deprecated alias -- confirm against the installed version.
uk_model_2 = keras.models.Sequential([
    keras.layers.InputLayer(input_shape=(words,)),
    keras.layers.Dense(400, activation="relu"),
    keras.layers.Dense(10, activation="relu"),
    keras.layers.Dense(5, activation="softmax"),
])

In [None]:
# Compile and train the smaller model with the same loss, optimizer, metric, epoch
# count, and validation data as the first model, so the two histories are comparable.
uk_model_2.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer="sgd",
    metrics=["accuracy"]
)

history2 = uk_model_2.fit(X_train, y_train, epochs=50, validation_data = (X_valid, y_valid), verbose=0)

In [None]:
# Shared y-axis limits so the two panels can be compared directly.
lims = (0, 2)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(8,16))

# Draw both training histories with identical settings, one panel each. Four line
# styles are listed because each history holds four series (loss, accuracy, val_loss,
# val_accuracy); with only three, the last curve is drawn without a marker.
for panel, run, panel_title in (
    (ax1, history, "5-Layer Model"),
    (ax2, history2, "2-Layer Model"),
):
    pd.DataFrame(run.history).plot(
        ax=panel,
        style=['*-', 'o-', '^-', 's-'],
        linewidth=.5,
        markersize=3,
    )
    panel.grid(True)
    panel.set_ylim(lims)
    panel.title.set_text(panel_title)

plt.show()

## Confusion Matrices


In [None]:
# Predicted class = position of the largest softmax output for each held-out speech.
test_probabilities = uk_model_2.predict(convert_sparse_matrix_to_sparse_tensor(X_test))
y_pred = np.argmax(test_probabilities, axis=1)

# Turn the indicator-encoded test labels back into integer class positions.
y_true = np.argmax(y_test, axis=1)

conf_mat = tf.math.confusion_matrix(y_true, y_pred)
plt.figure()

# grayscale for printing
cmap = sns.cubehelix_palette(50, hue=0.05, rot=0, light=0.9, dark=0, as_cmap=True)

# The matrix is transposed before plotting so that observed classes run along the
# x-axis and predicted classes along the y-axis, matching the axis labels below.
class_names = affiliation_encoder.classes_
sns.heatmap(
    np.transpose(np.array(conf_mat)),
    xticklabels=class_names,
    yticklabels=class_names,
    annot=True,
    fmt='g',
    cmap=cmap,
)

plt.xlabel("Observed")
plt.ylabel("Predicted")
plt.xticks(rotation=45)
plt.yticks(rotation=45)
plt.show()

# CONCLUSION
## Key Points 
