In [None]:
import sys
import sklearn
import tensorflow as tf
from tensorflow import keras 
import tensorflow_addons as tfa #for tfa to work, a compatible version of tensorflow has to be installed: check https://github.com/tensorflow/addons
import numpy as np
import matplotlib.pyplot as plt
import os

#seed NumPy and TensorFlow so repeated runs of the notebook give the same results
np.random.seed(42)
tf.random.set_seed(42)

#load Fashion-MNIST and convert the images to float32, dividing every pixel by 255
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()
X_train_full = X_train_full.astype(np.float32) / 255.
X_test = X_test.astype(np.float32) / 255.

#the first 5000 training samples become the validation set, the rest is used for training
X_valid, y_valid = X_train_full[:5000], y_train_full[:5000]
X_train, y_train = X_train_full[5000:], y_train_full[5000:]

In [None]:
#define sequential model: Flatten (no weights) -> hidden Dense -> softmax output
model = tf.keras.models.Sequential([
  tf.keras.layers.Flatten(input_shape=(28, 28)),
  tf.keras.layers.Dense(64, activation='relu'),
  tf.keras.layers.Dense(10, activation='softmax'),
])

#define optimizers used in different layers of the model
#legacy used to run more efficient on M1/M2 Macs as suggested by warning
#WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.SGD` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.SGD`.
optimizers = [
    tf.keras.optimizers.legacy.SGD(learning_rate=1e-4), #lower optimizer -> close to input
    tf.keras.optimizers.legacy.Adam(learning_rate=1e-2) #upper optimizer -> close to output
]

#assign optimizers to the layers
#model.layers[0] is the Flatten layer, which has no trainable weights, so pairing
#SGD with it would leave SGD with nothing to update and Adam training every Dense layer.
#The hidden Dense layer (index 1) therefore gets SGD and the output layer gets Adam.
optimizers_and_layers = [(optimizers[0], model.layers[1]), (optimizers[1], model.layers[2:])]
optimizer = tfa.optimizers.MultiOptimizer(optimizers_and_layers)

#compile with SparseCategoricalCrossentropy as loss function and accuracy as metric (reported for every epoch)
#from_logits=False because the output layer already applies softmax, i.e. the model
#emits probabilities (the plotting cells rely on that), not raw logits
model.compile(optimizer=optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), metrics=['accuracy'])


In [None]:
#train for 5 epochs and validate after each one; batch size stays at the Keras
#default of 32, see https://keras.io/api/models/model_training_apis/
model.fit(
    X_train,
    y_train,
    epochs=5,
    validation_data=(X_valid, y_valid),
)

In [None]:
#score the trained model on the test data; evaluate returns the loss followed by the compiled metrics
test_metrics = model.evaluate(X_test, y_test, verbose=2)
test_loss, test_acc = test_metrics

print('\nTest accuracy:', test_acc)

In [None]:
#functions to plot the predictions from lecture exercise

#human-readable class names taken from the lecture example; list position = class index
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

def plot_image(i, predictions_array, true_label, img, label=None):
  """Draw one image and caption it with predicted class, confidence and true class.

  The caption is coloured blue when the prediction matches the true class and
  red otherwise.

  Args:
    i: Position of the sample inside `img` and inside the label array.
    predictions_array: Per-class scores for sample `i`; the arg-max is taken as
      the predicted class and the maximum is shown as a percentage.
    true_label: Array of true class indices; used when `label` is not given.
    img: Array of images; `img[i]` is displayed.
    label: Optional array of true class indices aligned with `img`. When
      given it takes precedence over `true_label`.
  """
  # prefer the labels that are aligned with `img`; fall back to `true_label`
  labels = true_label if label is None else label
  true_label, img = labels[i], img[i]
  plt.grid(False)
  plt.xticks([])
  plt.yticks([])

  plt.imshow(img, cmap=plt.cm.binary)

  predicted_label = np.argmax(predictions_array)
  if predicted_label == true_label:
    color = 'blue'
  else:
    color = 'red'

  # pass the colour on to the caption (it was computed but never used before)
  plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
                                100*np.max(predictions_array),
                                class_names[true_label]),
             color=color)
                   
def plot_value_array(i, predictions_array, true_label):
  """Draw a bar chart of the ten class scores for one sample.

  Bars start out grey; the predicted class is recoloured red and the true
  class blue, so a correct prediction ends up as a single blue bar.

  Args:
    i: Position of the sample inside `true_label`.
    predictions_array: The ten per-class scores of sample `i`.
    true_label: Array of true class indices; `true_label[i]` is the true class.
  """
  actual = true_label[i]
  class_ids = range(10)

  plt.grid(False)
  plt.xticks(class_ids)
  plt.yticks([])
  bars = plt.bar(class_ids, predictions_array, color="#777777")
  plt.ylim([0, 1])

  guess = np.argmax(predictions_array)
  # red first, blue second: when both coincide the bar stays blue
  bars[guess].set_color('red')
  bars[actual].set_color('blue')
  


In [None]:
#make predictions
import numpy as np
import matplotlib.pyplot as plt


# Randomly choose 10 indexes from X_test
num_images = 10
indexes = np.random.choice(len(X_test), size=num_images, replace=False)
test_images = X_test[indexes]
label = y_test[indexes]  # true classes, aligned with test_images / pred
pred = model.predict(test_images)

# One figure per sampled image: the picture on the left, its class scores on the right.
# Both helpers index the label array with i, so they must receive `label` (the labels of
# the sampled images); passing y_test would look up the label of test image i instead.
for i in range(num_images):
    plt.figure(figsize=(6,3))
    plt.subplot(1,2,1)
    plot_image(i, pred[i], label, test_images, label)
    plt.subplot(1,2,2)
    plot_value_array(i, pred[i], label)
    plt.show()

## Question 02

In [12]:
import pandas as pd
import numpy as np
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Embedding
from tensorflow.keras.optimizers import Adam

In [13]:
#downloading the NLTK resources used in this question: the stop-word list and the
#Punkt tokenizer data that word_tokenize needs ('punkt' on older NLTK releases,
#'punkt_tab' on newer ones); nltk.download skips packages that are already up to date
for resource in ('stopwords', 'punkt', 'punkt_tab'):
    nltk.download(resource)

#loading datasets
fake = pd.read_csv('Fake.csv')
true = pd.read_csv('True.csv')

#adding label column to the datasets
fake['label'] = 1  # Fake news label
true['label'] = 0  # True news label

#combining datasets (fake rows first, then true rows, with a fresh 0..n-1 index)
data = pd.concat([fake, true], ignore_index=True)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/mauricebaier/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [14]:
#build the English stop-word set once; the function below tests membership against it
#instead of requesting the full stop-word list again for every single token
stop_words = set(stopwords.words('english'))


def text_preprocessing(text):
    """Normalise one document for the tokenizer.

    The text is lower-cased and split with NLTK's word_tokenize. Tokens that
    are English stop words or have at most two characters are dropped, the
    remaining tokens are Porter-stemmed.

    Args:
        text: The raw document as a string.

    Returns:
        The stemmed tokens joined by single spaces.
    """
    ps = PorterStemmer()
    stemmed_tokens = [
        ps.stem(w)
        for w in word_tokenize(text.lower())
        if w not in stop_words and len(w) > 2
    ]
    return ' '.join(stemmed_tokens)

#clean the text column of both source frames
fake['text'] = fake['text'].apply(text_preprocessing)
true['text'] = true['text'].apply(text_preprocessing)

#`data` was concatenated from `fake` and `true` before this cell ran and is a separate
#frame, so the cleaned text has to be written into it explicitly; otherwise the
#tokenizer would still see the raw articles. Row order is fake first, then true,
#with a 0..n-1 index, exactly as `data` was built.
assert len(data) == len(fake) + len(true), "data is no longer the plain concatenation of fake and true"
data['text'] = pd.concat([fake['text'], true['text']], ignore_index=True)

In [None]:
#splitting row positions into training and test sets first, so that everything learned
#from the text (vocabulary, padding length) can be derived from the training rows only;
#test_size and random_state are unchanged
train_idx, test_idx = train_test_split(np.arange(len(data)), test_size=0.2, random_state=42)

#tokenizing text data: the vocabulary is fitted on the training texts only, so no
#information from the test set leaks into the model input; words that occur only in
#test texts are not in the vocabulary and are dropped by texts_to_sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['text'].iloc[train_idx])
X_sequences = tokenizer.texts_to_sequences(data['text'])

#pad to the longest training sequence; longer test sequences are truncated by pad_sequences
max_len = max(len(X_sequences[i]) for i in train_idx)
X_padded = pad_sequences(X_sequences, maxlen=max_len)

X_train, X_test = X_padded[train_idx], X_padded[test_idx]
y_train, y_test = data['label'].iloc[train_idx], data['label'].iloc[test_idx]

#defining Bi-LSTM model
#NOTE(review): activation='relu' replaces the LSTM default (tanh); on long sequences
#this may train less stably - confirm it is intentional
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100))
model.add(Bidirectional(LSTM(units=64, activation='relu')))
model.add(Dense(units=1, activation='sigmoid'))

#compiling the model
model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

#training the model (20% of the training rows are held out for validation)
model.fit(X_train, y_train, epochs=2, batch_size=64, validation_split=0.2)

#evaluating the model
loss, accuracy = model.evaluate(X_test, y_test)
print("Model Accuracy:", accuracy)