In [10]:
# Importiere die notwendigen Bibliotheken
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.metrics import Precision, Recall, AUC
from keras.layers import Dense, Embedding, LSTM
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
# Read the preprocessed dataset (comma-separated, UTF-8) into a DataFrame.
df = pd.read_csv(
    '../data/processed/processed.csv',
    sep=',',
    encoding='utf-8',
)


In [5]:
# Make sure every entry of the 'Text' column is a string. Missing values are
# replaced with an empty string first: a bare astype(str) would turn NaN into
# the literal text "nan", which the tokenizer would then learn as a word.
df['Text'] = df['Text'].fillna('').astype(str)

# Prepare the text data: map each text to a sequence of integer word indices
# and pad the sequences to a common length.
max_features = 2000  # passed to the tokenizer as num_words
tokenizer = Tokenizer(num_words=max_features, split=' ')
texts = df['Text'].values
tokenizer.fit_on_texts(texts)
X = tokenizer.texts_to_sequences(texts)
X = pad_sequences(X)  # no maxlen given, so the longest sequence sets the width
y = df['Populism'].values


In [11]:
# Build the LSTM model: embedding -> single LSTM layer -> one sigmoid output unit.
embed_dim = 128
lstm_out = 196

model = Sequential()
model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              # Name the metric objects explicitly. Left unnamed, Keras
              # auto-numbers them every time this cell is re-run
              # ('precision_1', 'recall_1', ...), and lookups such as
              # history.history['recall'] then fail with a KeyError.
              metrics=['accuracy',
                       Precision(name='precision'),
                       Recall(name='recall'),
                       AUC(name='auc')])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the cell output.
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 1243, 128)         256000    
                                                                 
 lstm_2 (LSTM)               (None, 196)               254800    
                                                                 
 dense_2 (Dense)             (None, 1)                 197       
                                                                 
Total params: 510,997
Trainable params: 510,997
Non-trainable params: 0
_________________________________________________________________
None


In [7]:
# First cut: keep 70% of the samples for training and set 30% aside.
holdout_fraction = 0.3
split_seed = 42
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=holdout_fraction, random_state=split_seed
)

# Second cut: one third of the set-aside part becomes the test set (10% overall),
# the remaining two thirds become the validation set (20% overall).
test_share = 1/3
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=test_share, random_state=split_seed
)

In [13]:
# Train the model. The validation split is passed to fit() so that val_* metrics
# are recorded in the history next to the training metrics after every epoch;
# without it the X_val / y_val split was created but never used.
batch_size = 8
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=batch_size,
    verbose=2,
)

Epoch 1/20


KeyboardInterrupt: 

In [None]:
# Plot the training accuracy and the training loss, one figure per curve.
# Each entry is (key in history.history, figure title, y-axis label).
curve_specs = [
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
]
for history_key, figure_title, y_label in curve_specs:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(history.history[history_key])
    ax.set_title(figure_title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Epoch')
    ax.legend(['Train'], loc='upper left')
    plt.show()

In [None]:
# Plot the training recall, precision and AUC, one figure per curve.
# Each entry is (key in history.history, figure title, y-axis label).
metric_specs = [
    ('recall', 'Model recall', 'Recall'),
    ('precision', 'Model precision', 'Precision'),
    ('auc', 'Model AUC', 'AUC'),
]
for metric_key, metric_title, metric_label in metric_specs:
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(history.history[metric_key])
    ax.set_title(metric_title)
    ax.set_ylabel(metric_label)
    ax.set_xlabel('Epoch')
    ax.legend(['Train'], loc='upper left')
    plt.show()
