In [3]:
import tensorflow as tf
from keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, GlobalAveragePooling1D, Dense
from keras.regularizers import l2
from sklearn.metrics import classification_report
import mlflow
import numpy as np

mlflow.set_tracking_uri("http://localhost:8001")
mlflow.autolog()

# Definicja hiperparametrów
num_words = 5000  # Liczba słów w naszym słowniku
maxlen = 200  # Maksymalna długość tekstu
embedding_dim = 16 # Wielkość wektora embeddingu (hiperparametr modelu)

dataset = imdb.load_data(num_words = num_words) # Pobieramy dane z datasetu IMDB
(x_train, y_train), (x_test, y_test) = dataset

# Robimy padding komentarzy tak, aby wszystekie miały tę sama długość
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

# Rozpoczęcie eksperymentu MLFlow
mlflow.set_experiment("IMDB Sentiment Analysis")

## Tworzymy model sieci neuronowej z jedn warstwa ukryta z 16 wezlami (taki mamy rozmiar embeddingu)
def build_keras_model(input_dim, output_dim):
    model = Sequential()
    # Używamy tutaj regularyzacji L2, aby model nam nie overfitował
    model.add(Embedding(input_dim = input_dim, output_dim = output_dim, embeddings_regularizer=l2(0.01)))
    model.add(GlobalAveragePooling1D())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

model = build_keras_model(num_words, embedding_dim)
history = model.fit(x_train, y_train, epochs=10, batch_size=128, validation_data=(x_test, y_test))

# Predict the sentiment on the test dataset
y_pred = (model.predict(x_test) > 0.5).astype("int32")

# Print classification report
print(classification_report(y_test, y_pred, target_names=['Negative', 'Positive']))

mlflow.end_run()

2025/02/22 20:50:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for keras.
2025/02/22 20:50:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/02/22 20:50:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2025/02/22 20:50:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.
2025/02/22 20:50:36 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '65e7c96f28ee46178065d66b0df083b2', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
INFO:tensorflow:Assets written to: C:\Users\Urban\AppData\Local\Temp\tmp786bck7x\model\data\model\assets
🏃 View run clumsy-fox-844 at: http://localhost:8001/#/experiments/984236064121418142/runs/65e7c96f28ee46178065d66b0df083b2
🧪 View experiment at: http://localhost:8001/#/experiments/984236064121418142
              precision    recall  f1-score   support

    Negative       0.59      0.56      0.57     12500
    Positive       0.58      0.60      0.59     12500

    accuracy                           0.58     25000
   macro avg       0.58      0.58      0.58     25000
weighted avg       0.58      0.58      0.58     25000

