In [13]:
from mlflow.tracking import MlflowClient
import mlflow.keras
import pickle

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [14]:
# Name of the MLflow experiment whose runs are compared.
experiment_name = "Default"  # Replace with your experiment name

# Initialize MLflow client
client = MlflowClient()

# Look up the experiment. get_experiment_by_name returns None when no
# experiment has this name, so fail here with a clear message instead of an
# AttributeError on the next line.
experiment = client.get_experiment_by_name(experiment_name)
if experiment is None:
    raise ValueError(f"MLflow experiment '{experiment_name}' was not found.")
experiment_id = experiment.experiment_id

# Fetch the runs of the experiment; search_runs documents experiment_ids as a
# list of IDs.
# NOTE(review): search_runs returns one page of results — experiments with a
# very large number of runs may need pagination; confirm against run count.
runs = client.search_runs(experiment_ids=[experiment_id])

# Select the run with the highest logged 'test_accuracy'. Runs that never
# logged the metric are skipped; on ties the first run returned wins.
best_run = None
best_accuracy = -float("inf")

for run in runs:
    metrics = run.data.metrics
    accuracy = metrics.get('test_accuracy')
    if accuracy is not None and accuracy > best_accuracy:
        best_accuracy = accuracy
        best_run = run

# Report the winner, or say precisely why there is none: runs may exist but
# lack the metric, so "no runs" alone would be misleading.
if best_run is not None:
    print(f"Best run ID: {best_run.info.run_id}")
    print(f"Best test accuracy: {best_accuracy}")
    print("Best parameters:")
    for param, value in best_run.data.params.items():
        print(f"  {param}: {value}")
else:
    print("No runs with a 'test_accuracy' metric found.")

Best run ID: d57c3e774625460da4363f633dc8a0c4
Best test accuracy: 0.9430780410766602
Best parameters:
  optimizer_global_clipnorm: None
  initial_epoch: 0
  optimizer_beta_2: 0.999
  optimizer_use_ema: False
  sample_weight: None
  validation_freq: 1
  batch_size: 32
  optimizer_name: adam
  steps_per_epoch: None
  optimizer_ema_momentum: 0.99
  optimizer_clipnorm: None
  validation_steps: None
  embedding_dim: 256
  optimizer_learning_rate: 0.0010000000474974513
  validation_batch_size: None
  lstm_units: 128
  class_weight: None
  shuffle: True
  optimizer_weight_decay: None
  optimizer_clipvalue: None
  optimizer_loss_scale_factor: None
  validation_split: 0.0
  epochs: 10
  dropout_rate: 0.3
  optimizer_epsilon: 1e-07
  optimizer_amsgrad: False
  optimizer_ema_overwrite_frequency: None
  optimizer_beta_1: 0.9
  optimizer_gradient_accumulation_steps: None


In [15]:
# Load the best model.
# best_run is None when no run was selected upstream; stop with an explicit
# error rather than an AttributeError while building the model URI.
if best_run is None:
    raise RuntimeError("No best run was selected; cannot load a model.")
best_model = mlflow.keras.load_model(f"runs:/{best_run.info.run_id}/model")

# SECURITY: pickle.load can execute arbitrary code while deserializing. Only
# open a tokenizer_info.pickle that comes from a trusted source.
# NOTE(review): presumably this file is written by the training notebook — confirm.
with open('tokenizer_info.pickle', 'rb') as handle:
    tokenizer_info = pickle.load(handle)

# The pickled mapping carries the tokenizer and the sequence length used for padding.
tokenizer = tokenizer_info['tokenizer']
max_sequence_length = tokenizer_info['max_sequence_length']

In [18]:
# Encode one name as a padded sequence batch
def preprocess_name(name, tokenizer, max_sequence_length):
    """Convert a single name into a padded batch of token sequences.

    Args:
        name: The name to encode.
        tokenizer: Object exposing ``texts_to_sequences``; it is called with
            a one-element list containing ``name``.
        max_sequence_length: Forwarded to ``pad_sequences`` as ``maxlen``.

    Returns:
        Whatever ``pad_sequences`` returns for the one-element batch.
    """
    encoded_batch = tokenizer.texts_to_sequences([name])
    return pad_sequences(encoded_batch, maxlen=max_sequence_length)

# Example name to predict
name_to_predict = "Martín"
preprocessed_name = preprocess_name(name_to_predict, tokenizer, max_sequence_length)

# Make a prediction
prediction = best_model.predict(preprocessed_name)

# Extract the single score as a plain Python number instead of relying on the
# implicit truth value of an array comparison. ndarray.item() raises
# ValueError unless the array holds exactly one element, so an unexpected
# output shape is reported rather than silently misread.
probability = prediction.item()

# Interpret the prediction
# NOTE(review): assumes scores above 0.5 correspond to "Female" — confirm
# against the label encoding used during training.
gender = "Female" if probability > 0.5 else "Male"
print(f"The predicted gender for the name '{name_to_predict}' is {gender}.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
The predicted gender for the name 'Martín' is Male.
