In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
from sklearn.metrics import accuracy_score, mean_squared_error
from statistics import mean
import mlflow

In [3]:
penguins = pd.read_csv("https://github.com/allisonhorst/palmerpenguins/raw/5b5891f01b52ae26ad8cb9755ec93672f49328a8/data/penguins_size.csv")
penguins.dropna(inplace=True)

attributes = penguins.iloc[:,2:6]
scaler = StandardScaler()
num = scaler.fit_transform(attributes)
data_n = pd.DataFrame(num, columns=['culmen_length_mm','culmen_depth_mm','flipper_length_mm','body_mass_g'])

species = penguins.iloc[:, 0]
labelencoder = LabelEncoder()
cat = labelencoder.fit_transform(species)
enc_species = pd.DataFrame(cat, columns=['species_short'])

enc_species = pd.get_dummies(species)


#Tensor Of all the numerical values, lenghth etc. 
X_tensor = torch.tensor(data_n.astype(np.float32).values)

#Tensor of the encoded categorical variable
y_tensor = torch.tensor(enc_species.astype(np.float32).values)

In [7]:
penguins.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 334 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species_short      334 non-null    object 
 1   island             334 non-null    object 
 2   culmen_length_mm   334 non-null    float64
 3   culmen_depth_mm    334 non-null    float64
 4   flipper_length_mm  334 non-null    float64
 5   body_mass_g        334 non-null    float64
 6   sex                334 non-null    object 
dtypes: float64(4), object(3)
memory usage: 20.9+ KB


In [9]:
penguins['species_short'].unique()

array(['Adelie', 'Chinstrap', 'Gentoo'], dtype=object)

t=2023-04-29T18:50:03+0200 lvl=eror msg="heartbeat timeout, terminating session" obj=csess id=565898f20a5d clientid=b2cef244e4351a9c02b06439ad3f1d01
t=2023-04-29T18:50:03+0200 lvl=eror msg="session closed, starting reconnect loop" obj=csess id=692f90eca2ac err="session closed"
t=2023-04-29T19:25:43+0200 lvl=eror msg="session closed, starting reconnect loop" obj=csess id=692f90eca2ac err="read tcp [2a09:5e40:3054:5a0:21cb:868f:db18:2198]:49202->[2600:1f16:d83:1201::6e74:4]:443: read: connection reset by peer"
t=2023-04-29T19:41:22+0200 lvl=eror msg="session closed, starting reconnect loop" obj=csess id=692f90eca2ac err="read tcp [2a09:5e40:3054:5a0:21cb:868f:db18:2198]:49428->[2600:1f16:d83:1200::6e74:3]:443: read: connection reset by peer"
t=2023-04-29T19:52:52+0200 lvl=eror msg="heartbeat timeout, terminating session" obj=csess id=2d4cc25359fb clientid=b2cef244e4351a9c02b06439ad3f1d01
t=2023-04-29T20:07:57+0200 lvl=eror msg="failed to reconnect session" obj=csess id=692f90eca2ac err="

In [4]:
mlflow.set_experiment("Penguins ANN")
experiment_id = mlflow.get_experiment_by_name('Penguins ANN').experiment_id

2023/04/29 17:35:37 INFO mlflow.tracking.fluent: Experiment with name 'Penguins ANN' does not exist. Creating a new experiment.


In [5]:
with mlflow.start_run(run_name="run with artifact", nested=True, experiment_id=experiment_id):
  # Your machine learning code here
  # Initializing Hyperparameters
  epochs = 50
  learning_rate = 0.001
  loss_set = {}
  # 1. Creating a FeedForwardNetwork
  model_net4 = torch.nn.Sequential(
                            # Input layer
                            torch.nn.Linear(4,8),
                            torch.nn.ReLU(),
                            # 1. Hidden layer
                            torch.nn.Linear(8,16),
                            torch.nn.ReLU(),
                            # 2. Hidden layer
                            torch.nn.Linear(16,32),
                            torch.nn.ReLU(),
                            # 3. Hidden layer
                            torch.nn.Linear(32,64),
                            torch.nn.ReLU(),
                            # Output layer
                            torch.nn.Linear(64,3),
                            torch.nn.Softmax()  # We have a multiclass single-label classification problem, since a penguin can only be from one species
                          );
  loss_cel = torch.nn.CrossEntropyLoss()
  optimizer = torch.optim.RMSprop(model_net4.parameters(), lr=learning_rate)

  # Loop over the number of epochs
  for i in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
    lost_set_epoch = {}
    j = 0
    for x, y_t in zip(X_tensor, y_tensor):
        output = model_net4.forward(x.float())
        # 2. FeedForward Evaluation
        loss = loss_cel(output, y_t.float())
        optimizer.zero_grad();

        # 3. Backward / Gradient Calculation
        loss.backward()
        # 4. Back Propagation
        optimizer.step()
        # Store the loss for each sample of data
        lost_set_epoch[j] = loss
        j = j + 1
    
    # Store the loss for each epoch
    loss_set[i] = torch.mean(torch.stack(list(lost_set_epoch.values()))).detach().numpy()

    # Display the loss after every 10 epochs
    if (i % 10)==0:
      print (f"Loss: {loss_set[i]}")

  mlflow.log_param("Learning rate", learning_rate)
  mlflow.log_param("Num. of epochs", epochs)

  from pandas.core.groupby.groupby import OutputFrameOrSeries
  with torch.no_grad():
    n_correct = 0
    n_samples = len(y_tensor)

    outputs = model_net4.forward(X_tensor)
    for idx, output in enumerate(outputs):
      pred = np.argmax(output).item()
      true_val = np.argmax(y_tensor[idx]).item()
      
      if pred == true_val:
        n_correct += 1

    acc = 100 * (n_correct / n_samples)

  mlflow.log_metric("accuracy", acc)
  params_path = "model.pt"
  #/Users/yasminesarraj/Documents/GitHub/MLOPS-assignments/Assignment_4
  torch.save(model_net4.state_dict(), params_path)
  mlflow.log_artifact(params_path)
  
  # End the current run
  mlflow.end_run()

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for i in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):


  0%|          | 0/50 [00:00<?, ?epoch/s]

  input = module(input)


Loss: 0.7510645389556885
Loss: 0.5580784678459167
Loss: 0.5578799843788147
Loss: 0.5574740767478943
Loss: 0.560384213924408


In [6]:
# run tracking UI in the background
get_ipython().system_raw("mlflow ui --port 5002 &")
## Step 3 - Installing pyngrok for remote tunnel access using ngrock.com

from pyngrok import ngrok
from getpass import getpass
# Terminate open tunnels if any exist
ngrok.kill()
## Step 4 - Login on ngrok.com and get your authtoken from https://dashboard.ngrok.com/auth
# Enter your auth token when the code is running
NGROK_AUTH_TOKEN = getpass('Enter the ngrok authtoken: ')
ngrok.set_auth_token(NGROK_AUTH_TOKEN)
ngrok_tunnel = ngrok.connect(addr="5002", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

[2023-04-29 17:36:12 +0200] [37589] [INFO] Starting gunicorn 20.1.0
[2023-04-29 17:36:12 +0200] [37589] [ERROR] Connection in use: ('127.0.0.1', 5002)
[2023-04-29 17:36:12 +0200] [37589] [ERROR] Retrying in 1 second.
t=2023-04-29T17:36:13+0200 lvl=warn msg="ngrok config file found at legacy location, move to XDG location" xdg_path="/Users/yasminesarraj/Library/Application Support/ngrok/ngrok.yml" legacy_path=/Users/yasminesarraj/.ngrok2/ngrok.yml
[2023-04-29 17:36:13 +0200] [37589] [ERROR] Connection in use: ('127.0.0.1', 5002)
[2023-04-29 17:36:13 +0200] [37589] [ERROR] Retrying in 1 second.


MLflow Tracking UI: https://ee05-2a09-5e40-3054-5a0-21cb-868f-db18-2198.ngrok-free.app


[2023-04-29 17:36:14 +0200] [37589] [ERROR] Connection in use: ('127.0.0.1', 5002)
[2023-04-29 17:36:14 +0200] [37589] [ERROR] Retrying in 1 second.
[2023-04-29 17:36:15 +0200] [37589] [ERROR] Connection in use: ('127.0.0.1', 5002)
[2023-04-29 17:36:15 +0200] [37589] [ERROR] Retrying in 1 second.
[2023-04-29 17:36:16 +0200] [37589] [ERROR] Connection in use: ('127.0.0.1', 5002)
[2023-04-29 17:36:16 +0200] [37589] [ERROR] Retrying in 1 second.
[2023-04-29 17:36:17 +0200] [37589] [ERROR] Can't connect to ('127.0.0.1', 5002)
Running the mlflow server failed. Please see the logs above for details.
