<a href="https://colab.research.google.com/github/caroheymes/mlflow_colab/blob/master/minist_mlflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Context**

This notebook shows how to use MLflow in Google Colab. MLflow is an open-source platform for managing the ML lifecycle. It provides tools to run experiments and manage models reproducibly, as well as the MLflow UI for logging and visualization.

# Digit prediction with Keras

In [1]:
from __future__ import print_function
import argparse

import cloudpickle
import tensorflow as tf
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import utils as np_utils
from keras import backend as K
# Hyperparameters are fixed here because a notebook takes no command-line
# arguments; they stand in for the --batch-size / --epochs options of the
# Keras CNN MNIST training script this cell was adapted from.
batch_size = 128
epochs = 2
num_classes = 10
# input image dimensions
img_rows, img_cols = 28, 28

# Seed TensorFlow's global RNG so that reruns are more repeatable
# (this alone does not guarantee bit-identical results on every backend).
SEED = 42
tf.random.set_seed(SEED)

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Insert the single channel axis on the side the backend expects.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Convert to float32 and divide by 255 to rescale the pixel values.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
# `np_utils` is the `keras.utils` alias imported at the top of this cell; going
# through it avoids the `keras.utils.np_utils` submodule path, which newer Keras
# releases no longer expose.
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)

# Two conv/max-pool stages, dropout, then a dense classifier head.
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# The test split is passed as validation data, so per-epoch validation
# metrics are computed on the same data that `score` is evaluated on below.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(x_test, y_test))

# score == [test loss, test accuracy] (order follows loss, then metrics from compile()).
score = model.evaluate(x_test, y_test, verbose=0)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples
Epoch 1/2
Epoch 2/2


In [2]:
# Display the evaluation result: [test loss, test accuracy] from model.evaluate above.
score

[0.034771766513586044, 0.988099992275238]

Attempt based on https://github.com/dmatrix/google-colab/blob/master/mlflow_issue_2350.ipynb

# MLflow setup

In [3]:
!pip install mlflow --quiet

[K     |████████████████████████████████| 16.9 MB 87 kB/s 
[K     |████████████████████████████████| 1.1 MB 42.8 MB/s 
[K     |████████████████████████████████| 146 kB 55.6 MB/s 
[K     |████████████████████████████████| 58 kB 5.7 MB/s 
[K     |████████████████████████████████| 79 kB 6.9 MB/s 
[K     |████████████████████████████████| 180 kB 58.9 MB/s 
[K     |████████████████████████████████| 596 kB 58.5 MB/s 
[K     |████████████████████████████████| 75 kB 4.5 MB/s 
[K     |████████████████████████████████| 52 kB 1.5 MB/s 
[K     |████████████████████████████████| 63 kB 1.8 MB/s 
[?25h  Building wheel for alembic (setup.py) ... [?25l[?25hdone
  Building wheel for databricks-cli (setup.py) ... [?25l[?25hdone


In [4]:
import mlflow

# Show which MLflow release the kernel actually imports.
installed_mlflow_version = mlflow.__version__
print(installed_mlflow_version)

1.21.0


In [5]:
import mlflow
import mlflow.keras
import mlflow.pyfunc
from mlflow.pyfunc import PythonModel
from mlflow.utils.file_utils import TempDir
from mlflow.utils.environment import _mlflow_conda_env

In [6]:

# Start the MLflow tracking UI on port 5000; the trailing "&" sends the
# shell command to the background so the cell returns immediately.
mlflow_ui_command = "mlflow ui --port 5000 &"
get_ipython().system_raw(mlflow_ui_command)

In [7]:
# Record the evaluation metrics and the trained model in a new MLflow run.
test_loss, test_accuracy = score
with mlflow.start_run():
    mlflow.log_metric("cross_entropy_test_loss", test_loss)
    mlflow.log_metric("test_accuracy", test_accuracy)
    print('Test loss:', test_loss)
    print('Test accuracy:', test_accuracy)

    # Store the Keras model as a run artifact under "keras-model".
    mlflow.keras.log_model(model, artifact_path="keras-model")

Test loss: 0.034771766513586044
Test accuracy: 0.988099992275238
INFO:tensorflow:Assets written to: /tmp/tmpx4l1x92b/model/data/model/assets


# Pyngrok set-up

In [8]:
!pip install pyngrok==4.1.1 --quiet

Collecting pyngrok==4.1.1
  Downloading pyngrok-4.1.1.tar.gz (18 kB)
Building wheels for collected packages: pyngrok
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Created wheel for pyngrok: filename=pyngrok-4.1.1-py3-none-any.whl size=15984 sha256=86e5be90a567a88f2cbc955dd2f664bb1f9f1a7efa8a5f97e5847b306296a0c2
  Stored in directory: /root/.cache/pip/wheels/b1/d9/12/045a042fee3127dc40ba6f5df2798aa2df38c414bf533ca765
Successfully built pyngrok
Installing collected packages: pyngrok
Successfully installed pyngrok-4.1.1


In [9]:

from pyngrok import ngrok
import os

# Terminate open tunnels if exist
ngrok.kill()

# Setting the authtoken (optional)
# Get your authtoken from https://dashboard.ngrok.com/auth
# NOTE(review): the path assumes Google Drive is already mounted at /content/drive — confirm.
NGROK_TOKEN_PATH = '/content/drive/MyDrive/ngroktoken'
with open(NGROK_TOKEN_PATH) as f:
    # strip() drops the trailing newline most editors append to the file;
    # without it the token passed to ngrok would contain stray whitespace.
    NGROK_AUTH_TOKEN = f.read().strip()

if not NGROK_AUTH_TOKEN:
    raise ValueError(f"No ngrok authtoken found in {NGROK_TOKEN_PATH}")

ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Open an HTTPs tunnel on port 5000 for http://localhost:5000
# (port 5000 is where the MLflow UI was started earlier in this notebook).
public_url = ngrok.connect(port="5000", proto="http", options={'region': 'eu', "bind_tls": True})
print("MLflow Tracking UI:", public_url)

MLflow Tracking UI: http://3005-35-227-191-167.ngrok.io


# MLflow with DagsHub

In [10]:
import mlflow
import os
from getpass import getpass

In [27]:


# Credentials are read from files on Google Drive so that no secret is typed
# into or stored in the notebook. To enter them interactively instead, assign
# the three values below from input() / getpass().
# NOTE(review): the paths assume Google Drive is mounted at /content/drive — confirm.
def _read_secret(path):
    """Return the content of the text file at `path`, stripped of surrounding whitespace."""
    with open(path) as f:
        return f.read().strip()


MLFLOW_TRACKING_USERNAME = _read_secret('/content/drive/MyDrive/11_01_mlflow/username_dagshub')
MLFLOW_TRACKING_PASSWORD = _read_secret('/content/drive/MyDrive/11_01_mlflow/token_dagshub')
MLFLOW_TRACKING_PROJECTNAME = _read_secret('/content/drive/MyDrive/dags_project_name')

# MLflow takes HTTP basic-auth credentials for a remote tracking server from
# these environment variables; without them the remote requests are unauthenticated.
os.environ['MLFLOW_TRACKING_USERNAME'] = MLFLOW_TRACKING_USERNAME
os.environ['MLFLOW_TRACKING_PASSWORD'] = MLFLOW_TRACKING_PASSWORD

dagshub_repo_url = 'https://dagshub.com/' + MLFLOW_TRACKING_USERNAME + '/' + MLFLOW_TRACKING_PROJECTNAME

# Web page listing the experiments; kept in `url` because a later cell embeds it.
url = dagshub_repo_url + "/experiments/#/"

print(url)

# The tracking endpoint is "<repo url>.mlflow", not the experiments web page.
# The previous call passed the literal string 'url' (an f-string with no
# placeholder), so runs were written to a local store instead of DagsHub.
tracking_uri = dagshub_repo_url + '.mlflow'
mlflow.set_tracking_uri(tracking_uri)

with mlflow.start_run(run_name="MLflow on Colab"):

    mlflow.log_metric("cross_entropy_test_loss", score[0])
    mlflow.log_metric("test_accuracy", score[1])
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    mlflow.keras.log_model(model, artifact_path="keras-model")

https://dagshub.com/caroline/minist/experiments/#/
Test loss: 0.034771766513586044
Test accuracy: 0.988099992275238
INFO:tensorflow:Assets written to: /tmp/tmp45dpiud1/model/data/model/assets


In [28]:
import IPython

# Embed the DagsHub experiments page held in `url` directly in the notebook.
# Example target: https://dagshub.com/caroline/minist/experiments/#/
experiments_frame = IPython.display.IFrame(url, width='100%', height=600)
display(experiments_frame)

In [None]:
%%bash
# TODO: adapt before running. This request is a template: the column names do
# not describe MNIST inputs, and `x_train[0]` sits inside a single-quoted shell
# string, so it is sent literally and the payload is not valid JSON.
# NOTE(review): port 5000 is where this notebook starts `mlflow ui`; confirm
# which host/port actually serves the model's /invocations endpoint.
curl --silent --show-error 'http://localhost:5000/invocations' -H 'Content-Type: application/json' -d '{
    "columns": ["season", "year", "month", "hour_of_day", "is_holiday", "weekday", "is_workingday", "weather_situation", "temperature", "feels_like_temperature", "humidity", "windspeed"],
    "data": x_train[0]
}'

In [29]:
# To iterate over different parameters, see:
# https://colab.research.google.com/github/alfozan/mlflow/blob/master/examples/sklearn_elasticnet_wine/train.ipynb
# https://colab.research.google.com/github/alfozan/mlflow-example/blob/master/MLflow-example-notebook.ipynb#scrollTo=4h3bKHMYUIG6