In [2]:
%reload_ext autoreload
%autoreload 2


import numpy as np 
import os 
import mlflow 
import git 
import subprocess 
from tqdm import tqdm 

import mlflow_info 
import keras
from keras.utils import np_utils
from keras.layers.core import Dense, Dropout, Activation

import warnings
warnings.filterwarnings('ignore')

%reload_ext mlflow_info

## creating a ssh-tunnel to server in the background

In [3]:
""" creating a ssh-tunnel to server in the background """
command = 'ssh -N -L 5000:localhost:5432 artinmajdi@data7-db1.cyverse.org &'
ssh_session = subprocess.Popen('exec ' + command, stdout=subprocess.PIPE, shell=True)

## Load data

In [10]:
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

num_pixels = x_train.shape[1] * x_train.shape[2]

x_train = x_train.reshape((x_train.shape[0], num_pixels)).astype('float32')
x_test  = x_test.reshape( (x_test.shape[0],  num_pixels)).astype('float32')

x_train = x_train[:5000] / 255
x_test  = x_test / 255

y_train = np_utils.to_categorical(y_train[:5000])
y_test  = np_utils.to_categorical(y_test)

## Architecture

In [4]:
mlflow.keras.autolog()

model = keras.models.Sequential()
model.add(Dense(512, input_shape=(784,)))
model.add(Activation('relu'))
model.add(Dropout(0.2))

model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.2))

model.add(Dense(10))
model.add(Activation('softmax'))

## set up mlflow

In [5]:
""" MLflow settings: 
    The style we should use when running mlflow ui
        Postgres server: server = f'{dialect_driver}://{username}:{password}@{ip}/{database_name}' 
        Local:           server = "file:/Users/artinmac/Documents/Research/Data7/mlflow/mlrun_store" """

postgres_connection_type = { 'direct':    ('5432', 'data7-db1.cyverse.org'),
                            'ssh-tunnel': ('5000', 'localhost') }

port, host = postgres_connection_type['ssh-tunnel']

In [6]:
""" Setting up the artifact server """
username = 'username'
password = 'password'
database_name  = 'resbaz2021'
dialect_driver = 'postgresql'

server = '{dialect_driver}://{username}:{password}@{host}:{port}/{database_name}'

In [7]:
Artifacts = {
    'local':      'file:/{path_to_artifact_store}',
    'hpc':        'sftp://{user}:{password}@filexfer.hpc.arizona.edu:{path_to_artifact_store}',
    'atmosphere': 'sftp://{user}:{password}@{ip_address}:{path_to_artifact_store}',
    'cyverse':    'file:/{path_to_artifact_store}',
    'data7_db1':  'sftp://{user}:{password}@{ip_address}:{path_to_artifact_store}'}

artifact = Artifacts['data7_db1']

server, artifact = mlflow_info.load()

In [8]:

""" setting the trackinng uri """
mlflow.set_tracking_uri(server)

""" Creating/Setting the experiment
    Line below should be commented if the experiment is already created
    If kept commented during the first run of a new experiment, the set_experiment 
    will automatically create the new experiment with local artifact storage """

experiment_name = 'exp_mnist_live'
mlflow.create_experiment(name=experiment_name, artifact_location=artifact)
mlflow.set_experiment(experiment_name=experiment_name)

## model training

In [13]:
%%time 

# Starting the MLflow 
run = mlflow.start_run()
mlflow.set_tag(f'mlflow.note.content',f'run_id: {run.info.run_id}')


# model compiling
learning_rate = 0.001
model.compile( optimizer = keras.optimizers.Adam(learning_rate=learning_rate), 
               loss      = keras.losses.categorical_crossentropy,
               metrics   = [keras.metrics.binary_accuracy] )

# model optimization
history = model.fit(x_train, y_train, epochs=5, batch_size=100, validation_data=(x_test, y_test))

# Model evaluation
test_loss, test_acc = model.evaluate(x_test, y_test)
print('Accuracy:', test_acc) 
print('Loss: '   , test_loss)

prediction = model.predict(x_test)
predicted_classes = np.argmax(prediction, axis=1)

# Saving MLflow parameters & metrics
mlflow.log_param("epochs",          history.params['epochs'])
mlflow.log_param("steps_per_epoch", history.params['steps'])
mlflow.log_metric("accuracy",       test_acc)
mlflow.log_metric("test_loss",      test_loss)

# saving git commit hash
repo = git.Repo(search_parent_directories=True)
git_commit_hash = repo.head.object.hexsha
print('git commit hash', git_commit_hash)
mlflow.set_tag('mlflow.source.git.commit', git_commit_hash)


# ending mlflow session
mlflow.end_run()

print('process completed')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Accuracy: 0.9875999093055725
Loss:  0.1981697827577591
git commit hash e335d6d284d4d32afb7e4a271d35c9d46c0ef5af
process completed
CPU times: user 12.5 s, sys: 2.31 s, total: 14.8 s
Wall time: 9.03 s


In [14]:
# Starting the MLflow 
parent_run = mlflow.start_run(run_name='learning rate')

for learning_rate in tqdm(np.linspace(start=0.01,stop=0.1,num=5)):
    
    learning_rate = np.floor(learning_rate*1000)/1000

    with mlflow.start_run(run_name=f'LR {learning_rate}', nested=True) as child_run:
        mlflow.set_tag(f'mlflow.note.content',f'run_id: {child_run.info.run_id}')

        # model compiling
        model.compile( optimizer = keras.optimizers.Adam(learning_rate=learning_rate), 
                        loss     = keras.losses.categorical_crossentropy,
                        metrics  = [keras.metrics.binary_accuracy] )

        # model optimization
        history = model.fit(x_train, y_train, epochs=5, batch_size=100, validation_data=(x_test, y_test),verbose=0)

        # Model evaluation
        test_loss, test_acc = model.evaluate(x_test, y_test)
        print('\nAccuracy:', test_acc) 
        print('Loss: '   , test_loss,'\n')

        prediction = model.predict(x_test)
        predicted_classes = np.argmax(prediction, axis=1)

        # Saving MLflow parameters & metrics
        mlflow.log_param("epochs",          history.params['epochs'])
        mlflow.log_param("steps_per_epoch", history.params['steps'])
        mlflow.log_metric("accuracy",       test_acc)
        mlflow.log_metric("test_loss",      test_loss)

        # saving git commit hash
        repo = git.Repo(search_parent_directories=True)
        git_commit_hash = repo.head.object.hexsha
        mlflow.set_tag('mlflow.source.git.commit', git_commit_hash)

# ending mlflow session
mlflow.end_run()

print('\nprocess completed')


Accuracy: 0.9851499199867249
Loss:  0.2834130823612213 


Accuracy: 0.9749800562858582
Loss:  0.588406503200531 


Accuracy: 0.9507598876953125
Loss:  1.1720397472381592 


Accuracy: 0.8999972939491272
Loss:  2.3092315196990967 


Accuracy: 0.8999972939491272
Loss:  2.3024988174438477 

100%|██████████| 5/5 [00:45<00:00,  9.02s/it]
process completed



## finding the optimum batch size

In [15]:
# Starting the MLflow 
parent_run = mlflow.start_run(run_name='batch size')


for batch_size in tqdm(np.linspace(start=50,stop=200,num=6)):
    batch_size = int(batch_size)

    with mlflow.start_run(run_name=f'bsize {batch_size}', nested=True) as child_run:
        mlflow.set_tag(f'mlflow.note.content',f'run_id: {child_run.info.run_id}')

        # model compiling
        model.compile( optimizer = keras.optimizers.Adam(learning_rate=0.001), 
                        loss     = keras.losses.categorical_crossentropy,
                        metrics  = [keras.metrics.binary_accuracy] )

        # model optimization
        history = model.fit(x_train, y_train, epochs=5, batch_size=batch_size, validation_data=(x_test, y_test),verbose=0)

        # Model evaluation
        test_loss, test_acc = model.evaluate(x_test, y_test)
        print('\nAccuracy:', test_acc) 
        print('Loss: '   , test_loss,'\n')

        prediction = model.predict(x_test)
        predicted_classes = np.argmax(prediction, axis=1)

        # Saving MLflow parameters & metrics
        mlflow.log_param("epochs",          history.params['epochs'])
        mlflow.log_param("batch_size",      batch_size)
        mlflow.log_param("steps_per_epoch", history.params['steps'])

        mlflow.log_metric("accuracy",       test_acc)
        mlflow.log_metric("test_loss",      test_loss)

        # saving git commit hash
        repo = git.Repo(search_parent_directories=True)
        git_commit_hash = repo.head.object.hexsha
        mlflow.set_tag('mlflow.source.git.commit', git_commit_hash)

# ending mlflow session
mlflow.end_run()

print('\nprocess completed')


Accuracy: 0.8999972939491272
Loss:  2.3017165660858154 


Accuracy: 0.8999972939491272
Loss:  2.3018269538879395 


Accuracy: 0.8999972939491272
Loss:  2.301870107650757 


Accuracy: 0.8999972939491272
Loss:  2.301859140396118 


Accuracy: 0.8999972939491272
Loss:  2.3019351959228516 


Accuracy: 0.8999972939491272
Loss:  2.3019442558288574 

100%|██████████| 6/6 [00:57<00:00,  9.54s/it]
process completed



In [17]:
old_run = mlflow.get_run(run_id='0861236387ba4d7683e589f206dff964')

In [20]:
dict(old_run).keys()

dict_keys(['data', 'info'])

In [22]:
old_run.info 

<RunInfo: artifact_uri='sftp://artinmajdi:temp2_data7_b@data7-db1.cyverse.org:/home/artinmajdi/mlflow_data/artifact_store/0861236387ba4d7683e589f206dff964/artifacts', end_time=1621630769536, experiment_id='4', lifecycle_stage='active', run_id='0861236387ba4d7683e589f206dff964', run_uuid='0861236387ba4d7683e589f206dff964', start_time=1621630758633, status='FINISHED', user_id='mohammadsmajdi'>

In [25]:
mlflow.list_run_infos(experiment_id='4')[0]

<RunInfo: artifact_uri='sftp://artinmajdi:temp2_data7_b@data7-db1.cyverse.org:/home/artinmajdi/mlflow_data/artifact_store/078e97a2e57740be87f1257f650d8c94/artifacts', end_time=1621631167831, experiment_id='4', lifecycle_stage='active', run_id='078e97a2e57740be87f1257f650d8c94', run_uuid='078e97a2e57740be87f1257f650d8c94', start_time=1621631157205, status='FINISHED', user_id='mohammadsmajdi'>

In [28]:
with mlflow.start_run(run_id='0861236387ba4d7683e589f206dff964') as run: 

    mlflow.set_tag('status','final optimized learning rate')

In [29]:
client = mlflow.tracking.MlflowClient()
client.download_artifacts(run_id='0861236387ba4d7683e589f206dff964', path='model/MLmodel', dst_path='../')


'/home/u29/mohammadsmajdi/projects/model/MLmodel'

In [31]:
source_run = mlflow.get_run(run_id='0861236387ba4d7683e589f206dff964')

mlflow.create_experiment(name='new_exp', artifact_location=artifact)
mlflow.set_experiment(experiment_name='new_exp')

mlflow.log_metrics(source_run.data.metrics)
mlflow.log_params(source_run.data.params)

file_path = client.download_artifacts(run_id='0861236387ba4d7683e589f206dff964', path='', dst_path='../')



mlflow.set_tag('mlflow.source.git.commit', repo.head.object.hexsha)
mlflow.set_tag('mlflow.source.name'      , old_run.data.tags['mlflow.source.name'])
mlflow.set_tag('mlflow.log-model.history', old_run.data.tags['mlflow.log-model.history'])


mlflow.log_artifacts(file_path + '/model')

mlflow.end_run()



KeyboardInterrupt: 

In [33]:
model = mlflow.keras.load_model(model_uri='models:/mnist_classifier/production')

In [None]:
model = mlflow.keras.load_model(model_uri='runs:/{}/model'.format(run_id),compile=False)

model = mlflow.keras.load_model(model_uri='/home/u29/mohammadsmajdi/projects/chest_xray/artifacts_optimized_model/model',compile=False)


In [None]:
model = mlflow.keras.load_model(model_uri='models:/mnist_classifier/2',compile=False)


In [35]:
ssh_session.kill()