In [1]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Wed_Jul_22_19:09:09_PDT_2020
Cuda compilation tools, release 11.0, V11.0.221
Build cuda_11.0_bu.TC445_37.28845127_0


In [2]:
%%bash
mkdir -p data

curl -L https://cs.famaf.unc.edu.ar/\~ccardellino/resources/diplodatos/meli-challenge-2019.tar.bz2 -o ./data/meli-challenge-2019.tar.bz2
tar jxvf ./data/meli-challenge-2019.tar.bz2 -C ./data/

curl -L https://cs.famaf.unc.edu.ar/\~ccardellino/resources/diplodatos/SBW-vectors-300-min5.txt.gz -o ./data/SBW-vectors-300-min5.txt.gz

pip install torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html
pip install gensim mlflow tqdm
pip install pyngrok --quiet

# Be sure the correct nvcc is in the path with the correct pytorch installation
export CUDA_HOME=/opt/cuda/11.0
export PATH=$CUDA_HOME/bin:$PATH
export CUDA_VISIBLE_DEVICES=0


meli-challenge-2019/
meli-challenge-2019/spanish.test.jsonl.gz
meli-challenge-2019/portuguese.validation.jsonl.gz
meli-challenge-2019/portuguese.train.jsonl.gz
meli-challenge-2019/spanish.train.jsonl.gz
meli-challenge-2019/spanish_token_to_index.json.gz
meli-challenge-2019/portuguese_token_to_index.json.gz
meli-challenge-2019/spanish.validation.jsonl.gz
meli-challenge-2019/portuguese.test.jsonl.gz
Looking in links: https://download.pytorch.org/whl/torch_stable.html
Collecting torch==1.7.1+cu101
  Downloading https://download.pytorch.org/whl/cu101/torch-1.7.1%2Bcu101-cp37-cp37m-linux_x86_64.whl (735.4MB)
Collecting torchvision==0.8.2+cu101
  Downloading https://download.pytorch.org/whl/cu101/torchvision-0.8.2%2Bcu101-cp37-cp37m-linux_x86_64.whl (12.8MB)
Installing collected packages: torch, torchvision
  Found existing installation: torch 1.8.0+cu101
    Uninstalling torch-1.8.0+cu101:
      Successfully uninstalled torch-1.8.0+cu101
  Found existing installation: torchvision 0.9.0+cu10

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0  0  945M    0 81920    0     0  50319      0  5:28:15  0:00:01  5:28:14 50288  0  945M    0 6336k    0     0  2467k      0  0:06:32  0:00:02  0:06:30 2466k  2  945M    2 24.5M    0     0  7076k      0  0:02:16  0:00:03  0:02:13 7076k  4  945M    4 42.0M    0     0  9469k      0  0:01:42  0:00:04  0:01:38 9467k  6  945M    6 60.8M    0     0  10.9M      0  0:01:26  0:00:05  0:01:21 12.4M  8  945M    8 78.7M    0     0  12.0M      0  0:01:18  0:00:06  0:01:12 15.9M 10  945M   10 96.4M    0     0  12.7M      0  0:01:14  0:00:07  0:01:07 18.0M 12  945M   12  114M    0     0  13.3M      0  0:01:10  0:00:08  0:01:02 17.9M 14  945M   14  132M    0     0  13.9M      0  0:01

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import argparse
import gzip
import json
import logging
import mlflow
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import random

from torch.utils.data import IterableDataset

from sklearn.metrics import balanced_accuracy_score
from torch.utils.data import DataLoader
from tqdm import tqdm, trange
from IPython import get_ipython
from pyngrok import ngrok

logging.basicConfig(
    format="%(asctime)s: %(levelname)s - %(message)s",
    level=logging.INFO
)

In [5]:
class MeliChallengeDataset(IterableDataset):
    def __init__(self,
                 dataset_path,
                 random_buffer_size=2048):
        assert random_buffer_size > 0
        self.dataset_path = dataset_path
        self.random_buffer_size = random_buffer_size

        with gzip.open(self.dataset_path, "rt") as dataset:
            item = json.loads(next(dataset).strip())
            self.n_labels = item["n_labels"]
            self.dataset_size = item["size"]

    def __len__(self):
        return self.dataset_size

    def __iter__(self):
        try:
            with gzip.open(self.dataset_path, "rt") as dataset:
                shuffle_buffer = []

                for line in dataset:
                    item = json.loads(line.strip())
                    item = {
                        "data": item["data"],
                        "target": item["target"]
                    }

                    if self.random_buffer_size == 1:
                        yield item
                    else:
                        shuffle_buffer.append(item)

                        if len(shuffle_buffer) == self.random_buffer_size:
                            random.shuffle(shuffle_buffer)
                            for item in shuffle_buffer:
                                yield item
                            shuffle_buffer = []

                if len(shuffle_buffer) > 0:
                    random.shuffle(shuffle_buffer)
                    for item in shuffle_buffer:
                        yield item
        except GeneratorExit:
            return

In [6]:
common_params = {
    'train_data': "./data/meli-challenge-2019/spanish.train.jsonl.gz",
    'token_to_index': "./data/meli-challenge-2019/spanish_token_to_index.json.gz",
    'pretrained_embeddings': "./data/SBW-vectors-300-min5.txt.gz",
    'language': "spanish",
    'test_data': None, # "./data/meli-challenge-2019/spanish.test.jsonl.gz",
    'validation_data': "./data/meli-challenge-2019/spanish.validation.jsonl.gz",
}

parametrizable_params = [
  {
    'embeddings_size': 300,
    'hidden_layers': [512, 256, 128], 
    'dropout': 0.25,
    'epochs': 3,
    'act_fun': 1,
  },

  {
    'embeddings_size': 300,
    'hidden_layers': [256, 128], 
    'dropout': 0.25,
    'epochs': 3,
    'act_fun': 2,
  },
  # {
  #   'embeddings_size': 300,
  #   'hidden_layers': [1024, 512, 256, 128], 
  #   'dropout': 0.25,
  #   'epochs': 3,
  #   'act_fun': 1,
  # },
  # {
  #   'embeddings_size': 300,
  #   'hidden_layers': [1024, 512, 256, 128], 
  #   'dropout': 0.25,
  #   'epochs': 3,
  #   'act_fun': 2,
  # },
  # {
  #   'embeddings_size': 300,
  #   'hidden_layers': [1024, 512, 256, 128], 
  #   'dropout': 0.2,
  #   'epochs': 3,
  #   'act_fun': 1,
  # },
  # {
  #   'embeddings_size': 300,
  #   'hidden_layers': [1024, 512, 256, 128], 
  #   'dropout': 0.2,
  #   'epochs': 3,
  #   'act_fun': 2,
  # }
]

# cambiar: hidden_layers = (512, 256, 128) ; (1024, 512, 256, 128)
# cambiar: dropout = 0.1; 0.2; 0.3; 0.4
# cambiar: epochs = 5, 7, 10

In [7]:
class PadSequences:
    def __init__(self, pad_value=0, max_length=None, min_length=1):
        assert max_length is None or min_length <= max_length
        self.pad_value = pad_value
        self.max_length = max_length
        self.min_length = min_length

    def __call__(self, items):
        data = [item["data"] for item in items]
        target = [item["target"] for item in items]
        seq_lengths = [len(d) for d in data]

        if self.max_length:
            max_length = self.max_length
            seq_lengths = [min(self.max_length, l) for l in seq_lengths]
        else:
            max_length = max(self.min_length, max(seq_lengths))

        data = [d[:l] + [self.pad_value] * (max_length - l)
                for d, l in zip(data, seq_lengths)]

        return {
            "data": torch.LongTensor(data),
            "target": torch.LongTensor(target)
        }

In [8]:
class MLPClassifier(nn.Module):
    # Pytorch Module
    # __init__:defines the structure of the network
    def __init__(self,
                 pretrained_embeddings_path,
                 token_to_index,
                 n_labels,
                 hidden_layers=[256, 128],
                 dropout=0.3,
                 vector_size=300,
                 act_fun=1,
                 freeze_embedings=True):
        super().__init__()
        with gzip.open(token_to_index, "rt") as fh:
            token_to_index = json.load(fh)
        embeddings_matrix = torch.randn(len(token_to_index), vector_size)
        embeddings_matrix[0] = torch.zeros(vector_size)
        with gzip.open(pretrained_embeddings_path, "rt") as fh:
            next(fh)
            for line in fh:
                word, vector = line.strip().split(None, 1)
                if word in token_to_index:
                    embeddings_matrix[token_to_index[word]] =\
                        torch.FloatTensor([float(n) for n in vector.split()])
        self.embeddings = nn.Embedding.from_pretrained(embeddings_matrix,
                                                       freeze=freeze_embedings,
                                                       padding_idx=0)
        ## Hidden layers definitions
        ############################
        ## https://pytorch.org/docs/stable/generated/torch.nn.Linear.html
        self.hidden_layers = [
            nn.Linear(vector_size, hidden_layers[0]) # first layer
        ]
        for input_size, output_size in zip(hidden_layers[:-1], hidden_layers[1:]):
            self.hidden_layers.append(
                nn.Linear(input_size, output_size) # intermediate layers if hidden_layers´s size > 2
            )
        self.dropout = dropout # percentage of disabled neurons
        self.hidden_layers = nn.ModuleList(self.hidden_layers) #  last layer
        self.output = nn.Linear(hidden_layers[-1], n_labels) 
        self.vector_size = vector_size
        self.act_fun = act_fun


    ############################
    # forward: defines how the network layers interact
    def forward(self, x):
        x = self.embeddings(x)
        x = torch.mean(x, dim=1)
        for layer in self.hidden_layers:
            if self.act_fun == 1:
                x = F.relu(layer(x))
            if self.act_fun == 2:
                x = F.celu(layer(x))
            if self.dropout:
                x = F.dropout(x, self.dropout)
        x = self.output(x)
        return x

In [9]:
# class CNN(nn.Module):
#     def __init__(self, 
#                  pretrained_embeddings_path, 
#                  token_to_index,             
#                  n_labels,
#                  vector_size,
#                  FILTERS_COUNT,
#                  FILTERS_LENGTH,
#                  act_fun,
#                  freeze_embedings):
#         super().__init__()
#         with gzip.open(token_to_index, "rt") as fh:
#             token_to_index = json.load(fh)
#         embeddings_matrix = torch.randn(len(token_to_index), vector_size)
#         embeddings_matrix[0] = torch.zeros(vector_size)
#         with gzip.open(pretrained_embeddings_path, "rt") as fh:
#             next(fh)
#             for line in fh:
#                 word, vector = line.strip().split(None, 1)
#                 if word in token_to_index:
#                     embeddings_matrix[token_to_index[word]] =\
#                         torch.FloatTensor([float(n) for n in vector.split()])
#         self.embeddings = nn.Embedding.from_pretrained(embeddings_matrix,
#                                                        freeze=freeze_embedings,
#                                                        padding_idx=0)
#         self.FILTERS_COUNT = FILTERS_COUNT
#         self.FILTERS_LENGTH = FILTERS_LENGTH
#         self.act_fun = act_fun
#         self.convs = []
#         for filter_lenght in self.FILTERS_LENGTH:
#             self.convs.append(
#                 nn.Conv1d(vector_size, self.FILTERS_COUNT, filter_lenght)
#             )
#         self.convs = nn.ModuleList(self.convs)
#         self.fc = nn.Linear(self.FILTERS_COUNT * len(self.FILTERS_LENGTH), 128)
#         self.output = nn.Linear(128, n_labels)
#         self.vector_size = vector_size
    
#     @staticmethod
#     def conv_global_max_pool(x, conv):
#         return F.relu(conv(x).transpose(1, 2).max(1)[0])
    
#     def forward(self, x):
#         x = self.embeddings(x).transpose(1, 2)
#         x = [self.conv_global_max_pool(x, conv) for conv in self.convs]
#         x = torch.cat(x, dim=1)
#         if self.act_fun == 1:
#             x = F.relu(self.fc(x))
#         if self.act_fun == 2:
#             x = F.celu(self.fc(x))
#         # cambiar: x = F.hardsigmoid(layer(x)); F.celu(layer(x)); ; F.leaky_relu(layer(x))         
#         x = self.output(x)
#         return x

In [10]:
pad_sequences = PadSequences(
    pad_value=0,
    max_length=None,
    min_length=1
)

logging.info("Building training dataset")
# An iterable Dataset.
# All datasets that represent an iterable of data samples should subclass it. 
# Such form of datasets is particularly useful when data come from a stream.
# All subclasses should overwrite __iter__(), which would return an iterator of samples in this dataset.
train_dataset = MeliChallengeDataset(
    dataset_path=common_params.get('train_data'),
    random_buffer_size=2048  # This can be a hypterparameter
)
train_loader = DataLoader(
    train_dataset,              # dataset from which to load the data.
    batch_size=128,             # This can be a hyperparameter # how many samples per batch to load (default: ``1``).
    shuffle=False,              # set to ``True`` to have the data reshuffled at every epoch (default: ``False``).
    collate_fn=pad_sequences,   # merges a list of samples to form a mini-batch of Tensor(s).  Used when using batched loading from a map-style dataset.
    drop_last=False,             # set to ``True`` to drop the last incomplete batch, if the dataset size is not divisible by the batch size. 
                                # If ``False`` and the size of dataset is not divisible by the batch size, then the last batch
                                # will be smaller. (default: ``False``)
    num_workers=2             # how many subprocesses to use for data loading. ``0`` means that the data will be loaded in the main process. (default: ``0``)
)

if common_params.get('validation_data'):
     logging.info("Building validation dataset")
     validation_dataset = MeliChallengeDataset(
         dataset_path=common_params.get('validation_data'),
         random_buffer_size=1
     )
     validation_loader = DataLoader(
         validation_dataset,
         batch_size=128,
         shuffle=False,
         collate_fn=pad_sequences,
         drop_last=False
     )
else:
     validation_dataset = None
     validation_loader = None

if common_params.get('test_data'):
     logging.info("Building test dataset")
     test_dataset = MeliChallengeDataset(
         dataset_path=common_params.get('test_data'),
         random_buffer_size=1
     )
     test_loader = DataLoader(
         test_dataset,
         batch_size=128,
         shuffle=False,
         collate_fn=pad_sequences,
         drop_last=False
     )
else:
    test_dataset = None
    test_loader = None


2021-03-21 23:17:49,688: INFO - Building training dataset
2021-03-21 23:17:49,696: INFO - Building validation dataset


## Iterando params


In [11]:
for params in parametrizable_params:
  mlflow.set_experiment(f"diplodatos.{common_params.get('language')}")
  with mlflow.start_run():
    logging.info("Starting experiment")
    # Log all relevent hyperparameters
    mlflow.log_params({
      "model_type": "Multilayer Perceptron",
      "embeddings": common_params.get('pretrained_embeddings'),
      **params
    })
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    logging.info("Building classifier")
    model = MLPClassifier(
        pretrained_embeddings_path=common_params.get('pretrained_embeddings'),
        token_to_index=common_params.get('token_to_index'),
        n_labels=train_dataset.n_labels,
        hidden_layers=params.get('hidden_layers'),
        dropout=params.get('dropout'),
        vector_size=params.get('embeddings_size'),
        act_fun=params.get('act_fun'),
        freeze_embedings=True  # This can be a hyperparameter
    )
    model = model.to(device)
    # loss function
    # https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
    loss = nn.CrossEntropyLoss()        
    # optimizer algorithm
    # https://pytorch.org/docs/stable/optim.html
    # cambiar: lr; weight_decay; momentum
    optimizer = optim.Adam(
        model.parameters(),
        lr=1e-3,           # This can be a hyperparameter
        weight_decay=1e-5  # This can be a hyperparameter # weight for L2 regularization
        # momentum=        # This can be a hyperparameter
    )

    logging.info("Training classifier")
    for epoch in trange(params.get('epochs')):
        model.train()
        running_loss = []
        for idx, batch in enumerate(tqdm(train_loader, position=0, leave=True)):
            # set to zero the parameter gradients
            optimizer.zero_grad()
            # get the inputs; data and target
            data = batch["data"].to(device)
            target = batch["target"].to(device)
            # forward + backward + optimize
            output = model(data) # MLPClassifier
            loss_value = loss(output, target)
            loss_value.backward()
            optimizer.step()
            # statistics
            running_loss.append(loss_value.item())
        mlflow.log_metric("train_loss", sum(running_loss) / len(running_loss), epoch)

        if validation_dataset:
            logging.info("Evaluating model on validation")
            model.eval()
            running_loss = []
            targets = []
            predictions = []
            with torch.no_grad():
                for batch in tqdm(validation_loader, position=0, leave=True):
                    data = batch["data"].to(device)
                    target = batch["target"].to(device)
                    output = model(data)
                    running_loss.append(
                        loss(output, target).item()
                    )
                    targets.extend(batch["target"].numpy())
                    predictions.extend(output.argmax(axis=1).detach().cpu().numpy())
                mlflow.log_metric("validation_loss", sum(running_loss) / len(running_loss), epoch)
                mlflow.log_metric("validation_bacc", balanced_accuracy_score(targets, predictions), epoch)

    if test_dataset:
        logging.info("Evaluating model on test")
        model.eval()
        running_loss = []
        targets = []
        predictions = []
        with torch.no_grad():
            for batch in tqdm(test_loader, position=0, leave=True):
                data = batch["data"].to(device)
                target = batch["target"].to(device)
                output = model(data)
                running_loss.append(
                    loss(output, target).item()
                )
                targets.extend(batch["target"].numpy())
                predictions.extend(output.argmax(axis=1).detach().cpu().numpy())
            mlflow.log_metric("test_loss", sum(running_loss) / len(running_loss), epoch)
            mlflow.log_metric("test_bacc", balanced_accuracy_score(targets, predictions), epoch)

2021-03-21 23:17:51,968: INFO - Starting experiment
2021-03-21 23:17:52,037: INFO - Building classifier


INFO: 'diplodatos.spanish' does not exist. Creating a new experiment


2021-03-21 23:18:23,917: INFO - Training classifier
76490it [06:43, 189.42it/s]
2021-03-21 23:25:07,734: INFO - Evaluating model on validation
100%|██████████| 9562/9562 [00:19<00:00, 482.66it/s]
76490it [06:56, 183.68it/s]
2021-03-21 23:32:25,684: INFO - Evaluating model on validation
100%|██████████| 9562/9562 [00:19<00:00, 487.08it/s]
76490it [06:55, 183.97it/s]
2021-03-21 23:39:42,972: INFO - Evaluating model on validation
100%|██████████| 9562/9562 [00:19<00:00, 483.39it/s]
100%|██████████| 3/3 [21:40<00:00, 433.56s/it]
2021-03-21 23:40:04,632: INFO - Starting experiment
2021-03-21 23:40:04,637: INFO - Building classifier
2021-03-21 23:40:29,232: INFO - Training classifier
76490it [06:18, 202.05it/s]
2021-03-21 23:46:47,819: INFO - Evaluating model on validation
100%|██████████| 9562/9562 [00:18<00:00, 505.82it/s]
76490it [06:18, 202.30it/s]
2021-03-21 23:53:26,710: INFO - Evaluating model on validation
100%|██████████| 9562/9562 [00:18<00:00, 513.25it/s]
76490it [06:15, 203.62it/

In [None]:
# for params in parametrizable_params:
#   mlflow.set_experiment(f"diplodatos.{common_params.get('language')}")
#   with mlflow.start_run():
#     logging.info("Starting experiment")
#     # Log all relevent hyperparameters
#     mlflow.log_params({
#       "model_type": "Convolutional Neural Network",
#       "embeddings": common_params.get('pretrained_embeddings'),
#       **params
#     })
#     device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

#     logging.info("Building classifier")
#     model = CNN(
#         pretrained_embeddings_path=common_params.get('pretrained_embeddings'),
#         token_to_index=common_params.get('token_to_index'),
#         n_labels=train_dataset.n_labels,
#         vector_size=params.get('embeddings_size'),
#         FILTERS_COUNT=params.get('FILTERS_COUNT'),
#         FILTERS_LENGTH=params.get('FILTERS_LENGTH'),
#         act_fun=params.get('act_fun'),
#         freeze_embedings=True  # This can be a hyperparameter
#     )
     
#     model = model.to(device)
#     # loss function
#     # https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html
#     loss = nn.CrossEntropyLoss()        
#     # optimizer algorithm
#     # https://pytorch.org/docs/stable/optim.html
#     # cambiar: lr; weight_decay; momentum
#     optimizer = optim.Adam(
#         model.parameters(),
#         lr=1e-3,           # This can be a hyperparameter
#         weight_decay=1e-5  # This can be a hyperparameter # weight for L2 regularization
#         # momentum=        # This can be a hyperparameter
#     )

#     logging.info("Training classifier")
#     for epoch in trange(params.get('epochs')):
#         model.train()
#         running_loss = []
#         for idx, batch in enumerate(tqdm(train_loader, position=0, leave=True)):
#             # set to zero the parameter gradients
#             optimizer.zero_grad()
#             # get the inputs; data and target
#             data = batch["data"].to(device)
#             target = batch["target"].to(device)
#             # forward + backward + optimize
#             output = model(data) # MLPClassifier
#             loss_value = loss(output, target)
#             loss_value.backward()
#             optimizer.step()
#             # statistics
#             running_loss.append(loss_value.item())
#         mlflow.log_metric("train_loss", sum(running_loss) / len(running_loss), epoch)

#         if validation_dataset:
#             logging.info("Evaluating model on validation")
#             model.eval()
#             running_loss = []
#             targets = []
#             predictions = []
#             with torch.no_grad():
#                 for batch in tqdm(validation_loader, position=0, leave=True):
#                     data = batch["data"].to(device)
#                     target = batch["target"].to(device)
#                     output = model(data)
#                     running_loss.append(
#                         loss(output, target).item()
#                     )
#                     targets.extend(batch["target"].numpy())
#                     predictions.extend(output.argmax(axis=1).detach().cpu().numpy())
#                 mlflow.log_metric("validation_loss", sum(running_loss) / len(running_loss), epoch)
#                 mlflow.log_metric("validation_bacc", balanced_accuracy_score(targets, predictions), epoch)

#     if test_dataset:
#         logging.info("Evaluating model on test")
#         model.eval()
#         running_loss = []
#         targets = []
#         predictions = []
#         with torch.no_grad():
#             for batch in tqdm(test_loader, position=0, leave=True):
#                 data = batch["data"].to(device)
#                 target = batch["target"].to(device)
#                 output = model(data)
#                 running_loss.append(
#                     loss(output, target).item()
#                 )
#                 targets.extend(batch["target"].numpy())
#                 predictions.extend(output.argmax(axis=1).detach().cpu().numpy())
#             mlflow.log_metric("test_loss", sum(running_loss) / len(running_loss), epoch)
#             mlflow.log_metric("test_bacc", balanced_accuracy_score(targets, predictions), epoch)

In [12]:
    # run tracking UI in the background
    get_ipython().system_raw("mlflow ui --port 5000 &") # run tracking UI in the background


    # create remote tunnel using ngrok.com to allow local port access
    # borrowed from https://colab.research.google.com/github/alfozan/MLflow-GBRT-demo/blob/master/MLflow-GBRT-demo.ipynb#scrollTo=4h3bKHMYUIG6


    # Terminate open tunnels if exist
    ngrok.kill()

    # Setting the authtoken (optional)
    # Get your authtoken from https://dashboard.ngrok.com/auth
    NGROK_AUTH_TOKEN = ""
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

    # Open an HTTPs tunnel on port 5000 for http://localhost:5000
    ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
    print("MLflow Tracking UI:", ngrok_tunnel.public_url)



2021-03-22 00:00:30,704: INFO - Updating authtoken for default "config_path" of "ngrok_path": /usr/local/lib/python3.7/dist-packages/pyngrok/bin/ngrok
2021-03-22 00:00:30,804: INFO - Opening tunnel named: http-5000-6a910677-6269-4ce6-9bc3-8d6f4e656ddd
2021-03-22 00:00:30,900: INFO - t=2021-03-22T00:00:30+0000 lvl=info msg="no configuration paths supplied"
2021-03-22 00:00:30,902: INFO - t=2021-03-22T00:00:30+0000 lvl=info msg="using configuration at default config path" path=/root/.ngrok2/ngrok.yml
2021-03-22 00:00:30,903: INFO - t=2021-03-22T00:00:30+0000 lvl=info msg="open config file" path=/root/.ngrok2/ngrok.yml err=nil
2021-03-22 00:00:30,906: INFO - t=2021-03-22T00:00:30+0000 lvl=info msg="starting web service" obj=web addr=127.0.0.1:4040
2021-03-22 00:00:30,990: INFO - t=2021-03-22T00:00:30+0000 lvl=info msg="tunnel session started" obj=tunnels.session
2021-03-22 00:00:30,991: INFO - t=2021-03-22T00:00:30+0000 lvl=info msg="client session established" obj=csess id=b4a8805c27c9
2

MLflow Tracking UI: https://6f6b22605d2f.ngrok.io


2021-03-22 00:00:31,028: INFO - t=2021-03-22T00:00:31+0000 lvl=info msg=end pg=/api/tunnels id=71dcb82747045b16 status=201 dur=19.902363ms


In [13]:
!zip -r ./mlruns_mlp.zip ./mlruns
from google.colab import files
# files.download("./mlruns.zip")
!cp ./mlruns_mlp.zip ./drive/MyDrive

  adding: mlruns/ (stored 0%)
  adding: mlruns/.trash/ (stored 0%)
  adding: mlruns/1/ (stored 0%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/ (stored 0%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/metrics/ (stored 0%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/metrics/validation_bacc (deflated 35%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/metrics/train_loss (deflated 32%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/metrics/validation_loss (deflated 35%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/artifacts/ (stored 0%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/tags/ (stored 0%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/tags/mlflow.source.type (stored 0%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/tags/mlflow.user (stored 0%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/tags/mlflow.source.name (stored 0%)
  adding: mlruns/1/69f1c2fbcdb54e108e898baa14069b5a/meta.yaml (deflated 44%)
  adding: mlruns