<a href="https://colab.research.google.com/github/abdulsam/MLOps_wandb/blob/main/Improving_baseline_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install dependencies (run once)
!wget https://raw.githubusercontent.com/wandb/edu/main/mlops-001/lesson1/requirements.txt
!wget https://raw.githubusercontent.com/wandb/edu/main/mlops-001/lesson2/params.py
!wget https://raw.githubusercontent.com/wandb/edu/main/mlops-001/lesson2/utils.py
!pip install -r requirements.txt

--2023-08-04 06:50:50--  https://raw.githubusercontent.com/wandb/edu/main/mlops-001/lesson1/requirements.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 82 [text/plain]
Saving to: ‘requirements.txt’


2023-08-04 06:50:50 (3.82 MB/s) - ‘requirements.txt’ saved [82/82]

--2023-08-04 06:50:50--  https://raw.githubusercontent.com/wandb/edu/main/mlops-001/lesson2/params.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 305 [text/plain]
Saving to: ‘params.py’


2023-08-04 06:50:50 (18.2 MB/s) - ‘params.py’ saved [305/305]

--2023-0

# Refactor of Baseline Model
<!--- @wandbcode{course-lesson2} -->

The same code as in the previous notebook, but refactored into small functions. This will be useful for hyperparameter tuning.

In [2]:
import wandb
import pandas as pd
import torchvision.models as tvmodels
from fastai.vision.all import *
from fastai.callback.wandb import WandbCallback

import params
from utils import get_predictions, create_iou_table, MIOU, BackgroundIOU, \
                  RoadIOU, TrafficLightIOU, TrafficSignIOU, PersonIOU, VehicleIOU, BicycleIOU

In [3]:
# Hyperparameters and run options; `train` receives this object and passes it
# to `wandb.init` as the run config.
train_config = SimpleNamespace(**{
    "framework": "fastai",
    "img_size": (180, 320),
    "batch_size": 8,
    "augment": True,      # use data augmentation
    "epochs": 10,
    "lr": 2e-3,
    "arch": "resnet18",   # resolved by name on `torchvision.models` inside `train`
    "pretrained": True,   # whether to use pretrained encoder
    "seed": 42,
    "log_preds": True,
})

In [12]:
def download_data():
    "Download the `latest` version of the processed-dataset artifact and return its local directory as a `Path`"
    print("At download")
    # `train` calls this after `wandb.init`, so the artifact is requested through that run.
    artifact_name = f'{params.PROCESSED_DATA_AT}:latest'
    artifact = wandb.use_artifact(artifact_name)
    local_dir = Path(artifact.download())
    print("Downloaded")
    return local_dir

In [13]:
def label_func(fname):
    "Return the mask path for image `fname`: `<grandparent dir>/labels/<stem>_mask.png`"
    mask_name = f"{fname.stem}_mask.png"
    dataset_root = fname.parent.parent
    return dataset_root.joinpath("labels", mask_name)

In [14]:
def get_df(processed_dataset_dir, is_test=False):
    """Load `data_split.csv` from `processed_dataset_dir` and attach image/label paths.

    With `is_test=False` (default) the rows whose `Stage` is not 'test' are kept and a
    boolean `is_valid` column marks the 'valid' rows. With `is_test=True` only the
    'test' rows are kept and no `is_valid` column is added.

    Returns a DataFrame with a fresh 0..n-1 index plus `image_fname` and
    `label_fname` columns.
    """
    print("get_df")
    df = pd.read_csv(processed_dataset_dir / 'data_split.csv')

    if is_test:
        df = df[df.Stage == 'test'].reset_index(drop=True)
    else:
        df = df[df.Stage != 'test'].reset_index(drop=True)
        df['is_valid'] = df.Stage == 'valid'

    # assign paths: images sit under `images/`, masks are derived by `label_func`
    image_paths = [processed_dataset_dir/f'images/{name}' for name in df.File_Name.values]
    df["image_fname"] = image_paths
    df["label_fname"] = [label_func(path) for path in image_paths]
    print("returning get_df")
    return df

In [15]:
def get_data(df, bs=4, img_size=(180, 320), augment=True):
    """Build fastai dataloaders for segmentation from `df`.

    `df` must provide the `image_fname` and `label_fname` columns (see `get_df`);
    the train/valid split comes from `ColSplitter()` with its default column
    (presumably the `is_valid` column that `get_df` adds — confirm against fastai).
    Items are resized to `img_size`; `aug_transforms()` is applied per batch only
    when `augment` is true. `bs` is the batch size.
    """
    print("In get data")
    # Components are created in the same order the DataBlock receives them.
    segmentation_blocks = (ImageBlock, MaskBlock(codes=params.BDD_CLASSES))
    read_image = ColReader("image_fname")
    read_mask = ColReader("label_fname")
    split_by_column = ColSplitter()
    resize_item = Resize(img_size)
    batch_augs = aug_transforms() if augment else None

    datablock = DataBlock(
        blocks=segmentation_blocks,
        get_x=read_image,
        get_y=read_mask,
        splitter=split_by_column,
        item_tfms=resize_item,
        batch_tfms=batch_augs,
    )
    print("got the data")
    return datablock.dataloaders(df, bs=bs)

In [16]:
def log_predictions(learn):
    "Build an IoU table from the predictions of `learn` and log it to W&B as `pred_table`"
    inputs, raw_outputs, preds = get_predictions(learn)
    iou_table = create_iou_table(inputs, raw_outputs, preds, params.BDD_CLASSES)
    wandb.log({"pred_table": iou_table})

In [17]:
def log_final_metrics(learn):
    "Validate `learn` once more and write the loss and every metric to the W&B run summary as `final_*` keys"
    final_scores = learn.validate()
    # The first score is stored as the loss; the remaining ones are named after `learn.metrics`.
    summary_keys = ['final_loss']
    summary_keys += [f'final_{metric.name}' for metric in learn.metrics]
    final_results = {summary_keys[pos]: score for pos, score in enumerate(final_scores)}
    for key in final_results:
        wandb.summary[key] = final_results[key]

In [18]:
def train(config):
    """Train a U-Net segmentation model described by `config` and track it in W&B.

    `config` must expose the attributes read below: `seed`, `batch_size`,
    `img_size`, `augment`, `arch` (name of a `torchvision.models` constructor),
    `pretrained`, `epochs`, `lr` and `log_preds`.

    Steps: seed everything, open a W&B run, download the processed dataset,
    build the dataloaders, fit with the one-cycle policy while tracking the IoU
    metrics, optionally log a prediction table, and store the final validation
    scores in the run summary. Returns None.
    """
    set_seed(config.seed, reproducible=True)

    # The run is used as a context manager so that it is always finished, even
    # when training raises or is interrupted. Previously `wandb.finish()` was
    # only reached on success, leaving the run open in the kernel otherwise.
    with wandb.init(project=params.WANDB_PROJECT, job_type="training", config=config):
        print("Project Initialized")

        # From here on read hyperparameters from the run's config object.
        config = wandb.config

        processed_dataset_dir = download_data()
        df = get_df(processed_dataset_dir)

        dls = get_data(df, bs=config.batch_size, img_size=config.img_size, augment=config.augment)

        metrics = [MIOU(), BackgroundIOU(), RoadIOU(), TrafficLightIOU(),
                   TrafficSignIOU(), PersonIOU(), VehicleIOU(), BicycleIOU()]

        # `config.arch` is a string; resolve it to the torchvision constructor.
        learn = unet_learner(dls, arch=getattr(tvmodels, config.arch),
                             pretrained=config.pretrained, metrics=metrics)

        callbacks = [
            SaveModelCallback(monitor='miou'),
            # Prediction logging is handled by `log_predictions` below instead.
            WandbCallback(log_preds=False, log_model=True),
        ]

        learn.fit_one_cycle(config.epochs, config.lr, cbs=callbacks)

        if config.log_preds:
            log_predictions(learn)

        log_final_metrics(learn)

## Run the training

In [19]:
# Launch a single training run with the baseline configuration defined earlier in this notebook.
train(config=train_config)

Project Initialized
At download


[34m[1mwandb[0m: Downloading large artifact bdd_simple_1k_split:latest, 813.25MB. 4010 files... 
[34m[1mwandb[0m:   4010 of 4010 files downloaded.  
Done. 0:0:43.8


Downloaded
get_df
returning get_df
In get data
got the data


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 171MB/s]


epoch,train_loss,valid_loss,miou,background_iou,road_iou,traffic_light_iou,traffic_sign_iou,person_iou,vehicle_iou,bicycle_iou,time
0,0.5035,0.364788,0.302899,0.856045,0.682933,0.0,0.0,0.0,0.581317,0.0,00:44
1,0.44154,0.456096,0.223625,0.804077,0.752603,0.0,0.0,0.0,0.008696,0.0,00:40
2,0.341107,0.31037,0.335007,0.889018,0.781805,0.0,0.0,0.0,0.674224,0.0,00:40
3,0.308571,0.294588,0.335348,0.888502,0.773587,0.0,0.0,0.0,0.685345,0.0,00:40
4,0.284449,0.29517,0.337692,0.890041,0.753821,0.0,0.0,0.0,0.719981,0.0,00:41
5,0.244119,0.291671,0.347504,0.902776,0.799168,0.0,0.0,0.0,0.730585,0.0,00:42
6,0.226735,0.267549,0.35669,0.909136,0.822833,0.019914,0.0,0.0,0.744949,0.0,00:42
7,0.206693,0.256822,0.360256,0.910406,0.823629,0.038498,0.0,0.0,0.749256,0.0,00:42
8,0.19184,0.244747,0.367153,0.912149,0.82566,0.075743,0.0,0.0,0.756516,0.0,00:42
9,0.179307,0.243743,0.365229,0.912416,0.826369,0.063608,0.0,0.0,0.754213,0.0,00:42


Better model found at epoch 0 with miou value: 0.30289944952858733.
Better model found at epoch 2 with miou value: 0.33500671276999844.
Better model found at epoch 3 with miou value: 0.3353476620971991.
Better model found at epoch 4 with miou value: 0.33769184654925394.
Better model found at epoch 5 with miou value: 0.3475041149280384.
Better model found at epoch 6 with miou value: 0.3566903117209734.
Better model found at epoch 7 with miou value: 0.36025550714110466.
Better model found at epoch 8 with miou value: 0.3671525157565793.


[34m[1mwandb[0m: [32m[41mERROR[0m Control-C detected -- Run data was not synced


In [None]:
# W&B project and artifact settings.
# NOTE(review): the functions in this notebook read these names from the imported
# `params` module (e.g. `params.WANDB_PROJECT`), not from notebook globals, so
# running this cell alone does not change what `train` uses. Confirm whether
# this cell is meant to be saved as `params.py`.
WANDB_PROJECT = "MLOps"
# Use the `None` object, not the string 'None': the string is truthy and would
# be taken as a literal entity name by any code that consumes ENTITY.
ENTITY = None
# Maps class index -> class name.
BDD_CLASSES = dict(enumerate(['background', 'road', 'traffic light', 'traffic sign', 'person', 'vehicle', 'bicycle']))
RAW_DATA_AT = 'abdussamad/MLOps/bdd_simple_1k'
PROCESSED_DATA_AT = 'abdussamad/MLOps/bdd_simple_1k_split'