In [1]:
from IPython import get_ipython

IS_COLAB = False
if "google.colab" in str(get_ipython()):
    # Make sure to go to Runtime > Change runtime type > Hardware Accelerator: GPU
    IS_COLAB = True
    !pip install fastai --upgrade -q
    !pip install wandb
    !pip freeze | grep fast

[K     |████████████████████████████████| 204kB 22.6MB/s 
[K     |████████████████████████████████| 61kB 6.2MB/s 
[?25hCollecting wandb
[?25l  Downloading https://files.pythonhosted.org/packages/98/5f/45439b4767334b868e1c8c35b1b0ba3747d8c21be77b79f09eed7aa3c72b/wandb-0.10.30-py2.py3-none-any.whl (1.8MB)
[K     |████████████████████████████████| 1.8MB 31.5MB/s 
Collecting configparser>=3.8.1
  Downloading https://files.pythonhosted.org/packages/fd/01/ff260a18caaf4457eb028c96eeb405c4a230ca06c8ec9c1379f813caa52e/configparser-5.0.2-py3-none-any.whl
Collecting pathtools
  Downloading https://files.pythonhosted.org/packages/e7/7f/470d6fcdf23f9f3518f6b0b76be9df16dcc8630ad409947f8be2eb0ed13a/pathtools-0.1.2.tar.gz
Collecting GitPython>=1.0.0
[?25l  Downloading https://files.pythonhosted.org/packages/27/da/6f6224fdfc47dab57881fe20c0d1bc3122be290198ba0bf26a953a045d92/GitPython-3.1.17-py3-none-any.whl (166kB)
[K     |████████████████████████████████| 174kB 47.7MB/s 
Collecting subprocess32

In [2]:
# Colab only: mount Google Drive at /gdrive so files under MyDrive are reachable.
if IS_COLAB:
    from google.colab import drive

    drive.mount("/gdrive")
    # List the top level of MyDrive to confirm the mount worked.
    %ls /gdrive/MyDrive/

Mounted at /gdrive
[0m[01;34m10605[0m/  [01;34m17731[0m/                       [01;34mjane-street-kaggle[0m/
[01;34m15688[0m/  2AxisFaults.gdraw            OLD-5.0-ab-sonogram-model.ipynb
[01;34m16440[0m/  5.0-ab-sonogram-model.ipynb  [01;34mpracds_final[0m/
[01;34m17637[0m/  [01;34mGradeCalculations[0m/


In [17]:
import json
from pathlib import Path

import pandas as pd
import wandb
from fastai.callback.wandb import WandbCallback
from fastai.learner import load_model, save_model
from fastai.metrics import accuracy
from fastai.vision.all import cnn_learner
from fastai.vision.data import ImageDataLoaders
from fastai.vision.models import xresnet

ROOT_PATH = Path("..")
TRAIN = True
if IS_COLAB:
    import torch

    # Make sure that this path exists
    ROOT_PATH = Path("/gdrive/MyDrive/pracds_final")
    print(torch.cuda.get_device_name(0))

!wandb login

Tesla T4
[34m[1mwandb[0m: Currently logged in as: [33madithyabsk[0m (use `wandb login --relogin` to force relogin)


In [4]:
# Setup y_train, y_test
# Load the full metadata table and keep only the recordings whose id appears
# in the song-vs-call id list.
df = pd.read_csv(ROOT_PATH / "data/raw/metadata.csv")
svc_ids = pd.read_json(ROOT_PATH / "data/raw/song_vs_call.json").squeeze()
svc_df = df.loc[df.id.isin(svc_ids)].copy()

# Pre-computed train/test split, stored as two lists of ids.
with open(ROOT_PATH / "data/processed/svc_split.json") as svc_split_file:
    svc_split = json.load(svc_split_file)
train_ids = svc_split["train_ids"]
test_ids = svc_split["test_ids"]

# Add response variable
# `type` is a comma-separated string; normalise case/whitespace and split into a list.
type_col = svc_df.type.str.lower().str.replace(" ", "").str.split(",")
# Types other than "call"/"song" for each row (not used further in this cell).
filtered_type_col = type_col.apply(lambda l: set(l) - {"call", "song"})
# label == 1 when "call" is among the row's types, else 0.
svc_df["label"] = type_col.apply(lambda l: "call" in l).astype(int)

# Select the response by its real column name ("label"). The previous
# `reindex(columns=["id", "pred"])` referenced a column that does not exist, which
# silently produced an all-NaN target; strict indexing raises if a column is missing.
y_df = svc_df[["id", "label"]].copy()
y_train, y_test = (
    y_df[y_df.id.isin(train_ids)].drop(columns=["id"]).squeeze(),
    y_df[y_df.id.isin(test_ids)].drop(columns=["id"]).squeeze(),
)

In [5]:
# Each recording's image file is named "<id>.png".
svc_df["name"] = svc_df["id"].astype(str) + ".png"
# Rows whose id is in the test split form the validation set.
svc_df["is_valid"] = svc_df["id"].isin(test_ids)

# Frame handed to the data loaders: indexed by id, with file name, label and split flag.
image_columns = ["id", "name", "label", "is_valid"]
image_df = svc_df.reindex(columns=image_columns).set_index("id").copy()

In [7]:
bs = 128  # Batch size

# On Colab the loaders are built with num_workers=0 (no worker subprocesses).
# NOTE(review): the earlier comment said num_workers must be 0 for *local* evaluation,
# but the code only applies it on Colab -- confirm which is intended.
kwargs = {"num_workers": 0} if IS_COLAB else {}

# Build train/valid loaders from `image_df`: images are read from the sonograms
# folder and rows flagged in `is_valid` go to the validation set.
# NOTE(review): an earlier comment mentioned convert_mode='L' (one colour channel),
# but no convert_mode is passed here -- confirm the default image loading is intended.
data = ImageDataLoaders.from_df(
    image_df,
    folder=ROOT_PATH / "data/raw/sonograms",
    valid_col="is_valid",
    bs=bs,
    **kwargs,
)

# xresnet18 learner with pretrained=True.
learn = cnn_learner(data, xresnet.xresnet18, pretrained=True)

In [8]:
# Make sure this path exists on colab
fname = "sono_model.pth"
model_path = (ROOT_PATH / f"models/{fname}").resolve().absolute()
if IS_COLAB and TRAIN:
    # Fine tune model
    wandb.init(project="sono-model")
    learn.fit_one_cycle(1, cbs=WandbCallback())
    # GDrive fails when you try to use mkdir
    # so we manually call `save_model`
    save_path = f"/home/{fname}"
    save_model(save_path, learn.model, getattr(learn, "opt", None))
    %ls -al /home
    from google.colab import files

    files.download(save_path)
else:
    load_model(model_path, learn.model, learn.opt)

[34m[1mwandb[0m: Currently logged in as: [33madithyabsk[0m (use `wandb login --relogin` to force relogin)


WandbCallback requires use of "SaveModelCallback" to log best model


epoch,train_loss,valid_loss,time
0,1.007792,0.67467,18:15


total 50144
drwxr-xr-x 1 root root     4096 May 18 02:27 [0m[01;34m.[0m/
drwxr-xr-x 1 root root     4096 May 18 02:07 [01;34m..[0m/
-rw-r--r-- 1 root root 51338051 May 18 02:27 sono_model.pth


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [18]:
# Attach accuracy so that `learn.validate` reports it after the loss
# (the cells below read index 0 as loss and index 1 as accuracy).
learn.metrics = [accuracy]

In [26]:
# Evaluate on dataset index 0 (the training split); result is [loss, accuracy].
train_metrics = learn.validate(ds_idx=0)
train_loss, train_accuracy = train_metrics[0], train_metrics[1]
print(f"Train Loss: {train_loss:.4f}")
print(f"Train Accuracy: {train_accuracy:.4f}")

Train Loss: 0.6611
Train Accuracy: 0.6818


In [21]:
# Evaluate with the default dataset (validation split); result is [loss, accuracy].
val_metrics = learn.validate()
val_loss, val_accuracy = val_metrics[0], val_metrics[1]
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")
# Example single-image prediction (left disabled):
# learn.predict(ROOT_PATH / "data/raw/sonograms/1136.png")

Validation Loss: 0.6747
Validation Accuracy: 0.6821
