In [2]:
from IPython import get_ipython

IS_COLAB = False
if "google.colab" in str(get_ipython()):
    # Make sure to go to Runtime > Change runtime type > Hardware Accelerator: GPU
    IS_COLAB = True
    !pip install fastai --upgrade -q
    !pip freeze | grep fast

In [None]:
# Colab only: mount Google Drive and move into the mount point.
if IS_COLAB:
    from google.colab import drive

    drive.mount("/gdrive")
    # Change the working directory to the mount point; the relative
    # "MyDrive/..." project path used on Colab is resolved against it.
    %cd /gdrive
    # List the Drive root as a quick check that the mount succeeded.
    %ls MyDrive/

In [None]:
import json
from pathlib import Path

import pandas as pd
from fastai.learner import load_model, save_model
from fastai.vision.all import cnn_learner
from fastai.vision.data import ImageDataLoaders
from fastai.vision.models import xresnet

# Set to True (on Colab) to fine-tune and save the model instead of loading
# previously saved weights.
TRAIN = False

# Project root: a folder on the mounted Drive when on Colab (make sure that
# this path exists), otherwise the parent of the working directory.
ROOT_PATH = Path("MyDrive/pracds_final") if IS_COLAB else Path("..")

if IS_COLAB:
    import torch

    # Report which GPU (device index 0) the Colab runtime provides.
    print(torch.cuda.get_device_name(0))

In [2]:
# Setup y_train, y_test
# Restrict the recording metadata to the ids listed in song_vs_call.json.
df = pd.read_csv(ROOT_PATH / "data/raw/metadata.csv")
svc_ids = pd.read_json(ROOT_PATH / "data/raw/song_vs_call.json").squeeze()
svc_df = df.loc[df.id.isin(svc_ids)].copy()

# Pre-computed train/test split, stored as lists of recording ids.
with open(ROOT_PATH / "data/processed/svc_split.json") as svc_split_file:
    svc_split = json.load(svc_split_file)
train_ids = svc_split["train_ids"]
test_ids = svc_split["test_ids"]

# Add response variable
# `type` is a comma-separated string; normalise it (lower-case, no spaces) and
# split it into a list of type tokens per recording.
type_col = svc_df.type.str.lower().str.replace(" ", "").str.split(",")
# Type tokens other than "call"/"song"; not used by the other cells shown here.
filtered_type_col = type_col.apply(lambda types: set(types) - {"call", "song"})
# label = 1 when "call" is among the recording's types, else 0.
svc_df["label"] = type_col.apply(lambda types: "call" in types).astype(int)

# Select the response column that was just created. The previous
# `reindex(columns=["id", "pred"])` referenced a non-existent "pred" column,
# which reindex silently filled with NaN, so y_train/y_test were all NaN.
# Strict `.loc` selection raises a KeyError instead if a column is missing.
y_df = svc_df.loc[:, ["id", "label"]].copy()
y_train, y_test = (
    y_df.loc[y_df.id.isin(train_ids), "label"],
    y_df.loc[y_df.id.isin(test_ids), "label"],
)

In [3]:
# Sonogram image file name for each recording: "<id>.png".
svc_df["name"] = svc_df["id"].astype(str) + ".png"
# Recordings from the test split serve as the validation set.
svc_df["is_valid"] = svc_df["id"].isin(test_ids)

# Frame handed to the data loaders: indexed by id, with the file name,
# label and validation flag as its columns (in that order).
image_df = svc_df.loc[:, ["id", "name", "label", "is_valid"]].set_index("id").copy()

In [6]:
# No explicit batch size is passed (a `bs = 64` setting was left disabled).
# num_workers=0 turns off DataLoader multiprocessing.
# NOTE(review): the earlier comment said this is needed for *local*
# evaluation, yet it is only applied when IS_COLAB is true — confirm which
# environment actually needs it.
kwargs = {"num_workers": 0} if IS_COLAB else {}

# Build the train/validation loaders from image_df; rows flagged in the
# "is_valid" column form the validation set. File-name and label columns are
# not named here, so they are resolved by from_df's defaults.
# NOTE(review): the earlier comment described convert_mode='L' (a single
# colour channel) being forwarded to the image-opening code, but no
# convert_mode argument is passed here — confirm whether that is intended.
data = ImageDataLoaders.from_df(
    image_df,
    folder=ROOT_PATH / "data/raw/sonograms",
    valid_col="is_valid",
    **kwargs
)
learn = cnn_learner(data, xresnet.xresnet34)

In [None]:
# Make sure this path exists on colab
model_path = (ROOT_PATH / "models/sono_model.pth").resolve().absolute()
if IS_COLAB and TRAIN:
    # Fine tune model
    learn.fine_tune(1)
    # GDrive fails when you try to use mkdir
    # so we manually call `save_model`
    save_model(model_path, learn.model, getattr(learn, "opt", None))
    %ls -al /home
    from google.colab import files

    files.download(model_path)
else:
    load_model(model_path, learn.model, learn.opt)

In [7]:
# Evaluate the learner on the validation set (rows flagged by "is_valid").
# No metrics were passed to cnn_learner, so the single value displayed is
# presumably the validation loss — confirm.
learn.validate()
# Disabled example of predicting a single sonogram:
# learn.predict(ROOT_PATH / "data/raw/sonograms/1136.png")

(#1) [0.5482389330863953]