In [None]:
from fastai.vision.all import *
from torchsummary import summary

In [None]:
# Pin this session to a single CUDA device (index 0).
# NOTE(review): this comment previously said "device 2" (chosen to avoid other
# users on the server), but the code selects index 0 — confirm which GPU is intended.
torch.cuda.set_device(0)
default_device()

# Data Processing

1. Convert any HEIC images to JPG on the local machine, then copy the files from the local Box folder to the server

```bash
# From local machine
# Convert every HEIC image to a JPG written next to the original.
# NOTE(review): the recursive ** glob depends on the shell (bash needs
# `shopt -s globstar`) — confirm which shell this is run in.
# Both this glob and the find below match only the upper-case ".HEIC" extension.
mogrify -monitor -format jpg FrankOrFrary/**/*.HEIC
# Remove the HEIC originals so only the JPG copies are uploaded
find FrankOrFrary -name "*.HEIC" -print0 | xargs -0 rm -rf
# Upload the dataset folder to the server
rsync -aivP FrankOrFrary dgx01:/data/cs152/
```

2. On the server, convert and downsize every raw image to JPG (any remaining non-JPG files, e.g., HEIC, are converted here as well)

```bash
#!/usr/bin/env bash
#
# Convert every file under the raw dataset into a metadata-stripped, downsized
# JPG in the processed dataset. Only the immediate parent directory name of each
# raw file is mirrored in the output tree. Images that already exist in the
# processed dataset are skipped, so the script can be re-run safely.

# Set options for recursive glob (globstar); nullglob makes an unmatched
# pattern expand to nothing so the loop body is simply skipped
shopt -s globstar nullglob

RAW_DATASET_PATH=/data/cs152/FrankOrFrary/raw
PROCESSED_DATASET_PATH=/data/cs152/FrankOrFrary/processed

# Remove .DS_Store files so they are not passed to the converter
find "$RAW_DATASET_PATH" -name ".DS_Store" -type f -delete

# Loop over all images in the raw dataset
for image_to_convert in "$RAW_DATASET_PATH"/**/*; do
    # Skip directories
    if [[ -d "$image_to_convert" ]]; then continue; fi

    # Get the file name and replace extension with jpg
    image_name=$(basename "$image_to_convert")
    image_name="${image_name%.*}.jpg"

    # Ensure the output directory exists
    parent_name=$(basename "$(dirname "$image_to_convert")")
    mkdir -p "$PROCESSED_DATASET_PATH"/"$parent_name"

    # Create the new image name
    image_name="$PROCESSED_DATASET_PATH"/"$parent_name"/"$image_name"

    # Create the new image if it doesn't exist
    if [[ -f "$image_name" ]]; then
        echo "$image_name" already exists
    else
        echo "Creating $image_name"
        # The '>' geometry flag only shrinks images larger than the given size.
        # Report which input failed so bad files are easy to find afterwards.
        if ! convert "$image_to_convert" -strip -thumbnail '1000>' -format jpg "$image_name"; then
            echo "Failed to convert $image_to_convert" >&2
        fi
    fi
done

```


Some issues encountered while running the conversion script:

```text
convert: Invalid SOS parameters for sequential JPEG `/data/cs152/FrankOrFrary/raw/Frary/Alan-Frary-15.jpg' @ warning/jpeg.c/JPEGWarningHandler/403.

convert: no decode delegate for this image format `HEIC' @ error/constitute.c/ReadImage/746.
convert: no images defined `/data/cs152/FrankOrFrary/processed/Frary/Aldo-frary-03.jpg' @ error/convert.c/ConvertImageCommand/3342.
```

In [None]:
# Root folder of the processed image dataset, relative to the notebook's working directory
path = Path("./CampusBuildingsProcessed")
# List the folder contents as a quick sanity check
path.ls()

In [None]:
# Build the training/validation DataLoaders from the image folder.
# 20% of the images are held out for validation; the fixed seed makes that
# random split reproducible across kernel restarts so metrics are comparable
# between runs.
dls = ImageDataLoaders.from_folder(
    path,
    valid_pct=0.2,
    seed=42,
    item_tfms=Resize(224),
    num_workers=16,
)
dls.show_batch()
# Tip: if an image fails to load, run `file <filename>` in a shell to check its real format

In [None]:
# Report the label vocabulary and the number of items in each split
for caption, count in (
    ("Dataset classes:", dls.vocab),
    ("Validation dataset size:", len(dls.valid_ds)),
    ("Training dataset size:", len(dls.train_ds)),
):
    print(caption, count)

In [None]:
# Create a learner on the resnet34 architecture, tracking accuracy as the metric
learn = vision_learner(dls, arch=resnet34, metrics=accuracy)
# Print a layer-by-layer summary; the trailing semicolon hides the returned value
summary(learn.model);

In [None]:
# Run the learning-rate finder to help choose the base learning rate used for fine-tuning
learn.lr_find()

In [None]:
# Fine-tune for 4 epochs with a base learning rate of 5e-3
learn.fine_tune(epochs=4, base_lr=5e-3)

In [None]:
# Display a sample of predictions for a visual spot-check
learn.show_results()

In [None]:
# Build an interpretation object from the trained learner.
# ClassificationInterpretation is used (rather than the base Interpretation)
# because a later cell calls plot_confusion_matrix on it.
interp = ClassificationInterpretation.from_learner(learn)

In [None]:
# Show the 9 highest-loss examples to see where the model struggles most
interp.plot_top_losses(k=9, figsize=(10, 10))

In [None]:
# Plot the confusion matrix to see which classes get mistaken for each other
interp.plot_confusion_matrix(figsize=(10, 10))

In [None]:
# Serialize the trained learner for later inference.
# NOTE(review): the file appears to be written under the dataset folder rather
# than the working directory (the next cell moves it from
# ./CampusBuildingsProcessed) — presumably because export resolves the name
# relative to learn.path; confirm.
learn.export("./CampusClassifier.pkl")

In [None]:
# Move the exported model out of the dataset folder into the notebook's working directory
!mv ./CampusBuildingsProcessed/CampusClassifier.pkl .

In [None]:
# List the working directory to confirm CampusClassifier.pkl is now here
!ls