In [None]:
# Activate the project environment stored under ~/envs/d11/ before loading packages.
using Pkg
envpath = expanduser("~/envs/d11/")
Pkg.activate(envpath)

# libs
using Images
using DataFrames
using CSV
using Random
using MLUtils: splitobs, randobs
using FLoops

# private libs
# NOTE(review): `coco_classnames`, used further down, is not defined in this file —
# presumably it is exported by CocoTools; confirm.
using CocoTools
# `p` is a short alias for the PreprocessingImages module (used below as `p.gray2Int32`).
using PreprocessingImages; const p=PreprocessingImages

In [None]:
# output folders
# `workpath` is the current working directory with a trailing "/". When that
# directory lies under the home directory, the leading home prefix is abbreviated
# to "~"; consumers undo this with `expanduser(workpath)`.
# `contractuser` only touches a leading home prefix. A plain
# `replace(path, homedir() => "~")` also rewrites later occurrences of the home
# path and turns a sibling such as "/home/u2" into "~2", which `expanduser`
# cannot restore.
workpath = Base.Filesystem.contractuser(pwd()) * "/"

In [None]:
# Read the master training table and the test table from the working directory.
dftrain_master = CSV.read(joinpath(expanduser(workpath), "dftrain.csv"), DataFrame)
dftest = CSV.read(joinpath(expanduser(workpath), "dftest.csv"), DataFrame)
# Cell output: (rows, columns) of both tables.
(size(dftrain_master), size(dftest))

In [None]:
# # debug: uncomment to run the rest of the notebook on the first 10 rows only
# dftrain_master = dftrain_master[1:10, :]
# dftest = dftest[1:10, :];

In [None]:
# Names of the classes of interest and their ids looked up in `coco_classnames`.
classnames = [
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe",
]
classes = [coco_classnames[name] for name in classnames]
# Cell output: show the ids as a single row.
classes'

In [None]:
"""
    check_elements(a, b)

Return `true` if at least one element of `a` is contained in `b`, and `false`
otherwise (including when `a` is empty).

Membership is tested with `in`; the search stops at the first element of `a`
that is found in `b`.
"""
check_elements(a, b) = any(in(b), a)

In [None]:
# search masks for ALL classes of interest
# For every row of the train-master and test tables, load the mask file named in
# column `y` and record two flags:
#   coi - the mask contains at least one id from `classes`
#   cow - the mask contains the id of the "cow" class
# The `*_classes` names are bound to the same DataFrame objects (no copy), so the
# new columns also appear on `dftrain_master` and `dftest`.
dftrain_master_classes = dftrain_master
dftest_classes = dftest
dfs = [dftrain_master_classes, dftest_classes]

for df in dfs
    nrows = size(df, 1)
    # Vector{Bool} buffers; each loop iteration writes only its own index `i`.
    # Keep these as Vector{Bool}: a BitVector packs bits, so concurrent writes to
    # neighbouring indices would not be safe if the loop runs on several threads.
    coi = zeros(Bool, nrows)
    cow = zeros(Bool, nrows)

    @floop for (i, row) in enumerate(eachrow(df))
        # presumably the mask converted to integer class ids — confirm in PreprocessingImages
        labels = p.gray2Int32(load(expanduser(row.y)))
        coi[i] = check_elements(classes, labels)
        cow[i] = coco_classnames["cow"] in labels
    end

    df[!, :coi] = coi
    df[!, :cow] = cow
end

In [None]:
# split dftrain_master_classes into train and valid
# Only rows flagged `coi` are kept. Their row indices are shuffled and split by
# `splitobs` with `at=0.8`, under a fixed seed so the split is repeatable.
df = filter(:coi => identity, dftrain_master_classes)
Random.seed!(1234)   ### NEVER CHANGE THIS SEED
indtrain, indvalid = splitobs(1:nrow(df); at=0.8, shuffle=true)
dftrain_coi = df[indtrain, :]
dfvalid_coi = df[indvalid, :]
# The cow subsets are the rows of each split that are also flagged `cow`.
dftrain_cow = filter(:cow => identity, dftrain_coi)
dfvalid_cow = filter(:cow => identity, dfvalid_coi)

# Cell output: (rows, columns) of the four tables.
(size(dftrain_coi), size(dfvalid_coi), size(dftrain_cow), size(dfvalid_cow))

In [None]:
# create negative classes for balancing classification
# Negatives are the rows of the training master table that are NOT flagged `coi`.
# As many negatives are drawn as there are positives in each split.
# No seed is set here: the draws continue from whatever RNG state the preceding
# cells left behind, so run the notebook in order for repeatable negatives.
df_nocoi = dftrain_master_classes[.!dftrain_master_classes.coi, :]
Ntrain_coi = size(dftrain_coi, 1)
Nvalid_coi = size(dfvalid_coi, 1)
Nnocoi = size(df_nocoi, 1)

if Ntrain_coi + Nvalid_coi <= Nnocoi
    # Take train and validation negatives as disjoint slices of ONE permutation of
    # the pool. Two independent `randobs` draws from the same pool can hand the same
    # image to both sets (train/validation leakage) and, because `randobs` samples
    # with replacement, can repeat an image within a set.
    negorder = shuffle(1:Nnocoi)
    dftrain_nocoi = df_nocoi[negorder[1:Ntrain_coi], :]
    dfvalid_nocoi = df_nocoi[negorder[(Ntrain_coi + 1):(Ntrain_coi + Nvalid_coi)], :]
else
    # The pool is too small for disjoint sets: keep the original independent
    # sampling so the notebook still runs, but make the overlap visible.
    @warn "Not enough negatives for disjoint train/valid sets; sampling with replacement" Nnocoi Ntrain_coi Nvalid_coi
    dftrain_nocoi = randobs(df_nocoi, Ntrain_coi)
    dfvalid_nocoi = randobs(df_nocoi, Nvalid_coi)
end

Ntrain_cow = size(dftrain_cow, 1)
Nvalid_cow = size(dfvalid_cow, 1)
# Cow negatives are a subsample, without repetition, of the matching coi negatives.
# The cow positives are a subset of the coi positives, so the cow counts never
# exceed the number of coi negatives available.
dftrain_nocow = dftrain_nocoi[shuffle(1:size(dftrain_nocoi, 1))[1:Ntrain_cow], :]
dfvalid_nocow = dfvalid_nocoi[shuffle(1:size(dfvalid_nocoi, 1))[1:Nvalid_cow], :]

# Cell output: (rows, columns) of the four negative tables.
size(dftrain_nocoi), size(dfvalid_nocoi), size(dftrain_nocow), size(dfvalid_nocow)

In [None]:
# Balanced coi training table: positives stacked on negatives, then the rows are
# permuted under a fixed seed so the row order is repeatable.
Random.seed!(1234)   ### NEVER CHANGE THIS SEED
dftrain_coi_balanced = let stacked = vcat(dftrain_coi, dftrain_nocoi)
    stacked[shuffle(1:nrow(stacked)), :]
end
size(dftrain_coi_balanced)

In [None]:
# Balanced coi validation table: positives stacked on negatives, then the rows are
# permuted under a fixed seed so the row order is repeatable.
Random.seed!(1234)   ### NEVER CHANGE THIS SEED
dfvalid_coi_balanced = let stacked = vcat(dfvalid_coi, dfvalid_nocoi)
    stacked[shuffle(1:nrow(stacked)), :]
end
size(dfvalid_coi_balanced)

In [None]:
# Balanced cow training table: positives stacked on negatives, then the rows are
# permuted under a fixed seed so the row order is repeatable.
Random.seed!(1234)   ### NEVER CHANGE THIS SEED
dftrain_cow_balanced = let stacked = vcat(dftrain_cow, dftrain_nocow)
    stacked[shuffle(1:nrow(stacked)), :]
end
size(dftrain_cow_balanced)

In [None]:
# Balanced cow validation table: positives stacked on negatives, then the rows are
# permuted under a fixed seed so the row order is repeatable.
Random.seed!(1234)   ### NEVER CHANGE THIS SEED
dfvalid_cow_balanced = let stacked = vcat(dfvalid_cow, dfvalid_nocow)
    stacked[shuffle(1:nrow(stacked)), :]
end
size(dfvalid_cow_balanced)

In [None]:
# Preview the first three rows of the balanced coi training table.
first(dftrain_coi_balanced,3)

In [None]:
# checkpoint
# Visual sanity check: pick one random row flagged `coi` from the balanced coi
# training table and show its image (column `X`) beside its mask (column `y`),
# both resized with ratio 1/2.
df = filter(:coi => identity, dftrain_coi_balanced)

k = rand(1:nrow(df))
fpfn_img = expanduser(df[k, :X])
fpfn_mask = expanduser(df[k, :y])

img = imresize(Images.load(fpfn_img), ratio=1/2)
mask = imresize(Images.load(fpfn_mask), ratio=1/2)

mosaicview(img, mask; nrow=1)

In [None]:
# checkpoint
# Visual sanity check: pick one random row flagged `cow` from the balanced cow
# training table and show its image (column `X`) beside its mask (column `y`),
# both resized with ratio 1/2.
df = filter(:cow => identity, dftrain_cow_balanced)

k = rand(1:nrow(df))
fpfn_img = expanduser(df[k, :X])
fpfn_mask = expanduser(df[k, :y])

img = imresize(Images.load(fpfn_img), ratio=1/2)
mask = imresize(Images.load(fpfn_mask), ratio=1/2)

mosaicview(img, mask; nrow=1)

In [None]:
# save data frames
# Write the balanced train/valid tables and the flagged test table as CSV files
# into the working directory.
CSV.write(joinpath(expanduser(workpath), "dftrain-coi-balanced.csv"), dftrain_coi_balanced)
CSV.write(joinpath(expanduser(workpath), "dftrain-cow-balanced.csv"), dftrain_cow_balanced)

CSV.write(joinpath(expanduser(workpath), "dfvalid-coi-balanced.csv"), dfvalid_coi_balanced)
CSV.write(joinpath(expanduser(workpath), "dfvalid-cow-balanced.csv"), dfvalid_cow_balanced)

CSV.write(joinpath(expanduser(workpath), "dftest-classes.csv"), dftest_classes)