pascalvoc: resize original images

Strategy:
1. Read: Images RGB / Masks RGB (original dataset)
2. Pad
3. Save: Images RGB / Masks Gray

In [None]:
# Size that every image and mask is padded to.
padsize = (512, 512)

environment

In [None]:
# Activate the dedicated project environment before any package is loaded.
using Pkg
envpath = "~/envs/d11/" |> expanduser
Pkg.activate(envpath)

In [None]:
# libs
using Images
using DataFrames
using CSV
using FLoops
using MLUtils: splitobs, randobs
using Random

In [None]:
# private libs
using PreprocessingImages; p=PreprocessingImages

In [None]:
# output folders
# Keep the working directory in home-relative form ("~/...") so that every path
# derived from it is home-relative too. `contractuser` only rewrites a leading
# home-directory prefix; a plain `replace` would also rewrite a match that
# happens to occur in the middle of the path and produce an invalid location.
workpath = Base.Filesystem.contractuser(pwd()) * "/"

# The trailing "/" is kept on purpose: file names are appended with `*`.
resizedTrainsetPath = workpath * "images/resized/train/"
resizedValidsetPath = workpath * "images/resized/valid/"
resizedTestsetPath  = workpath * "images/resized/test/"

# `mkpath` needs a real filesystem path, so "~" is expanded again here.
mkpath(expanduser(resizedTrainsetPath))
mkpath(expanduser(resizedValidsetPath))
mkpath(expanduser(resizedTestsetPath))

input dataset

In [None]:
# Keep only the rows whose `segmented` column equals 1.
keep_segmented(table) = filter(:segmented => ==(1), table)

# Each split is read from its CSV in the working directory and filtered.
fpfn = string(expanduser(workpath), "dftrain.csv")
dftrain = keep_segmented(CSV.read(fpfn, DataFrame))

fpfn = string(expanduser(workpath), "dfvalid.csv")
dfvalid = keep_segmented(CSV.read(fpfn, DataFrame))

fpfn = string(expanduser(workpath), "dftest.csv")
dftest = keep_segmented(CSV.read(fpfn, DataFrame));

# Row/column counts of the three filtered tables.
map(size, (dftrain, dfvalid, dftest))

In [None]:
# Preview the first three rows of the training table.
first(dftrain, 3)

In [None]:
# Start the output tables as copies of the input tables, so the originals
# keep pointing at the source files.
dftrain_resized, dfvalid_resized, dftest_resized =
    map(copy, (dftrain, dfvalid, dftest));

In [None]:
# Point the image (X) and mask (y) columns of every output table at its
# resized folder, keeping only the original file name.
#
# The columns are replaced (`table.X = ...`) rather than broadcast into
# (`table.X .= ...`): whether the broadcast form writes in place or allocates a
# new column depends on the DataFrames/Julia version, and an in-place write
# must fit the element type CSV.jl chose for the column when reading it.
#
# NOTE(review): images and masks of one split share a folder, so this relies on
# the X and y base names differing (e.g. by extension) -- confirm.
for (table, folder) in (
    (dftrain_resized, resizedTrainsetPath),
    (dfvalid_resized, resizedValidsetPath),
    (dftest_resized, resizedTestsetPath),
)
    table.X = folder .* basename.(table.X)
    table.y = folder .* basename.(table.y)
end

first(dftrain_resized, 3)

resize

In [None]:
# Read the file at `src`, pad it to `targetsize` with `p.center_padview`,
# convert every pixel to `colortype`, and write the result to `dst`.
# "~" is expanded in both paths. Keeping this in a function gives each parallel
# iteration its own local variables instead of sharing notebook globals.
function _pad_and_save(src, dst, colortype, targetsize)
    original = Images.load(expanduser(src))                         # read
    padded = p.center_padview(original, targetsize) .|> colortype   # pad
    Images.save(expanduser(dst), padded)                            # save
    return nothing
end

dfs_in  = [dftrain, dfvalid, dftest]
dfs_out = [dftrain_resized, dfvalid_resized, dftest_resized]

# resize: images (column X) are written as RGB, masks (column y) as Gray
for (df_in, df_out) in zip(dfs_in, dfs_out)
    for (column, colortype) in ((:X, RGB), (:y, Gray))
        sources = df_in[!, column]
        targets = df_out[!, column]
        # `eachindex` over both vectors throws if the tables differ in length,
        # instead of silently pairing the wrong rows.
        @floop for i in eachindex(sources, targets)
            _pad_and_save(sources[i], targets[i], colortype, padsize)
        end
    end
end

In [None]:
# Row/column counts of the three output tables.
map(size, (dftrain_resized, dfvalid_resized, dftest_resized))

In [None]:
# Write each output table next to the input CSVs, with a "-resized" suffix.
fpfn = string(expanduser(workpath), "dftrain-resized.csv")
CSV.write(fpfn, dftrain_resized)

fpfn = string(expanduser(workpath), "dfvalid-resized.csv")
CSV.write(fpfn, dfvalid_resized)

fpfn = string(expanduser(workpath), "dftest-resized.csv")
CSV.write(fpfn, dftest_resized)

In [None]:
# checkpoint: reload the saved training table and show one randomly chosen
# image next to its mask.
df = CSV.read(string(expanduser(workpath), "dftrain-resized.csv"), DataFrame)
k = rand(1:nrow(df))
img, mask = (Images.load(expanduser(path)) for path in (df.X[k], df.y[k]))
mosaicview([img, mask]; nrow=1)