# Fish Image Processor

Process and clean the image files to make them suitable for the GAN.

## Exploring the data

In [None]:
import numpy as np  
import pandas as pd 
import matplotlib.pyplot as plt
import os
import glob
import math
import random
import time
import datetime
import shutil
import imageio
from tqdm import tqdm, tqdm_notebook

#from dataclasses import dataclass
from pathlib import Path
import warnings
#from scipy import linalg

#import xml.etree.ElementTree as ET 

import cv2
from PIL import Image

print(os.listdir("./"))

In [None]:
# Directories used by the cells below; all paths are relative to the working directory.
WebFishDIR='./GrabbedFromWeb/'
WebFishDIRback='./GrabbedFromWebBack/'
NewLabeledFishDIR='./NewLabeledFish/'
ProcessedFishDIR='./ProcessedFish/'
# NOTE(review): this rebinds ProcessedFishDIR, so once the cell has run the
# './ProcessedFish/' value above is never used and processed images are written
# under './CustomFish/' -- confirm which target is intended.
ProcessedFishDIR='./CustomFish/'

In [None]:
def read_image(src):
    """Load the image file at ``src`` and return it converted from BGR to RGB.

    ``cv2.imread`` reports failure by returning ``None`` instead of raising,
    so that case is converted into an exception here.

    Raises:
        FileNotFoundError: if OpenCV could not read ``src``. The offending
            path is carried in the exception message so it shows up in the
            traceback instead of only being printed to stdout.
    """
    img = cv2.imread(src)
    if img is None:
        raise FileNotFoundError("Could not read image: {}".format(src))
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def write_image(img,filename):
    """Save the RGB image ``img`` to ``filename``.

    The array is converted from RGB to BGR before being handed to
    ``cv2.imwrite``.

    Raises:
        OSError: if ``cv2.imwrite`` reports that the file was not written
            (it returns a false value on failure), so a failed save is
            not silently ignored.
    """
    bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    if not cv2.imwrite(filename, bgr):
        raise OSError("Could not write image: {}".format(filename))

# Combine and edit files

Convert all files to a square fish image (128x128 with the default `SIZE` of `fixAspectAndScale`).

In [None]:
def getNewFishFileName(idVal):
    """Return the output file name for processed image number ``idVal``.

    The id is converted with ``str`` and left-padded with zeros to at least
    four characters, then prefixed with 'D' and given a '.jpg' extension,
    e.g. ``7 -> 'D0007.jpg'``.

    Ids longer than four characters keep the prefix and all of their
    characters (``12345 -> 'D12345.jpg'``). The earlier slice arithmetic
    dropped the prefix at five characters and produced names such as
    'D000123456.jpg' beyond that.
    """
    prefix = 'D'
    return prefix + str(idVal).rjust(4, '0') + '.jpg'

In [None]:
# custom padding routine to expand edges in a way that is less prone to introducing noisy 
# behaviors that the GAN could target, such as sharp vertical and horizontal lines
def padbycounter(line,count):
    """Shift ``line`` by ``count`` positions, filling the gap with a mirror of its own edge.

    * ``count > 0``: the first ``count`` entries are dropped and a reversed
      copy of the ``count`` entries just before the last one is appended.
    * ``count < 0``: the last ``-count`` entries are dropped and a reversed
      copy of the ``-count`` entries just after the first one is prepended.
    * ``count == 0``: ``line`` itself is returned unchanged.

    The shift is clamped to ``len(line) - 1`` in either direction. Beyond
    that the slices run out of data and the result would be shorter than
    ``line``, which breaks the row assignment in the caller.

    The mirrored segment is reversed along the first axis with NumPy
    slicing, which reverses the order of the entries just like a vertical
    ``cv2.flip`` does for the 2-D rows this notebook passes in.
    """
    limit = max(len(line) - 1, 0)
    count = max(-limit, min(limit, count))
    if count == 0:
        return line
    if count < 0:
        mirrored = line[1:1 - count][::-1]
        return np.concatenate((mirrored, line[:count]))
    mirrored = line[-count - 1:-1][::-1]
    return np.concatenate((line[count:], mirrored))

def custpad(img, padding, ldown, FADETOAV = True, RANDOMDRIFT = True, RNDRANGE = 4):
    """Append ``ldown`` synthesized rows to the end of ``img`` and return the result.

    The new rows are built from ``padding`` by alternating flipped and
    unflipped copies of it, then optionally blended towards the mean of the
    last row of ``img`` and shifted by a random walk.

    Parameters:
        img: array the rows are appended to (presumably an (H, W, C) image --
            confirm against callers).
        padding: block of rows used as source material; imageSculptor passes
            the last PIXELGAP rows of ``img``.
        ldown: number of rows to generate.
        FADETOAV: if true, row ``i`` is a blend of the generated row (weight
            ``(ldown - i)/ldown``) and the mean of ``img[-1]`` (weight ``i/ldown``).
        RANDOMDRIFT: if true, each generated row is shifted with padbycounter
            by a running sum of random integers.
        RNDRANGE: each random step is drawn with
            ``random.randint(-RNDRANGE, RNDRANGE)``.

    Returns:
        ``np.concatenate((img, paddown))`` -- ``img`` followed by the new rows.
    """
    flipped = False
    # Start with the flipped padding block minus its first row.
    paddown = cv2.flip(padding, 0)[1:]
    # Keep appending alternating unflipped / flipped copies until there is
    # enough material for ldown rows.
    while len(paddown) < ldown:
        if flipped:
            paddown = np.concatenate((paddown,cv2.flip(padding, 0)[1:]))
        else:
            paddown = np.concatenate((paddown,padding))
        flipped = not flipped
    # Trim any surplus so at most ldown rows remain.
    paddown = paddown[:ldown]
    if FADETOAV:
        # for fading, drift to this average (mean over the entries of the last row of img)
        padav = (np.round(np.sum(img[-1], axis=0)/len(img[-1]))).astype(int)
        for i in range(len(paddown)):
            # Linear blend: the weight of padav grows from 0 at i == 0 towards 1.
            paddown[i] = np.round(paddown[i]*1.*(ldown - i)/ldown + 1.*padav*i/ldown).astype(int)
    if RANDOMDRIFT:
        # counter is a running sum of the random steps; row i is shifted by
        # the total accumulated so far.
        counter = 0
        for i in range(len(paddown)):
            counter += (random.randint(-RNDRANGE, RNDRANGE))
            paddown[i] = padbycounter(paddown[i],counter)
    return np.concatenate((img, paddown))
        
def imageSculptor(img, PIXELGAP = 10):
    """Pad ``img`` to a square using custpad on both sides of its shorter dimension.

    An image that is taller than wide is rotated 90 degrees clockwise first
    and rotated back at the end, so the padding code only ever appends rows.
    The missing rows are split between the two sides: ``floor`` of half the
    difference goes after the last row, ``ceil`` before the first row. Each
    side uses the outermost ``PIXELGAP`` rows on that side as padding source.
    """
    height, width = len(img), len(img[0])
    portrait = width < height
    if portrait:
        img = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)

    half_gap = abs(width - height)/2
    rows_below = int(np.floor(half_gap))
    rows_above = int(np.ceil(half_gap))

    # Extend past the last row first.
    img = custpad(img, img[-PIXELGAP:], rows_below)

    # Flip so the first rows become the last rows, extend, then flip back.
    upside_down = cv2.flip(img, 0)
    upside_down = custpad(upside_down, upside_down[-PIXELGAP:], rows_above)
    img = cv2.flip(upside_down, 0)

    if portrait:
        img = cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)
    return img

In [None]:
plt.imshow(imageSculptor(img))

In [None]:
def fixAspectAndScale(img,SIZE = 128,inter = None):
    """Make ``img`` square with imageSculptor, then resize it to ``SIZE`` x ``SIZE``.

    The edges are extended with imageSculptor rather than a plain replicated
    border, to avoid introducing stark vertical or horizontal lines.

    Parameters:
        img: image array to process.
        SIZE: side length, in pixels, of the square result.
        inter: OpenCV interpolation flag passed to ``cv2.resize``. ``None``
            (the default) selects ``cv2.INTER_CUBIC``, which is what both
            branches of the former size check used, so existing callers get
            the same output as before.

    Returns:
        The resized ``SIZE`` x ``SIZE`` image.
    """
    if inter is None:
        inter = cv2.INTER_CUBIC
    squared = imageSculptor(img)
    return cv2.resize(squared,(SIZE,SIZE),interpolation = inter)

In [None]:
AllWebFiles = os.listdir(WebFishDIR)
AllNLFFiles = os.listdir(NewLabeledFishDIR)
imgID=0

In [None]:
# Collect the width/height ratio of every image in both source folders and
# plot the distribution.
aspectRatioSpread = []
for srcdir, names in ((WebFishDIR, AllWebFiles), (NewLabeledFishDIR, AllNLFFiles)):
    for file in names:
        filename = srcdir + file
        img = read_image(filename)
        height, width = len(img), len(img[0])
        aspectRatioSpread.append(float(width) / float(height))
plt.hist(aspectRatioSpread)

In [None]:
imgID=0

In [None]:
# Manual browsing step: show the web image at position imgID, then advance
# imgID so re-running the cell steps to the next file. `filename` and `img`
# stay bound for the cells that follow.
print(AllWebFiles[imgID])
print(imgID)
filename = WebFishDIR+AllWebFiles[imgID]
img = read_image(filename)
imgID+=1
plt.imshow(img)

In [None]:
newimg = fixAspectAndScale(img)
plt.imshow(newimg)

Resizing with INTER_AREA blurs fine features on the fish (particularly the scales) more than INTER_CUBIC does.  INTER_CUBIC seems to perform better especially on small resizings, while INTER_AREA does a bit better on larger resizings.

In [None]:
idVal=0

In [None]:
# Convert every image from both source folders to the square output format
# and save it under the next sequential name; idVal keeps counting across
# the two folders.
for srcdir, names in ((WebFishDIR, AllWebFiles), (NewLabeledFishDIR, AllNLFFiles)):
    for file in names:
        filename = srcdir + file
        img = read_image(filename)
        newimg = fixAspectAndScale(img)
        newfn = ProcessedFishDIR + getNewFishFileName(idVal)
        write_image(newimg, newfn)
        idVal += 1

In [None]:
# Convert only the files listed in NewFiles (bound in a separate cell as
# AllWebFiles[524:]) and number the results starting at a fixed id.
# NOTE(review): 609 is hard-coded -- presumably the first id not used by an
# earlier run; confirm before re-running so existing outputs are not overwritten.
idVal = 609
for file in NewFiles:
    filename = WebFishDIR+file
    img = read_image(filename)
    newimg = fixAspectAndScale(img)
    newfn = ProcessedFishDIR + getNewFishFileName(idVal)
    write_image(newimg,newfn)
    idVal+=1

### Examine and clean webfiles (done)

In [None]:
AllWebFiles = os.listdir(WebFishDIR)
imgID=1

In [None]:
AllWebFiles.sort()

In [None]:
NewFiles = AllWebFiles[524:]

In [None]:
imgID=286

In [None]:
print("{} {}".format(len(img),len(img[0])))

In [None]:
# Manual browsing step over NewFiles: show the image at position imgID, then
# advance imgID. `filename` and `img` stay bound and are what the crop, save
# and delete cells below operate on.
print(NewFiles[imgID])
print(imgID)
filename = WebFishDIR+NewFiles[imgID]
img = read_image(filename)
imgID+=1
plt.imshow(img)

In [None]:
#fish = img[2:-5,2:-2]
fish = img[0:,0:250]
plt.imshow(fish)

In [None]:
write_image(fish,filename)

In [None]:
#write_image(fish,"./GrabbedFromWeb/AA0416.jpg")

In [None]:
os.remove(filename)

Find repeated files

In [None]:
# Record every (backup file, new file) pair whose images have the same height
# and width and identical pixel values.
listofrepeats = []
for oldname in OldFiles:
    filenamei = WebFishDIRback + oldname
    imgi = read_image(filenamei)
    for newname in NewFiles:
        filenamej = WebFishDIR + newname
        imgj = read_image(filenamej)
        # Only compare pixels once both dimensions are known to match.
        same_size = len(imgi) == len(imgj) and len(imgi[0]) == len(imgj[0])
        if same_size and (imgi == imgj).all():
            listofrepeats.append([filenamei, filenamej])
            print(filenamei + " : " + filenamej)

In [None]:
# Compare each new file with every later new file and record the pairs whose
# images have the same height and width and identical pixel values.
for i, namei in enumerate(NewFiles):
    filenamei = WebFishDIR + namei
    imgi = read_image(filenamei)
    for namej in NewFiles[i+1:]:
        filenamej = WebFishDIR + namej
        imgj = read_image(filenamej)
        # Only compare pixels once both dimensions are known to match.
        same_size = len(imgi) == len(imgj) and len(imgi[0]) == len(imgj[0])
        if same_size and (imgi == imgj).all():
            listofrepeats.append([filenamei, filenamej])
            print(filenamei + " : " + filenamej)

In [None]:
# For every recorded duplicate pair, mark the second file of the pair for deletion.
tosslistx = [pair[1] for pair in listofrepeats]

In [None]:
# Delete the files marked as duplicates. This stays best-effort: one failed
# deletion does not stop the loop.
for i in tosslistx:
    try:
        os.remove(i)
    except FileNotFoundError:
        # Expected when the same path is listed more than once (a file that
        # duplicates several others): it has already been deleted.
        pass
    except OSError as err:
        # Anything else (permissions, path is a directory, ...) is reported
        # instead of being silently swallowed.
        print("Could not remove {}: {}".format(i, err))

### Process Labeled Fish sample into a useful form (parent files removed)

In [None]:
def getNewFishFileName(idVal):
    """Return the output file name for cropped labeled-fish image number ``idVal``.

    The id is converted with ``str`` and left-padded with zeros to at least
    four characters, then prefixed with 'C' and given a '.jpg' extension,
    e.g. ``7 -> 'C0007.jpg'``.

    Ids longer than four characters keep the prefix and all of their
    characters (``12345 -> 'C12345.jpg'``). The earlier slice arithmetic
    dropped the prefix at five characters and produced names such as
    'C000123456.jpg' beyond that.
    """
    prefix = 'C'
    return prefix + str(idVal).rjust(4, '0') + '.jpg'
# IDVal numbers the crops saved further down (note the capitalisation: this
# is a different variable from the lowercase idVal used for the bulk
# conversion); imgID is the browsing position in AllLFFiles.
IDVal=1
imgID=0

# NOTE(review): LabeledFishDIR is not assigned anywhere in the visible cells --
# it must be defined before this line runs; confirm its location.
AllLFFiles = os.listdir(LabeledFishDIR)

In [None]:
imgID=1254

In [None]:
imgID+=1
print(AllLFFiles[imgID])
print(imgID)
img = read_image(LabeledFishDIR+AllLFFiles[imgID])
plt.imshow(img)

In [None]:
fish = img[150:400,300:750]
plt.imshow(fish)

In [None]:
# Save the current crop (`fish`) under the next 'C'-prefixed name.
# NOTE(review): filename has no directory prefix, so the file is written
# relative to the working directory -- confirm this is the intended location.
filename = getNewFishFileName(IDVal)
IDVal+=1
# IDVal was already incremented, so this prints the id the next crop will get.
print(IDVal)
write_image(fish,filename)