In [169]:
import os
from shutil import copy2

import cv2
import numpy as np
from matplotlib import pyplot as plt
from pandas import DataFrame, read_csv, concat
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [156]:
# Read the label table, using the CSV's first column as the index and keeping
# only the three Class1.x columns.
y_filename = 'training_solutions_rev1.csv'
y = read_csv(y_filename, index_col=0).loc[:, ['Class1.1', 'Class1.2', 'Class1.3']]

# 'Class' holds, for every row, the name of the column with the largest value.
y = y.assign(Class=y.idxmax(axis=1))
y_1 = y['Class']

In [157]:
# One table per label value: keep the rows of `y` whose entry in `y_1` matches.
class1 = y[y_1.eq('Class1.1')]
class2 = y[y_1.eq('Class1.2')]
class3 = y[y_1.eq('Class1.3')]

In [158]:
# Sort every per-class table by its own score column, largest value first.
class1, class2, class3 = (
    frame.sort_values(by=column, ascending=False)
    for frame, column in (
        (class1, 'Class1.1'),
        (class2, 'Class1.2'),
        (class3, 'Class1.3'),
    )
)

In [159]:
# Three independent, initially empty lists (one per class).
class1_imgs, class2_imgs, class3_imgs = [], [], []

In [160]:
def isolate(image, feather=(30,30)):
    """Fade out everything except the segmented region that contains the image centre.

    Steps, in order:
      1. Otsu-threshold a blurred grayscale version of the image.
      2. Label the thresholded image with ``cv2.connectedComponents`` and pass
         those labels to ``cv2.watershed`` as markers (run on a blurred copy).
      3. Build a 0/255 mask of the label found at the centre pixel.
      4. Dilate and blur that mask with ``feather`` to get a soft edge.
      5. Scale every colour channel by ``mask / 255``.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image as returned by ``cv2.imread`` (converted with
        ``COLOR_BGR2GRAY``, so BGR channel order is assumed). Not modified;
        the function works on a copy.
    feather : tuple of int
        Kernel size used both for the elliptical dilation and for the blur
        that softens the mask edge.

    Returns
    -------
    numpy.ndarray
        ``uint8`` image of the same shape as the input, with pixels outside the
        feathered central region attenuated towards black.
    """
    image = image.copy()
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.blur(gray, (5, 5))

    # Watershed runs on a heavily smoothed copy of the colour image.
    blur = cv2.blur(image, (30, 30))

    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)

    _, markers = cv2.connectedComponents(thresh)
    markers = cv2.watershed(blur, markers)

    # Look the centre label up from the actual image size instead of the fixed
    # pixel (212, 212), which is only the centre of a 424x424 image and is out
    # of bounds for anything smaller.
    center_row = markers.shape[0] // 2
    center_col = markers.shape[1] // 2
    centergroup = markers[center_row, center_col]
    # NOTE(review): if the centre pixel happens to fall on a watershed boundary
    # label, the mask will cover the boundary pixels rather than an object -
    # confirm whether that case needs handling.

    mask = np.zeros(markers.shape, dtype=np.uint8)
    mask[markers == centergroup] = 255

    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, feather)
    mask = cv2.dilate(mask, kernel, iterations=1)
    mask = cv2.blur(mask, feather).astype(float)

    # Broadcast the single-channel mask over all colour channels at once.
    image[:] = (image * mask[:, :, np.newaxis] / 255.0).astype(np.uint8)
    return image

In [161]:
def crop(image, size=(128,128)):
    """Return a window of ``image`` centred on its middle pixel.

    Parameters
    ----------
    image : numpy.ndarray
        Array with at least two dimensions; axis 0 is treated as rows and
        axis 1 as columns. Any further axes (e.g. colour channels) are kept.
    size : tuple of int
        ``size[0]`` is the window extent along axis 1 (columns) and ``size[1]``
        the extent along axis 0 (rows). The window is built from integer
        half-extents, so an odd extent yields one pixel less.

    Returns
    -------
    numpy.ndarray
        A view into ``image``. If the requested window is larger than the
        image, it is clipped to the image bounds.
    """
    center_row = image.shape[0] // 2
    center_col = image.shape[1] // 2
    half_cols = size[0] // 2
    half_rows = size[1] // 2

    # Clamp the start of each slice at 0. A negative start would be read by
    # NumPy as "count from the end" and silently return the wrong region when
    # the window is larger than the image.
    row_start = max(center_row - half_rows, 0)
    col_start = max(center_col - half_cols, 0)
    return image[row_start:center_row + half_rows, col_start:center_col + half_cols]

In [162]:
# Window passed to crop() and final size passed to cv2.resize().
crop_shape = (256, 256)
input_shape = (96, 96)


def process(image):
    """Run the preprocessing chain on one image: isolate, centre-crop, resize.

    The image goes through ``isolate``, then ``crop`` with ``crop_shape``, and
    the result is resized to ``input_shape`` with ``cv2.resize``.
    """
    isolated = isolate(image)
    cropped = crop(isolated, crop_shape)
    resized = cv2.resize(cropped, input_shape)
    return resized

In [163]:
# Folder holding the source images. NOTE: this name shadows the builtin `dir`;
# it is kept because other cells may reference it.
dir = './images_training_rev1/'


def process_sort(index, destdir):
    """Preprocess the image ``<index>.jpg`` and write the result under ``destdir``.

    Parameters
    ----------
    index :
        Identifier of the image; ``str(index) + '.jpg'`` is appended to the
        source folder ``dir`` and to ``destdir`` to form the two file paths.
    destdir : str
        Path prefix of the output file. Its directory part is created if it
        does not exist yet.

    Raises
    ------
    FileNotFoundError
        If the source image cannot be read (``cv2.imread`` returned ``None``).
    OSError
        If the processed image cannot be written (``cv2.imwrite`` returned a
        falsy value).
    """
    filename = dir + str(index) + '.jpg'
    destname = destdir + str(index) + '.jpg'

    # cv2.imread signals failure by returning None rather than raising.
    img = cv2.imread(filename)
    if img is None:
        raise FileNotFoundError('Could not read image: ' + filename)

    img = process(img)

    # Make sure the target folder exists before writing.
    target_folder = os.path.dirname(destname)
    if target_folder:
        os.makedirs(target_folder, exist_ok=True)

    # cv2.imwrite reports failure through its return value, so check it.
    if not cv2.imwrite(destname, img):
        raise OSError('Could not write image: ' + destname)

In [164]:
# Split every class table separately with identical settings
# (test_size=0.3, random_state=42).
split_options = {'test_size': 0.3, 'random_state': 42}
C1_train, C1_valid = train_test_split(class1, **split_options)
C2_train, C2_valid = train_test_split(class2, **split_options)
C3_train, C3_valid = train_test_split(class3, **split_options)

In [165]:
# Sanity check: sizes of two training splits next to the full tables.
(
    C1_train.shape,
    class1.shape,
    C2_train.shape,
    y.shape,
)

((18685, 4), (26693, 4), (24378, 4), (61578, 4))

In [166]:
traindir = './train/'
testdir = './test/'

# (root folder, class sub-folder, rows of that split), listed in the order the
# images are processed: all training splits first, then all validation splits.
split_jobs = [
    (traindir, 'Class1.1/', C1_train),
    (traindir, 'Class1.2/', C2_train),
    (traindir, 'Class1.3/', C3_train),
    (testdir, 'Class1.1/', C1_valid),
    (testdir, 'Class1.2/', C2_valid),
    (testdir, 'Class1.3/', C3_valid),
]

for rootdir, class_subdir, split in split_jobs:
    destdir = rootdir + class_subdir
    # Create the output folder up front; cv2.imwrite does not create missing
    # folders and only reports the failure through its return value.
    os.makedirs(destdir, exist_ok=True)
    for index in tqdm(split.index, desc=destdir):
        process_sort(index, destdir)

HBox(children=(FloatProgress(value=0.0, max=18685.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=24378.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=41.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=8008.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=10448.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=18.0), HTML(value='')))




In [177]:
# Stack the 'Class' column of the three per-class splits into one Series each
# for training and validation, then save both with a header row.
train_df = concat([split['Class'] for split in (C1_train, C2_train, C3_train)])
valid_df = concat([split['Class'] for split in (C1_valid, C2_valid, C3_valid)])

for labels, csv_name in ((train_df, 'y_train.csv'), (valid_df, 'y_valid.csv')):
    labels.to_csv(csv_name, header=True)

  after removing the cwd from sys.path.
  """


In [178]:
# Display the concatenated validation labels (the rendered output is truncated).
valid_df

GalaxyID
493144    Class1.1
176103    Class1.1
194309    Class1.1
300270    Class1.1
970529    Class1.1
147864    Class1.1
994503    Class1.1
195963    Class1.1
125277    Class1.1
204216    Class1.1
820345    Class1.1
545272    Class1.1
727585    Class1.1
301285    Class1.1
201087    Class1.1
588247    Class1.1
953693    Class1.1
364082    Class1.1
140218    Class1.1
928979    Class1.1
321948    Class1.1
562018    Class1.1
226183    Class1.1
816381    Class1.1
444913    Class1.1
159446    Class1.1
211719    Class1.1
884174    Class1.1
585212    Class1.1
228834    Class1.1
            ...   
597240    Class1.2
765998    Class1.2
424411    Class1.2
407050    Class1.2
621051    Class1.2
794805    Class1.2
720396    Class1.2
975466    Class1.2
557882    Class1.2
657188    Class1.2
828285    Class1.2
734660    Class1.2
356310    Class1.3
515600    Class1.3
320852    Class1.3
809904    Class1.3
689434    Class1.3
674832    Class1.3
901883    Class1.3
998352    Class1.3
723850    Class1.3
991