In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

#import numpy as np # linear algebra
#import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np # linear algebra
import pandas as pd
#from sklearn.model_selection import train_test_split
from zipfile import ZipFile
from skimage.transform import resize
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization, GlobalMaxPooling2D
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.callbacks import ModelCheckpoint, Callback, EarlyStopping

In [None]:
# Training labels: one row per galaxy; pandas reads the single CSV inside the zip directly.
train_solutions_path = '../input/galaxy-zoo-the-galaxy-challenge/training_solutions_rev1.zip'
traindf = pd.read_csv(train_solutions_path)

#df_train, df_test = train_test_split(traindf, test_size=.1)
#df_train.shape, df_test.shape

In [None]:
import zipfile

# Unpack the training images into the current working directory.
train_images_zip = "/kaggle/input/galaxy-zoo-the-galaxy-challenge/images_training_rev1.zip"
with zipfile.ZipFile(train_images_zip, mode="r") as train_archive:
    train_archive.extractall(path=".")

In [None]:
def append_ext(fn):
    """Return ``fn`` with the ``.jpg`` file extension appended.

    Args:
        fn: File name stem (a string).

    Returns:
        The string ``fn`` followed by ``".jpg"``.
    """
    extension = ".jpg"
    return fn + extension

# Image file name for each galaxy: "<GalaxyID>.jpg".
train_galaxy_ids = traindf['GalaxyID'].astype(str)
traindf["id"] = train_galaxy_ids.map(append_ext)

In [None]:
def crop_image(image, out_shape=(64, 64)):
    """Keep the central half of an image along each axis, then resize it.

    A quarter of the rows is removed from the top and bottom and a quarter of
    the columns from the left and right, so the image centre is preserved.

    Args:
        image: numpy array of rank 3, indexed as (rows, columns, channels).
        out_shape: (rows, columns) passed to ``resize`` for the cropped image.
            Defaults to ``(64, 64)``.

    Returns:
        Whatever ``resize`` returns for the cropped image and ``out_shape``.

    NOTE(review): when used as ImageDataGenerator's ``preprocessing_function``
    together with ``target_size=(64, 64)``, Keras appears to call this on the
    already-resized image, so the crop would be upsampled from roughly 32x32.
    Confirm that this is intended.
    """
    n_rows, n_cols = image.shape[0], image.shape[1]
    # The column margin must come from the column count (axis 1); deriving it
    # from the row count crops non-square images off-centre.
    row_margin = n_rows // 4
    col_margin = n_cols // 4
    cropped = image[row_margin:n_rows - row_margin,
                    col_margin:n_cols - col_margin]
    return resize(cropped, out_shape)

In [None]:
%%time
#########################################
## Import data with ImageDataGenerator
# Training-time image pipeline: centre crop (crop_image), 1/255 rescale and a
# 20% hold-out for validation. Augmentations that were tried are currently
# disabled (rotation_range=90, width_shift_range=0.1, height_shift_range=0.1,
# horizontal_flip=True, vertical_flip=True).
datagen = ImageDataGenerator(
    preprocessing_function=crop_image,
    rescale=1. / 255,
    validation_split=0.2,
    fill_mode='constant',
    cval=0)

image_dir = './images_training_rev1/'

# Label columns are named "Class<group>.<answer>". The list below gives the
# number of answer columns in each of the groups Class1 ... Class11 (37 total).
answers_per_group = [3, 2, 2, 2, 4, 2, 3, 7, 3, 3, 6]
classes = [
    f'Class{group}.{answer}'
    for group, n_answers in enumerate(answers_per_group, start=1)
    for answer in range(1, n_answers + 1)
]

# Options shared by the training and validation iterators; only `subset`
# differs between the two.
flow_options = dict(
    dataframe=traindf,
    directory=image_dir,
    x_col="id",
    y_col=classes,
    batch_size=64,
    seed=123,
    shuffle=True,
    class_mode="raw",
    target_size=(64, 64))

train_generator = datagen.flow_from_dataframe(subset="training", **flow_options)
valid_generator = datagen.flow_from_dataframe(subset="validation", **flow_options)

# Number of whole batches per pass (a trailing partial batch is not counted).
STEP_SIZE_TRAIN, STEP_SIZE_VALID = (
    generator.n // generator.batch_size
    for generator in (train_generator, valid_generator)
)

In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Root of the mean squared difference between predictions and targets.

    The mean is taken over every element of ``y_pred - y_true`` using the
    Keras backend ops, so the result is a single scalar.
    """
    squared_error = K.square(y_pred - y_true)
    return K.sqrt(K.mean(squared_error))

# CNN regressor: three convolutional stages (two 3x3 convolutions, one ReLU,
# then pooling) followed by a dropout-regularised dense head that outputs one
# sigmoid value per label column (37 in total).
# BatchNormalization after each pair of convolutions is currently disabled.
model = Sequential([
    # Stage 1
    Conv2D(512, (3, 3), input_shape=(64, 64, 3)),
    Conv2D(256, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2
    Conv2D(256, (3, 3)),
    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 3, collapsed to one vector per image by global max pooling
    Conv2D(128, (3, 3)),
    Conv2D(128, (3, 3)),
    Activation('relu'),
    GlobalMaxPooling2D(),

    # Dense head
    Dropout(0.25),
    Dense(128),
    Activation('relu'),
    Dropout(0.25),
    Dense(128),
    Activation('relu'),
    Dropout(0.25),
    Dense(128),
    Activation('relu'),
    Dropout(0.25),
    Dense(37),
    Activation('sigmoid'),
])

# Optimise binary cross-entropy; RMSE is tracked as an additional metric only.
model.compile(
    optimizer='adamax',
    loss='binary_crossentropy',
    metrics=[root_mean_squared_error])
model.summary()

In [None]:
# Directory prefix used for the checkpoint file written during training and
# for the saved model that is loaded back later in the notebook.
data_dir = './'

class LossHistory(Callback):
    """Record the training loss per batch and the validation loss per epoch.

    Attributes:
        losses: value of ``logs['loss']`` after every training batch.
        val_losses: value of ``logs['val_loss']`` after every epoch.
    """

    def on_train_begin(self, logs=None):
        """Reset both histories at the start of each ``fit`` call."""
        self.losses = []
        self.val_losses = []

    def on_batch_end(self, batch, logs=None):
        """Append the training loss reported for the batch that just ended."""
        logs = logs or {}
        self.losses.append(logs.get('loss'))

    def on_epoch_end(self, epoch, logs=None):
        """Append the validation loss reported for the epoch that just ended.

        Validation metrics are only present in the epoch-level logs; reading
        ``val_loss`` from the batch-level logs only ever yields ``None``.
        """
        logs = logs or {}
        self.val_losses.append(logs.get('val_loss'))

# Stop training once the validation loss has failed to improve for 4 epochs.
early_stopping = EarlyStopping(
    monitor='val_loss', patience=4, verbose=1, mode='auto')

history = LossHistory()

# ModelCheckpoint is the tensorflow.keras class imported at the top of the
# notebook. It is deliberately not re-imported from the standalone `keras`
# package here: mixing callbacks from one namespace with a model built from
# the other is fragile across Keras/TensorFlow versions.
# With save_best_only=True the file is only overwritten when the monitored
# quantity improves.
checkpointer = ModelCheckpoint(
    filepath=data_dir+'weights_full.hdf5', verbose=2, save_best_only=True)

hist = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=15,
    callbacks=[history, checkpointer, early_stopping])

In [None]:
# Save under data_dir so the path matches the one used when the model is
# loaded back later in the notebook (data_dir + "my_h5_model.h5").
model.save(data_dir + "my_h5_model.h5")

In [None]:
# Plot training and validation loss per epoch.
# The curves are the optimised loss from `hist.history`, not the RMSE metric,
# so the y-axis is labelled accordingly.
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(hist.epoch, hist.history['loss'], label='Training Loss')
ax.plot(
    hist.epoch, hist.history['val_loss'], label='Validation Loss', linestyle='--')
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.set_title("Training vs. validation loss")
ax.legend()
plt.show()

In [None]:
import zipfile

# Unpack the test images into the current working directory.
test_images_zip = "/kaggle/input/galaxy-zoo-the-galaxy-challenge/images_test_rev1.zip"
with zipfile.ZipFile(test_images_zip, mode="r") as test_archive:
    test_archive.extractall(path=".")

In [None]:
# The all-zeros benchmark file supplies the test GalaxyIDs and column layout.
benchmark_path = '../input/galaxy-zoo-the-galaxy-challenge/all_zeros_benchmark.zip'
testdf = pd.read_csv(benchmark_path)

# Image file name for each test galaxy: "<GalaxyID>.jpg".
test_galaxy_ids = testdf['GalaxyID'].astype(str)
testdf["id"] = test_galaxy_ids.map(append_ext)

In [None]:
# GalaxyID is carried through as the first "label" column so that each image
# in a batch can be matched back to its galaxy after prediction.
ycols = ['GalaxyID', *classes]

# Same preprocessing as for training (centre crop + 1/255 rescale), with no
# validation hold-out. Augmentation options remain disabled here as well.
testdatagen = ImageDataGenerator(
    preprocessing_function=crop_image,
    rescale=1. / 255,
    validation_split=0.0,
    fill_mode='constant',
    cval=0)

test_image_dir = './images_test_rev1/'

test_flow_options = dict(
    dataframe=testdf,
    directory=test_image_dir,
    x_col="id",
    y_col=ycols,
    subset="training",
    batch_size=64,
    seed=123,
    shuffle=True,
    class_mode="raw",
    target_size=(64, 64))
test_generator = testdatagen.flow_from_dataframe(**test_flow_options)

In [None]:
# Number of batches in the test iterator, and the number of images in its
# first batch.
n_batches = len(test_generator)
first_batch_images, _first_batch_targets = test_generator[0]
num_in_batch = first_batch_images.shape[0]

In [None]:
from tqdm.auto import tqdm
from numpy import expand_dims

# Predict the whole test set one batch at a time.
# Each batch is read from the generator exactly once (indexing the generator
# re-loads and re-processes the images from disk), and the model is called
# once per batch rather than once per image. The batch's own length is used,
# so a shorter final batch is handled correctly.
val_predictions = []
ids = []

# To smoke-test quickly, replace len(test_generator) with a small number.
for i in tqdm(range(len(test_generator))):
    batch_images, batch_targets = test_generator[i]
    val_predictions.append(model.predict(batch_images, verbose=0))
    # Column 0 of the raw targets is GalaxyID (first entry of ycols).
    ids.append(batch_targets[:, 0])

Y_pred = np.vstack(val_predictions)
ids = np.concatenate(ids).reshape(-1, 1)

#print(Y_pred)
#print(ids)

submission_df = pd.DataFrame(np.hstack((ids, Y_pred)), columns=testdf.columns[0:38])
submission_df = submission_df.sort_values(by=['GalaxyID'])
# Stacking ids next to the float predictions turns them into floats; cast back
# to integers before the string conversion so ids read "100008", not "100008.0".
submission_df['GalaxyID'] = submission_df['GalaxyID'].astype(np.int64).astype(str)

In [None]:
# Write the submission without the DataFrame index column.
submission_path = 'test_submission_1.csv'
submission_df.to_csv(submission_path, index=False)

In [None]:
from tensorflow.keras.models import load_model
# Reload the saved model. The custom RMSE metric has to be supplied by name so
# that Keras can resolve it while deserialising the compiled model.
saved_model_path = data_dir+"my_h5_model.h5"
custom_metrics = {'root_mean_squared_error': root_mean_squared_error}
model = load_model(saved_model_path, custom_objects=custom_metrics)

In [None]:
# Sanity check: predict one image (item 10 of batch 6) with the reloaded model.
sample_batch_images = test_generator[6][0]
# Add a leading batch axis so the single image has shape (1, H, W, C).
img = sample_batch_images[10][np.newaxis, ...]
test_pred = model.predict(img)
print(test_pred)