In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the table whose first column holds the image ids and whose remaining
# columns hold the labels (see get_all_images for how the columns are used).
df = pd.read_csv('class1_df.csv')

# Hold out 20% of the rows for validation.  The split is seeded so that a
# Restart & Run All reproduces the same train/validation partition.
df_train, df_val = train_test_split(df, test_size=.2, random_state=42)
df_train.shape, df_val.shape

((49262, 4), (12316, 4))

In [2]:
from skimage.transform import resize
from tqdm import tqdm
import matplotlib.pyplot as plt
%matplotlib inline
          

# Image geometry, each given as (rows, cols) in pixels.
ORIG_SHAPE = (424, 424)  # source image size used to centre the crop window
CROP_SIZE = (256, 256)   # default window cut out of each source image
IMG_SHAPE = (75, 75)     # default size each crop is resized to

def get_image(path, x1, y1, shape, crop_size):
    """Load one image, cut a window out of it and resize the window.

    Parameters
    ----------
    path : str
        Location of the image file; it is read with ``plt.imread``.
    x1, y1 : int
        Row and column index of the top-left corner of the crop window.
    shape : tuple of int
        (rows, cols) of the returned image.
    crop_size : tuple of int
        (rows, cols) of the window cut out before resizing.

    Returns
    -------
    numpy.ndarray
        The cropped and resized image as floats.  For integer-typed input
        (e.g. 8-bit JPEG data) the intensities are scaled to [0, 1].
    """
    img = plt.imread(path)
    img = img[x1:x1 + crop_size[0], y1:y1 + crop_size[1]]
    # With its default preserve_range=False, `resize` converts the image to
    # floating point following the img_as_float conventions, so integer pixel
    # data already comes back in [0, 1].  The extra `/ 255.` that used to
    # follow squeezed every pixel into [0, 1/255]; it is intentionally gone.
    return resize(img, shape)
    
def get_all_images(dataframe, shape=IMG_SHAPE, crop_size=CROP_SIZE,
                   image_dir='./galaxy-zoo-data/images_training_rev1/'):
    """Load, centre-crop and resize the image of every row in `dataframe`.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        First column: numeric image id (cast to int, then used as the file
        name ``<id>.jpg``).  Remaining columns: returned unchanged as labels.
    shape : tuple of int, optional
        (rows, cols) every image is resized to.
    crop_size : tuple of int, optional
        (rows, cols) of the window cut from the centre of each image.
    image_dir : str, optional
        Directory holding the ``<id>.jpg`` files.

    Returns
    -------
    x_batch : numpy.ndarray
        The processed images stacked along a new first axis, in row order.
    y_batch : numpy.ndarray
        All columns of `dataframe` after the first one.
    """
    import os  # local import: the notebook's import cell does not provide it

    # Centre the window using the crop size that was actually requested.
    # The offsets used to be derived from the global CROP_SIZE, so passing a
    # different `crop_size` silently produced an off-centre crop.
    x1 = (ORIG_SHAPE[0] - crop_size[0]) // 2
    y1 = (ORIG_SHAPE[1] - crop_size[1]) // 2

    sel = dataframe.values
    ids = sel[:, 0].astype(int).astype(str)
    y_batch = sel[:, 1:]

    x_batch = np.array([
        get_image(os.path.join(image_dir, image_id + '.jpg'), x1, y1,
                  shape=shape, crop_size=crop_size)
        for image_id in tqdm(ids)
    ])
    return x_batch, y_batch
        
# Build the image tensors and label matrices, training split first and
# validation split second.
(X_train, y_train), (X_val, y_val) = map(get_all_images, (df_train, df_val))

100%|██████████| 49262/49262 [18:23<00:00, 44.66it/s]
100%|██████████| 12316/12316 [04:31<00:00, 45.36it/s]


In [3]:
X_train.shape, y_train.shape

((49262, 75, 75, 3), (49262, 3))

In [4]:
y_val.shape

(12316, 3)

In [5]:
# ignore Warning
import warnings
warnings.filterwarnings("ignore")

# Model Layers
from keras.models import Sequential, Model, Input
from keras.layers import Input, Dense, Dropout, BatchNormalization, Activation
from keras.layers import Conv2D, GlobalMaxPool2D, GlobalAveragePooling2D
from keras.utils import multi_gpu_model

# Open-source models
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.densenet import DenseNet121

# Input geometry and number of output classes.
img_rows, img_cols = 75, 75
total_classes = 3

# Backbone: InceptionResNetV2 without its classification top and without
# pre-trained weights (weights=None).
pretrained_model = InceptionResNetV2(
    include_top=False,
    weights=None,
    input_shape=(img_rows, img_cols, 3),
)

# Classification head, applied in this order to the backbone output:
# global average pooling -> dropout -> Dense(20) -> batch norm -> ReLU
# -> Dense(total_classes).
head_layers = [
    GlobalAveragePooling2D(),
    Dropout(.5),
    Dense(20, kernel_initializer='he_normal'),
    BatchNormalization(),
    Activation("relu"),
    Dense(total_classes, kernel_initializer='he_normal'),
]
x = pretrained_model.output
for layer in head_layers:
    x = layer(x)

# Softmax turns the final Dense output into per-class probabilities.
predictions = Activation('softmax')(x)
model = Model(inputs=pretrained_model.input, outputs=predictions)



Using TensorFlow backend.















Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.



In [6]:
import keras
# Compile the model: Adamax with its default settings, categorical
# cross-entropy as the loss, accuracy as the reported metric.
optimizer = keras.optimizers.Adamax()
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])





In [8]:
# ignore Warning
import warnings
warnings.filterwarnings("ignore")

from keras.callbacks import EarlyStopping

# Training schedule.
EPOCHS = 10
# Early stopping only has an effect when PATIENCE is below EPOCHS: with the
# former patience=10 and epochs=10 the callback could never trigger before
# training finished on its own.
PATIENCE = 3
# Same value as before (75).  It used to be written as X_train.shape[1],
# i.e. the image height, which is unrelated to batching.
BATCH_SIZE = 75

# Stop when validation accuracy has not improved for PATIENCE epochs in a row.
callbacks = [EarlyStopping(monitor='val_acc', patience=PATIENCE)]

history = model.fit(
  X_train,
  y_train,
  epochs=EPOCHS,
  validation_data=(X_val, y_val),
  batch_size=BATCH_SIZE,
  callbacks=callbacks
)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 49262 samples, validate on 12316 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
 3900/49262 [=>............................] - ETA: 1:38:15 - loss: 0.6401 - acc: 0.8544

KeyboardInterrupt: 

In [None]:
# Print Keras' layer-by-layer summary of `model`.
model.summary()

In [None]:
# Run the model on the whole validation set.
predictions = model.predict(X_val)

# Show the first predicted row next to its ground-truth row.
first_row = slice(0, 1)
print("predictions:", predictions[first_row])
print(y_val[first_row])

In [None]:
# Loss and accuracy on the validation set, in the order Keras returns them.
score = model.evaluate(X_val, y_val, verbose=0)
for label, value in zip(('Val loss:', 'Val accuracy:'), score):
    print(label, value)

In [None]:
# One curve per quantity recorded in `history.history`, all on a shared axis.
ax = pd.DataFrame(history.history).plot(figsize=(8, 5))
ax.grid(True)
ax.set_title('Model performance throughout training')
# The frame holds accuracy columns as well as loss columns, so the y-axis
# gets a neutral label instead of the former, misleading 'Loss'.
ax.set_ylabel('Metric value')
ax.set_xlabel('epoch')
plt.show()

## Score

Résultat obtenu avec le modèle suivant :  
model = Sequential()  
model.add(Conv2D(filters=8, kernel_size=(3,3),input_shape=(64,64,3),activation='relu'))  
model.add(MaxPooling2D(pool_size=(2,2)))  
model.add(Conv2D(filters=8, kernel_size=(3,3),activation='relu')) #82% avec 16  
model.add(MaxPooling2D(pool_size=(2,2)))  
model.add(Flatten())  
model.add(Dense(3, activation='softmax'))

model.compile(
  loss='categorical_crossentropy',
  optimizer=keras.optimizers.Adamax(),
  metrics=['accuracy'],
)

history = model.fit(
  X_train,
  y_train,
  epochs=100,
  validation_data=(X_val, y_val),
  batch_size=X_train.shape[1], 
  callbacks=callbacks
)

Accuracy = 82,25 %  
Prédiction vs valeur correcte : prédiction = [[0.5717658 0.4046546 0.0235796]], valeur correcte = [[0.548188 0.416942 0.03487 ]]