In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import time
import warnings

In [1]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input, decode_predictions
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, UpSampling2D, Flatten, BatchNormalization, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [5]:
!pip install gdown

Collecting gdown
  Downloading gdown-4.3.1.tar.gz (13 kB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: gdown
  Building wheel for gdown (pyproject.toml) ... [?25ldone
[?25h  Created wheel for gdown: filename=gdown-4.3.1-py3-none-any.whl size=14493 sha256=b8a25c1c9532e4025e031474096520f429c68f6edda728e187d94172b28becc4
  Stored in directory: /home/ec2-user/.cache/pip/wheels/d1/1b/3e/f03df6be3040b0f9a1a29db63caba2d18ae5aa869217dc4199
Successfully built gdown
Installing collected packages: gdown
Successfully installed gdown-4.3.1


In [None]:
https://drive.google.com/file/d/1mPBHTBWnoAKOf21niapRghwI1dhsnim9/view?usp=sharing

In [7]:
import gdown

url = 'https://drive.google.com/uc?id=1mPBHTBWnoAKOf21niapRghwI1dhsnim9'
output = 'rockai_images.h5'
gdown.download(url, output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1mPBHTBWnoAKOf21niapRghwI1dhsnim9
To: /home/ec2-user/SageMaker/Explore/Chris/rockAI/rockai_images.tgz
100%|██████████| 1.76G/1.76G [00:44<00:00, 39.7MB/s]


'rockai_images.tgz'

In [10]:
def load_data(filepath):
    import h5py
    import numpy as np
    h5f = h5py.File(filepath, 'r')
    X = h5f['X'][:]
    classnames = [s.decode('utf-8') for s in h5f['classname'][:]]
    filenames = [s.decode('utf-8') for s in h5f['filename'][:]]
    h5f.close()
    return X, np.array(classnames), np.array(filenames)

def create_train_test_idx(classnames):
    import pandas as pd
    df = pd.DataFrame(data=enumerate(classnames), columns=['index', 'classname'])
    test_df = None
    train_df = None
    for classname in df['classname'].unique():
        test_tmp_df = df[df['classname']==classname].sample(50, replace=False, random_state=1234)
        train_tmp_df = df[(df['classname']==classname) & ~(df['index'].isin(test_tmp_df['index']))]
        test_df = test_tmp_df if test_df is None else pd.concat([test_df, test_tmp_df])
        train_df = train_tmp_df if train_df is None else pd.concat([train_df, train_tmp_df])    
    return train_df['index'].values, test_df['index'].values
    
X, classnames, filenames = load_data('rockai_images.h5')
train_idx, test_idx = create_train_test_idx(classnames)
X_train, X_test = X[train_idx], X[test_idx]
y_train = [classnames[i] for i in train_idx]
y_test = [classnames[i] for i in test_idx]

num_classes = 2

#Pre-process the data
X_train = preprocess_input(X_train)
X_test = preprocess_input(X_test)
y_train = [0 if x=='No_RA' else 1 for x in y_train]
y_test = [0 if x=='No_RA' else 1 for x in y_test]
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [11]:
import numpy as np

def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=False):
    def eraser(input_img):
        img_h, img_w, img_c = input_img.shape
        p_1 = np.random.rand()

        if p_1 > p:
            return input_img

        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w
            r = np.random.uniform(r_1, r_2)
            w = int(np.sqrt(s / r))
            h = int(np.sqrt(s * r))
            left = np.random.randint(0, img_w)
            top = np.random.randint(0, img_h)

            if left + w <= img_w and top + h <= img_h:
                break

        if pixel_level:
            c = np.random.uniform(v_l, v_h, (h, w, img_c))
        else:
            c = np.random.uniform(v_l, v_h)

        input_img[top:top + h, left:left + w, :] = c

        return input_img

    return eraser


In [12]:
inception_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
inception_model.trainable = False

model = Sequential()
model.add(inception_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(256, activation='relu'))
model.add(Dropout(.5))
model.add(Dense(num_classes, activation='softmax'))

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [17]:
def run_data_random():
    X, classnames, filenames = load_data('rockai_images.h5')
    train_idx, test_idx = create_train_test_idx(classnames)
    X_train, X_test = X[train_idx], X[test_idx]
    y_train = [classnames[i] for i in train_idx]
    y_test = [classnames[i] for i in test_idx]

    #Pre-process the data
    X_train = preprocess_input(X_train)
    X_test = preprocess_input(X_test)
    y_train = [0 if x=='No_RA' else 1 for x in y_train]
    y_test = [0 if x=='No_RA' else 1 for x in y_test]
    y_train = tf.keras.utils.to_categorical(y_train, num_classes)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes)
    
    return X_train, y_train, X_test, y_test

In [25]:
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix,accuracy_score
def run_model(X_train, y_train, X_test, y_test):
    datagen = ImageDataGenerator(preprocessing_function=get_random_eraser(v_l=0, v_h=1, pixel_level=True))
    # datagen = ImageDataGenerator()
    datagen.fit(X_train)
    batch_size = 32
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    historytemp = model.fit(datagen.flow(X_train, y_train, batch_size=32),
                        steps_per_epoch=X_train.shape[0] // batch_size,
                        epochs=30)
    # evaluate the model
    y_pred1 = model.predict(X_test)
    y_pred = np.argmax(y_pred1, axis=1)
    #y_test_bool = np.argmax(y_test, axis=1)
    #f = f1_score(y_test_bool, y_pred , average="macro")
    #a = accuracy_score(y_test_bool, y_pred)
    
    return y_pred

In [21]:
#X_train, y_train, X_test, y_test = run_data_random()
trx = []
tex = []
trY = []
teY = []
for i in range(3):
    X_train, y_train, X_test, y_test = run_data_random()
    trx.append(X_train)
    tex.append(X_test)
    trY.append(y_train)
    teY.append(y_test)

In [26]:
pred_all= []


for i in range(3):
    y_pred = run_model(trx[i],trY[i],tex[i],teY[i])
    pred_all.append(y_pred)
    print('done for one repeat')

  ...
    to  
  ['...']
Train for 42 steps
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
done for one repeat
  ...
    to  
  ['...']
Train for 42 steps
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
done for one repeat
  ...
    to  
  ['...']
Train for 42 steps
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epo

In [15]:
acc

[0.84]

In [16]:
f1

[0.8357963875205254]

In [27]:
y_test_bool = np.argmax(y_test, axis=1)
a = accuracy_score(y_test_bool, pred_all[0])
f = f1_score(y_test_bool, pred_all[0])
print(a,f)

0.75 0.8


In [28]:
a = accuracy_score(y_test_bool, pred_all[1])
f = f1_score(y_test_bool, pred_all[1])
print(a,f)

0.85 0.8695652173913044


In [29]:
a = accuracy_score(y_test_bool, pred_all[2])
f = f1_score(y_test_bool, pred_all[2])
print(a,f)

0.76 0.8064516129032258


In [30]:
print(np.mean([0.75,0.85,0.76]))
print(np.mean([0.80,0.8695,0.80645]))

0.7866666666666667
0.8253166666666667
