In [1]:
from mpl_toolkits.mplot3d import Axes3D
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt # plotting
import numpy as np # linear algebra
import os # accessing directory structure
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.callbacks import EarlyStopping

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)


# 1. Using Class Weights

In [2]:
# Per-split class counts for the vacuole label. np.bincount puts the count of
# label 0 (negative) at index 0 and label 1 (positive) at index 1;
# minlength=2 keeps the two-way unpacking valid when a split has no positives.
ytrain_v = np.load("/kaggle/input/mhsmadataset/y_vacuole_train.npy")
neg_train, pos_train = np.bincount(ytrain_v, minlength=2)
ytest_v = np.load("/kaggle/input/mhsmadataset/y_vacuole_test.npy")
neg_test, pos_test = np.bincount(ytest_v, minlength=2)

# The weights feed model training, so they are derived from the TRAINING
# labels only. The test split is used for evaluation and its label
# distribution must not influence anything the model is fitted with.
neg = neg_train
pos = pos_train
total = neg + pos

# A missing class would make the inverse-frequency weight below infinite.
if neg == 0 or pos == 0:
    raise ValueError(
        "Both classes must be present in the training labels to compute class weights."
    )

# Scaling by total/2 helps keep the loss to a similar magnitude.
# The sum of the weights of all examples stays the same.
weight_for_0 = (1 / neg) * (total / 2.0)
weight_for_1 = (1 / pos) * (total / 2.0)

# Mapping in the form accepted by the `class_weight` argument of `fit`.
class_weight = {0: weight_for_0, 1: weight_for_1}

print('Weight for class 0: {:.2f}'.format(weight_for_0))
print('Weight for class 1: {:.2f}'.format(weight_for_1))


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.callbacks import EarlyStopping

from sklearn.metrics import classification_report

In [4]:
# Training split: the two image arrays first (presumably 128- and 64-pixel
# versions, going by the file names), then one label array per annotated part.
X_train, train_64 = (
    np.load(path)
    for path in (
        "/kaggle/input/mhsmadataset/x_128_train.npy",
        "/kaggle/input/mhsmadataset/x_64_train.npy",
    )
)
y_acrosome_train, y_head_train, y_tail_train, y_vacuole_train = (
    np.load(path)
    for path in (
        "/kaggle/input/mhsmadataset/y_acrosome_train.npy",
        "/kaggle/input/mhsmadataset/y_head_train.npy",
        "/kaggle/input/mhsmadataset/y_tail_train.npy",
        "/kaggle/input/mhsmadataset/y_vacuole_train.npy",
    )
)

In [5]:
# Test split, loaded in the same order as the training split: the two image
# arrays first, then one label array per annotated part.
X_test, test_64 = (
    np.load(path)
    for path in (
        "/kaggle/input/mhsmadataset/x_128_test.npy",
        "/kaggle/input/mhsmadataset/x_64_test.npy",
    )
)
y_acrosome_test, y_head_test, y_tail_test, y_vacuole_test = (
    np.load(path)
    for path in (
        "/kaggle/input/mhsmadataset/y_acrosome_test.npy",
        "/kaggle/input/mhsmadataset/y_head_test.npy",
        "/kaggle/input/mhsmadataset/y_tail_test.npy",
        "/kaggle/input/mhsmadataset/y_vacuole_test.npy",
    )
)

In [6]:
from tensorflow.keras.utils import to_categorical

In [7]:
# Single intensity divisor shared by every split. The test images must go
# through exactly the same transform as the training images, otherwise the
# model is evaluated on differently scaled inputs than it was fitted on.
PIXEL_SCALE = 194

# Add the trailing single-channel axis matching the (128, 128, 1) model input.
# The sample count is read from the array instead of being hard-coded, while
# the fixed 128x128x1 tail still rejects arrays with a different image size.
X_train = X_train.reshape(len(X_train), 128, 128, 1)
X_test = X_test.reshape(len(X_test), 128, 128, 1)

# NOTE: X_train / X_test are rebound from themselves here, so running this
# cell twice divides twice; re-run the loading cells before re-running it.
X_train, X_test = X_train / PIXEL_SCALE, X_test / PIXEL_SCALE

In [8]:
# One-hot encode the vacuole labels of both splits into two columns.
# NOTE: the names are rebound in place, so this cell must run exactly once
# after the labels are loaded.
y_vacuole_train, y_vacuole_test = (
    to_categorical(labels, num_classes=2)
    for labels in (y_vacuole_train, y_vacuole_test)
)

In [9]:
# CNN for the class-weighted run: one convolution + pooling stage followed by
# a small dense head with two outputs (one per one-hot label column).
weighted_model = Sequential([
    Conv2D(
        filters=128,
        kernel_size=(4, 4),
        padding='VALID',
        activation='relu',
        input_shape=(128, 128, 1),
    ),
    MaxPool2D(pool_size=(4, 4)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    # NOTE(review): two sigmoid outputs with binary_crossentropy score the
    # one-hot columns as independent labels. softmax + categorical_crossentropy
    # is the conventional pairing for mutually exclusive classes; if switched,
    # change every model in this notebook together so runs stay comparable.
    Dense(2, activation='sigmoid'),
])
weighted_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation accuracy has failed to improve for 5 epochs in a row.
early_stop = EarlyStopping(patience=5, monitor='val_accuracy')


In [10]:
# Class-weighted training run on the original (imbalanced) training set.
# NOTE(review): the test split doubles as validation data here, so early
# stopping is steered by the same data the final metrics are reported on.
weighted_history = weighted_model.fit(
    x=X_train,
    y=y_vacuole_train,
    validation_data=(X_test, y_vacuole_test),
    class_weight=class_weight,
    callbacks=[early_stop],
    epochs=25,
)

In [11]:
# Per-epoch training history as a table (one column per logged quantity),
# then training vs. validation loss plotted against the epoch index.
metrics = pd.DataFrame(data=weighted_model.history.history)
metrics.loc[:, ['loss', 'val_loss']].plot()

In [12]:
# Loss and accuracy of the class-weighted model on the test split.
weighted_model.evaluate(x=X_test, y=y_vacuole_test, verbose=0)

In [13]:
# Hard class decision per test image: index of the larger of the two outputs.
pred_classes = np.argmax(weighted_model.predict(X_test), axis=1)
# One-hot copy kept under the original name.
y_pred = to_categorical(pred_classes, num_classes=2)

from sklearn.metrics import classification_report
# Report on class indices rather than one-hot matrices: two-column indicator
# arrays make scikit-learn treat the task as multilabel, which replaces the
# overall accuracy row with micro/samples averages.
true_classes = np.argmax(y_vacuole_test, axis=1)
print(classification_report(true_classes, pred_classes))

# 2. Resampled + Transformed Data

In [15]:
## resample data
# Fixed seed so the oversampled, shuffled training set is identical on every
# run; without it each Restart & Run All trains on a different sample.
RESAMPLE_SEED = 42
rng = np.random.default_rng(RESAMPLE_SEED)

train_labels = np.load('/kaggle/input/mhsmadataset/y_vacuole_train.npy')
x_128_train = np.load('/kaggle/input/mhsmadataset/x_128_train.npy')

# True where the label is non-zero, i.e. the positive examples.
bool_train_labels = train_labels != 0

pos_x_train = x_128_train[bool_train_labels]
neg_x_train = x_128_train[~bool_train_labels]

pos_labels = train_labels[bool_train_labels]
neg_labels = train_labels[~bool_train_labels]

# Oversample the positives: draw indices with replacement until there are as
# many positive examples as negative ones.
ids = np.arange(len(pos_x_train))
choices = rng.choice(ids, len(neg_x_train))

res_pos_x_train = pos_x_train[choices]
res_pos_labels = pos_labels[choices]

## resampled dataset
resampled_x_train = np.concatenate([res_pos_x_train, neg_x_train], axis=0)
resampled_labels = np.concatenate([res_pos_labels, neg_labels], axis=0)

# Shuffle images and labels with the same permutation so pairs stay aligned.
order = np.arange(len(resampled_labels))
rng.shuffle(order)
resampled_x_train = resampled_x_train[order]
resampled_labels = resampled_labels[order]

resampled_x_train.shape

In [16]:
# Class balance after resampling, displayed as (negatives, positives).
# Positives are the non-zero labels, matching how the split was made;
# count_nonzero also avoids accumulating small-integer labels with builtin sum.
n_positive = int(np.count_nonzero(resampled_labels))
n_negative = len(resampled_labels) - n_positive
n_negative, n_positive

In [24]:
## apply transformations
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

from tensorflow.keras import layers
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
# Augmentation pipeline: random horizontal/vertical flips followed by a random
# rotation of up to +/-0.2 of a full turn (the factor is a fraction of 2*pi).
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.2),
])

## augment data
# Keras image layers read a rank-3 array as ONE (height, width, channels)
# image. A stack of grayscale images without a channel axis would therefore be
# flipped/rotated across the sample axis, mixing pixels of different images.
# Give the stack an explicit channel axis so each image is transformed alone.
if res_pos_x_train.ndim == 3:
    pos_batch = res_pos_x_train[..., np.newaxis]
else:
    pos_batch = res_pos_x_train
# training=True forces the random transforms on outside of `fit`.
augmented_batch = data_augmentation(pos_batch, training=True)
# Back to a NumPy array with the original layout so it concatenates with
# neg_x_train exactly as before.
tres_pos_x_train = np.asarray(augmented_batch).reshape(res_pos_x_train.shape)

## augmented positives + untouched negatives, labels in the same order
tres_x_train = np.concatenate([tres_pos_x_train, neg_x_train], axis=0)
tres_labels = np.concatenate([res_pos_labels, neg_labels], axis=0)

tres_x_train.shape

In [20]:
# show the transformed resampled images
# Preview grid: the first n*n oversampled positives, each passed through the
# augmentation pipeline once.
n = 5
images = res_pos_x_train[0:n**2]
plt.figure(figsize=(n, n))

for i in range(len(images)):
    # Shape (1, height, width, 1): explicit batch and channel axes so the
    # layers treat this as one grayscale image. Without the channel axis a
    # (1, H, W) input is read as a 1-pixel-high image with W channels.
    image = np.reshape(images[i], (1,) + images[i].shape[:2] + (1,))
    # training=True forces the random transforms on outside of `fit`.
    augmented_image = data_augmentation(image, training=True)
    ax = plt.subplot(n, n, i + 1)
    # Drop the batch and channel axes again for display.
    plt.imshow(np.squeeze(augmented_image))
    plt.axis("off")

CNN

In [25]:
# new training data
# Add the trailing single-channel axis matching the (128, 128, 1) model input.
# The sample count is read from the array because it depends on how many
# examples the resampling step produced; the fixed 128x128x1 tail still
# rejects arrays with a different image size.
new_X_train = tres_x_train
new_X_train = new_X_train.reshape(len(new_X_train), 128, 128, 1)
## X_test remains the same
# Same divisor as applied to X_train earlier in the notebook.
new_X_train = new_X_train/194

# One-hot encode the labels that go with the resampled + augmented images.
new_y_vacuole_train = to_categorical(tres_labels, num_classes=2)

In [26]:
# CNN for the resampled + augmented run: one convolution + pooling stage
# followed by a small dense head with two outputs (one per label column).
model = Sequential([
    Conv2D(
        filters=128,
        kernel_size=(4, 4),
        padding='VALID',
        activation='relu',
        input_shape=(128, 128, 1),
    ),
    MaxPool2D(pool_size=(4, 4)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    # NOTE(review): two sigmoid outputs with binary_crossentropy score the
    # one-hot columns as independent labels. softmax + categorical_crossentropy
    # is the conventional pairing for mutually exclusive classes; if switched,
    # change every model in this notebook together so runs stay comparable.
    Dense(2, activation='sigmoid'),
])
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation accuracy has failed to improve for 5 epochs in a row.
early_stop = EarlyStopping(patience=5, monitor='val_accuracy')

In [27]:
# Train on the resampled + augmented set, with no class weights in this run.
# NOTE(review): the test split doubles as validation data here, so early
# stopping is steered by the same data the final metrics are reported on.
model.fit(
    x=new_X_train,
    y=new_y_vacuole_train,
    validation_data=(X_test, y_vacuole_test),
    callbacks=[early_stop],
    epochs=25,
)

In [28]:
# Per-epoch training history as a table (one column per logged quantity),
# then training vs. validation loss plotted against the epoch index.
metrics = pd.DataFrame(data=model.history.history)
metrics.loc[:, ['loss', 'val_loss']].plot()

In [29]:
# Loss and accuracy of the resampled + augmented model on the test split.
model.evaluate(x=X_test, y=y_vacuole_test, verbose=0)

In [30]:
# Hard class decision per test image: index of the larger of the two outputs.
pred_classes = np.argmax(model.predict(X_test), axis=1)
# One-hot copy kept under the original name.
y_pred = to_categorical(pred_classes, num_classes=2)

from sklearn.metrics import classification_report
# Report on class indices rather than one-hot matrices: two-column indicator
# arrays make scikit-learn treat the task as multilabel, which replaces the
# overall accuracy row with micro/samples averages.
true_classes = np.argmax(y_vacuole_test, axis=1)
print(classification_report(true_classes, pred_classes))

# 3. Resampled + weights

In [31]:
# Fresh CNN for the resampled + class-weighted run. The name weighted_model is
# rebound, so the model trained in the class-weights-only section is replaced.
weighted_model = Sequential([
    Conv2D(
        filters=128,
        kernel_size=(4, 4),
        padding='VALID',
        activation='relu',
        input_shape=(128, 128, 1),
    ),
    MaxPool2D(pool_size=(4, 4)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    # NOTE(review): two sigmoid outputs with binary_crossentropy score the
    # one-hot columns as independent labels. softmax + categorical_crossentropy
    # is the conventional pairing for mutually exclusive classes; if switched,
    # change every model in this notebook together so runs stay comparable.
    Dense(2, activation='sigmoid'),
])
weighted_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation accuracy has failed to improve for 5 epochs in a row.
early_stop = EarlyStopping(patience=5, monitor='val_accuracy')


In [32]:
# Weighted + resampled: class weights applied on top of the oversampled set.
# NOTE(review): class_weight was computed from the original class counts,
# while new_X_train has already been oversampled to equal class sizes, so the
# minority class is up-weighted twice in this run - confirm this is intended.
weighted_history = weighted_model.fit(
    x=new_X_train,
    y=new_y_vacuole_train,
    validation_data=(X_test, y_vacuole_test),
    class_weight=class_weight,
    callbacks=[early_stop],
    epochs=25,
)

In [33]:
# Per-epoch training history as a table (one column per logged quantity),
# then training vs. validation loss plotted against the epoch index.
metrics = pd.DataFrame(data=weighted_model.history.history)
metrics.loc[:, ['loss', 'val_loss']].plot()

In [34]:
# Loss and accuracy of the resampled + class-weighted model on the test split.
weighted_model.evaluate(x=X_test, y=y_vacuole_test, verbose=0)

In [35]:
# Hard class decision per test image: index of the larger of the two outputs.
pred_classes = np.argmax(weighted_model.predict(X_test), axis=1)
# One-hot copy kept under the original name.
y_pred = to_categorical(pred_classes, num_classes=2)

from sklearn.metrics import classification_report
# Report on class indices rather than one-hot matrices: two-column indicator
# arrays make scikit-learn treat the task as multilabel, which replaces the
# overall accuracy row with micro/samples averages.
true_classes = np.argmax(y_vacuole_test, axis=1)
print(classification_report(true_classes, pred_classes))