In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Input, AvgPool2D,MaxPool2D,Dropout
from tensorflow.keras.utils import to_categorical
import numpy as np
from sklearn.preprocessing import OneHotEncoder
import seaborn as sns
import cv2
from sklearn.model_selection import KFold


TRAIN_FOLDER = "C:/Users/dordo/עבודות_אוניברסיטה/סדנא בלמידה עמוקה/planet/planet/train-jpg"
TEST_FOLDER = "C:/Users/dordo/עבודות_אוניברסיטה/סדנא בלמידה עמוקה/planet/planet/test-jpg"
csv = pd.read_csv("C:/Users/dordo/עבודות_אוניברסיטה/סדנא בלמידה עמוקה/planet/planet/train_classes.csv", delimiter = ",")
test = pd.read_csv("C:/Users/dordo/עבודות_אוניברסיטה/סדנא בלמידה עמוקה/planet/planet/sample_submission.csv", delimiter = ",")
input_size = 128
batch_size = 32
n_folds = 5
input_channels = 3
epoches = 3
flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in csv['tags'].values])))

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

kf = KFold(n_splits=n_folds, shuffle=True, random_state=1)

fold_count = 0
final_results = []
print('#############################')
print('   general variables      ')
print('n_folds: {}'.format(n_folds))
print('batch_size: {}'.format(batch_size))
print('input_size: {}'.format(input_size))

#############################
   general variables      
n_folds: 5
batch_size: 32
input_size: 128


In [2]:
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import InceptionV3
def simple_model():
    inp = Input(shape = (input_size,input_size,3))
    x = Conv2D(64, (4, 4))(inp)
    x = Activation('relu')(x)
    x = Conv2D(64, (4, 4))(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size = (2, 2))(x)
    x = Conv2D(64, (4, 4))(x)
    x = Activation('relu')(x)
    x = Conv2D(64, (4, 4))(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size = (2, 2))(x)
    x = Flatten()(x)
    x = Dense(512)(x)
    x = Activation('relu')(x)
    output = Dense(17, activation = 'sigmoid')(x)
    opt = Adam(lr=1e-4)
    model = Model(inp,output)
    model.compile(optimizer = opt,loss = "binary_crossentropy",metrics = ["accuracy"])
    return model

def inceptionv3_model():
    base_model = InceptionV3(include_top=False,
                       weights='imagenet',
                       input_shape=(input_size, input_size, input_channels))

    model = Sequential()
    # Batchnorm input
    model.add(BatchNormalization(input_shape=(input_size, input_size, input_channels)))
    # Base model
    model.add(base_model)
    # Classifier
    model.add(Flatten())
    model.add(Dense(17, activation='sigmoid'))

    opt = Adam(lr=1e-4)

    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    return model

In [9]:
for train_index, valid_index in kf.split(csv):
    fold_count += 1
    print('-------------------------------------------------------')
    print('Fold ', fold_count)

    def transformations(src, choice):
        if choice == 0:
            # Rotate 90
            src = cv2.rotate(src, rotateCode=cv2.ROTATE_90_CLOCKWISE)
        if choice == 1:
            # Rotate 90 and flip horizontally
            src = cv2.rotate(src, rotateCode=cv2.ROTATE_90_CLOCKWISE)
            src = cv2.flip(src, flipCode=1)
        if choice == 2:
            # Rotate 180
            src = cv2.rotate(src, rotateCode=cv2.ROTATE_180)
        if choice == 3:
            # Rotate 180 and flip horizontally
            src = cv2.rotate(src, rotateCode=cv2.ROTATE_180)
            src = cv2.flip(src, flipCode=1)
        if choice == 4:
            # Rotate 90 counter-clockwise
            src = cv2.rotate(src, rotateCode=cv2.ROTATE_90_COUNTERCLOCKWISE)
        if choice == 5:
            # Rotate 90 counter-clockwise and flip horizontally
            src = cv2.rotate(src, rotateCode=cv2.ROTATE_90_COUNTERCLOCKWISE)
            src = cv2.flip(src, flipCode=1)
        return src

    df_train = csv.iloc[train_index]

    def train_generator():
        while True:
            for start in range(0, len(df_train), batch_size):
                x_batch = []
                y_batch = []
                end = min(start + batch_size, len(df_train))
                df_train_batch = df_train[start:end]
                for f, tags in df_train_batch.values:
                    print(TRAIN_FOLDER)
                    img = cv2.imread(TRAIN_FOLDER+'{}.jpg'.format(f))
                    img = cv2.resize(img, (input_size, input_size))
                    img = transformations(img, np.random.randint(6))
                    targets = np.zeros(17)
                    for t in tags.split(' '):
                        targets[label_map[t]] = 1
                    x_batch.append(img)
                    y_batch.append(targets)
                x_batch = np.array(x_batch, np.float32)
                y_batch = np.array(y_batch, np.uint8)
                yield x_batch, y_batch

    df_valid = csv.iloc[valid_index]
    print('Validating on {} samples'.format(len(df_valid)))


    def valid_generator():
        while True:
            for start in range(0, len(df_valid), batch_size):
                x_batch = []
                y_batch = []
                end = min(start + batch_size, len(df_valid))
                df_valid_batch = df_valid[start:end]
                for f, tags in df_valid_batch.values:
                    img = cv2.imread(TRAIN_FOLDER+'{}.jpg'.format(f))
                    img = cv2.resize(img, (input_size, input_size))
                    img = transformations(img, np.random.randint(6))
                    targets = np.zeros(17)
                    for t in tags.split(' '):
                        targets[label_map[t]] = 1
                    x_batch.append(img)
                    y_batch.append(targets)
                x_batch = np.array(x_batch, np.float32)
                y_batch = np.array(y_batch, np.uint8)
                yield x_batch, y_batch
    
    df_test_data = test
    def test_generator():
        while True:
            for start in range(0, len(test), batch_size):
                x_batch = []
                end = min(start + batch_size, len(df_test_data))
                df_test_batch = df_test_data[start:end]
                for f, tags in df_test_batch.values:
                    img = cv2.imread(TEST_FOLDER+'{}.jpg'.format(f))
                    img = cv2.resize(img, (input_size, input_size))
                    x_batch.append(img)
                x_batch = np.array(x_batch, np.float32)
                yield x_batch
                

    model = inceptionv3_model()
    model.fit(x = train_generator(),
                    validation_data=valid_generator(),
                    steps_per_epoch=(len(df_train) // batch_size) + 1,
                    validation_steps=(len(df_valid) // batch_size) + 1,
                    epochs=epoches)
    predictions = model.predict_generator(test_generator(),
                                                 steps=((len(test) // batch_size) + 1),
                                                 verbose=1)
    final_results.append(predictions)

-------------------------------------------------------
Fold  7
Validating on 8096 samples
C:/Users/dordo/עבודות_אוניברסיטה/סדנא בלמידה עמוקה/planet/planet/train-jpg
Epoch 1/3


error: OpenCV(4.4.0) C:\Users\appveyor\AppData\Local\Temp\1\pip-req-build-2b5g8ysb\opencv\modules\imgproc\src\resize.cpp:3929: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'


In [None]:
results_p = [sum(x)/n_folds for x in zip(*final_results)]

In [None]:
for index, row in test.iterrows():
    prediction = []
    for i in range(17):
        value = results_p[index][i]
        if(value > 0.2):
            prediction.append(inv_label_map[i])
    row['tags'] = " ".join(prediction)

In [None]:
test.to_csv('planet/answers3.csv', index=False)