In [None]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D , Flatten, Dropout
from sklearn.model_selection import train_test_split
import pandas as pd
import zipfile
from PIL import Image
import shutil
import os
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# Unpack the training archive into the working directory; the images are
# read back from the "train/" folder further down in this cell.
train_zip_path = "/kaggle/input/aerial-cactus-identification/train.zip"
with zipfile.ZipFile(train_zip_path, "r") as train_archive:
    train_archive.extractall(".")
    
#    print(z.namelist())

# img = mpimg.imread('train/008bd3d84a1145e154409c124de7cee9.jpg')
# imgplot = plt.imshow(img)
# plt.show()


# Label table: one row per training image, with its file name ("id") and its
# binary label ("has_cactus").
data = pd.read_csv("/kaggle/input/aerial-cactus-identification/train.csv")
path = data["id"]
value = data["has_cactus"]


# Images and labels are collected per class so that the two classes can be
# balanced by truncation below.
x_train_0 = []
x_train_1 = []
y_train_0 = []
y_train_1 = []

# Walk over every labelled row instead of a hard-coded row count, so the loop
# neither overruns a shorter CSV nor silently ignores additional rows.
for image_name, label in zip(path, value):
    # The context manager releases the file handle once the pixels are copied.
    with Image.open(os.path.join("train", str(image_name))) as im:
        # Force three channels so a greyscale/RGBA file cannot break the
        # reshape; np.array on the image avoids the slow per-pixel getdata().
        data_img = np.array(im.convert("RGB"))
    # Still raises if an image is not 32x32, exactly like the original reshape.
    data_img = data_img.reshape((32, 32, 3))
    if int(label) == 0:
        x_train_0.append(data_img)
        y_train_0.append(label)
    else:
        x_train_1.append(data_img)
        y_train_1.append(label)


# "taille" (size): number of samples kept per class, i.e. the size of the
# smaller class, so both classes contribute equally to the training set.
taille = min(len(x_train_0), len(x_train_1))
x_train = x_train_0[:taille] + x_train_1[:taille]
y_train = y_train_0[:taille] + y_train_1[:taille]


x_train = np.array(x_train)
y_train = np.array(y_train)



# Unpack the test archive and record which members are actual files.
with zipfile.ZipFile("/kaggle/input/aerial-cactus-identification/test.zip","r") as z:
    z.extractall(".")
    # Query the archive while it is still open, and skip directory entries
    # explicitly instead of assuming that the first member is the folder.
    test_members = [info.filename for info in z.infolist() if not info.is_dir()]

x_test = []
path_list = []  # bare image file names, reused later as the "id" column of the submission
for member in test_members:
    # basename drops the folder prefix whatever its length, and the loop
    # variable no longer overwrites the `path` Series of training ids.
    path_list.append(os.path.basename(member))
    with Image.open(member) as im:
        # Same decoding as for the training images: three channels, then
        # reshape to the fixed 32x32x3 layout (raises on any other size).
        data_img = np.array(im.convert("RGB"))
    data_img = data_img.reshape((32,32,3))
    x_test.append(data_img)

x_test = np.array(x_test)
    
# Convert to float32 and divide by 255 (the maximum of an 8-bit channel) so
# that train and test inputs share the same scale.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
# One-hot encode the labels; the class count is pinned so the output always has
# two columns, matching the 2-unit softmax head, even if one label is absent.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=2)
# Hold out 30% of the samples for validation. The fixed seed makes the split,
# and therefore the reported validation metrics, reproducible across runs.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.30, random_state=42
)

In [None]:
# Augmentation settings for the training generator: shift ranges of 0.1 along
# both axes plus horizontal and vertical flips.
augmentation_options = dict(
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# Build the model: three convolutional stages (32, 64 and 128 filters), each
# ending in 2x2 max pooling, followed by a dense classifier with a 2-way softmax.
model = Sequential([
    # Stage 1: the first layer fixes the input shape from the training array.
    Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:], activation='relu'),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),

    # Stage 2, with dropout after pooling.
    Conv2D(64, (3, 3), padding='same', activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 3, with dropout after pooling.
    Conv2D(128, (3, 3), padding='same', activation='relu'),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: three hidden dense layers, then one output unit per class.
    Flatten(),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(2, activation='softmax'),
])
model.summary()

In [None]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Multiply the learning rate by 0.2 when validation accuracy has not improved
# for 2 epochs. The floor must lie below the optimizer's starting rate: the
# model is compiled with "adam" at its default rate (0.001 per the Keras docs),
# so the previous min_lr=0.001 prevented any reduction from ever taking effect.
reduce_lr = ReduceLROnPlateau(
            monitor = "val_accuracy",
            factor=0.2,
            patience=2,
            min_lr=1e-5)

# Keep only the best weights seen so far on disk. The monitored quantity is
# spelled out; "val_loss" is the library default, so behaviour is unchanged.
checkpointer = ModelCheckpoint(filepath='model.hdf5', monitor='val_loss',
                               verbose=1, save_best_only=True)


model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])

# Train on augmented batches from the generator; validation uses the
# un-augmented hold-out split.
history = model.fit(datagen.flow(x_train, y_train, batch_size=1000), epochs=50,
                    validation_data=(x_val, y_val),
                    callbacks=[reduce_lr, checkpointer])

In [None]:
def plot_history(history):
    """
    Plot training versus validation curves, first for accuracy, then for loss.

    history: object whose ``history`` attribute is a mapping containing the
    keys 'accuracy', 'val_accuracy', 'loss' and 'val_loss'; in this notebook
    it is the value returned by ``model.fit``.
    """
    curves = history.history
    # One figure per metric; each figure overlays the train and val series.
    for metric in ('accuracy', 'loss'):
        plt.plot(curves[metric])
        plt.plot(curves['val_' + metric])
        plt.title('model ' + metric)
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'val'], loc='upper left')
        plt.show()
# Show the learning curves of the training run.
plot_history(history)


# Reload the weights from the file the checkpoint callback writes to, then
# score them on the validation split. The evaluate call stays the last
# expression so its result is displayed as the cell output.
best_weights_path = 'model.hdf5'
model.load_weights(best_weights_path)

model.evaluate(x_val, y_val)

In [None]:
# Predict class scores for the test images and keep the index of the larger
# score per row as the predicted label.
class_scores = model.predict(x_test)
resultat = pd.Series(class_scores.argmax(axis=1), name="has_cactus")

# Pair each prediction with its image file name and write the submission file.
df = pd.DataFrame({'id': path_list, 'has_cactus': resultat})
df.to_csv("submission.csv", index=False)



In [None]:
# Print the entries of the current working directory.
workdir_entries = os.listdir('.')
print(workdir_entries)

In [None]:
# Remove the extracted image folders. ignore_errors keeps this cell safe to
# re-run: a folder that is already gone no longer raises FileNotFoundError.
for extracted_dir in ("./train", "./test"):
    shutil.rmtree(extracted_dir, ignore_errors=True)