# iMet Collection 2020
### Kaggle Competition

## Import & Clean Data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

In [None]:
# Read the competition CSVs: the training table, the attribute table, and the
# sample submission (its `id` column is later used to locate the test images).
df_train = pd.read_csv("../input/imet-2020-fgvc7/train.csv")
df_labels = pd.read_csv("../input/imet-2020-fgvc7/labels.csv")
df_test = pd.read_csv("../input/imet-2020-fgvc7/sample_submission.csv")

# Preview the first rows of the attribute table, then of the training table.
print(df_labels.head(), df_train.head(), sep="\n")

In [None]:
# Turn the space-separated attribute string into a list of label strings.
# The isinstance guard keeps this cell idempotent: on a re-run the column
# already holds lists, which have no .split().
df_train["attribute_ids"] = df_train["attribute_ids"].apply(
    lambda ids: ids.split(" ") if isinstance(ids, str) else ids
)

# Append the image file extension so ids can be used as file names;
# skip ids that already carry it so a re-run does not produce ".png.png".
df_train["id"] = df_train["id"].apply(
    lambda name: name if name.endswith(".png") else name + ".png"
)
print(df_train.head())

## Create Training and Validation Generators

In [None]:
# Location of the training images and the hyperparameters used by later cells.
TRAIN_DIR = '../input/imet-2020-fgvc7/train/'
image_size = 128       # images are resized to image_size x image_size
BATCH_SIZE = 512
EPOCHS = 6
num_classes = 3471     # expected number of distinct attribute classes

In [None]:
import numpy as np
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D


# Attribute ids listed in labels.csv (not consumed by the generators below).
labels = df_labels.attribute_id.to_list()

# preprocess_input MUST be passed by keyword: the first positional parameter
# of ImageDataGenerator is `featurewise_center`, so passing the function
# positionally silently disables the ResNet50 preprocessing and only
# triggers "featurewise_center but not fit" warnings.
data_generator = ImageDataGenerator(preprocessing_function=preprocess_input,
                                    validation_split=0.01)


def _flow_from_train(subset):
    """Build a batch iterator over `df_train` for one side of the split.

    Args:
        subset: 'training' or 'validation'; selects which part of the
            `validation_split` configured on `data_generator` is served.

    Returns:
        The iterator returned by `data_generator.flow_from_dataframe`,
        reading files named by the `id` column from TRAIN_DIR with the
        label lists in `attribute_ids` as targets.
    """
    return data_generator.flow_from_dataframe(
        dataframe=df_train,
        directory=TRAIN_DIR,
        x_col='id',
        y_col='attribute_ids',
        seed=1,
        target_size=(image_size, image_size),
        batch_size=BATCH_SIZE,
        class_mode="categorical",
        subset=subset)


# Both iterators share every setting except the subset they serve.
train_generator = _flow_from_train('training')
validation_generator = _flow_from_train('validation')

## Model Generation & Fit

In [None]:
# Size the output layer from the classes the generator actually discovered,
# so the head can never disagree with the target vectors it is trained on.
n_outputs = len(train_generator.class_indices)

# ResNet50 backbone (global-average pooled, no classification top) followed by
# one sigmoid unit per attribute for multi-label prediction.
model = Sequential()
model.add(ResNet50(include_top=False, pooling='avg'))
model.add(Dense(n_outputs, activation='sigmoid'))
model.layers[0].trainable = False  # freeze the backbone; only the head trains

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Whole batches per epoch; never let a small split collapse to zero steps.
STEP_SIZE_TRAIN = max(1, train_generator.n // train_generator.batch_size)
STEP_SIZE_VALID = max(1, validation_generator.n // validation_generator.batch_size)

# Model.fit accepts generators directly; fit_generator is deprecated.
fit_stats = model.fit(train_generator,
                      epochs=EPOCHS,
                      steps_per_epoch=STEP_SIZE_TRAIN,
                      validation_data=validation_generator,
                      validation_steps=STEP_SIZE_VALID)

## Test Generator

In [None]:
TEST_DIR = '../input/imet-2020-fgvc7/test/'

# Re-read the sample submission so this cell can be re-run without
# appending ".png" to the ids a second time.
df_test = pd.read_csv("../input/imet-2020-fgvc7/sample_submission.csv")
df_test["id"] = df_test["id"].apply(lambda x: x + ".png")

# Pass preprocess_input by keyword: positionally it would bind to
# `featurewise_center` and the ResNet50 preprocessing would be skipped,
# leaving test inputs inconsistent with what the network expects.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Unshuffled, unlabeled iterator so predictions line up row-for-row with df_test.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory=TEST_DIR,
    x_col="id",
    batch_size=1,
    seed=1,
    shuffle=False,
    class_mode=None,
    target_size=(image_size, image_size))


In [None]:
# Round the step count up so that a batch size which does not divide the
# number of test images still yields one prediction per image.
STEP_SIZE_TEST = (test_generator.n + test_generator.batch_size - 1) // test_generator.batch_size

# Restart the iterator so predictions begin at the first row of df_test.
test_generator.reset()

# Model.predict accepts generators directly; predict_generator is deprecated.
predictions = model.predict(test_generator, steps=STEP_SIZE_TEST, verbose=1)

## Test Prediction

In [None]:
# Boolean mask of scores above the threshold (raise or lower the threshold to
# control how many tags are emitted per image).
pred_bol = predictions > 0.1

# Invert the generator's label -> column-index mapping so that column
# positions can be translated back into attribute labels.
labels = {position: name for name, position in train_generator.class_indices.items()}

# One space-separated label string per image, labels in column order.
final_predictions = [
    " ".join(labels[position] for position, is_on in enumerate(mask_row) if is_on)
    for mask_row in pred_bol
]


## Submission

In [None]:
# Pair each test file name with its predicted labels, stripping the ".png"
# suffix so the ids are written without the file extension.
filenames = df_test.id
submission = pd.DataFrame(
    data={
        'id': filenames.apply(lambda name: name.replace(".png", "")),
        'attribute_ids': final_predictions,
    }
)

submission.to_csv("submission.csv", index=False)