In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import cv2
from sklearn.preprocessing import OneHotEncoder
import os

In [2]:
def processData():
    """Load the training labels and the train/test images into memory.

    Reads 'train_labels.csv', using its first column as image file names
    (relative to the 'train' directory) and its second column as labels.
    The labels are one-hot encoded, and every listed training image plus
    every entry of the 'test' directory is loaded as a 128x128 RGB image.

    Returns:
        xTrain: array of training images, one per CSV row.
        yTrain: one-hot encoded labels, row-aligned with xTrain.
        xTest: array of test images, in the order os.listdir('test') returned.
        ohe: the fitted OneHotEncoder, needed to map one-hot rows back to
            the original label values.

    Raises:
        OSError: if any image file cannot be read by OpenCV.
    """
    trainDataset = pd.read_csv('train_labels.csv').to_numpy()
    ohe = OneHotEncoder(sparse_output=False)

    # The encoder wants a 2-D (n_samples, 1) column, hence the extra axis.
    yTrain = ohe.fit_transform(np.expand_dims(trainDataset[:, 1], -1))

    xTrain = trainDataset[:, 0]
    xTest = np.array(os.listdir('test'))

    def fetchImgs(paths, subset):
        """Read each file in `paths` from directory `subset` as a 128x128 RGB image."""
        images = []
        for path in paths:
            fullPath = os.path.join(subset, path)
            img = cv2.imread(fullPath)
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than letting cvtColor
            # die with an opaque assertion error.
            if img is None:
                raise OSError(f"Could not read image file: {fullPath!r}")
            # OpenCV loads images as BGR; convert to RGB channel order.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (128, 128))
            # NOTE(review): the Keras docs describe this ConvNeXt call as a
            # pass-through (normalisation lives inside the model) — confirm
            # for the installed version.
            img = keras.applications.convnext.preprocess_input(img)

            images.append(img)

        return np.array(images)

    xTrain = fetchImgs(xTrain, 'train')
    xTest = fetchImgs(xTest, 'test')

    return xTrain, yTrain, xTest, ohe

# Load everything once; OHE is the fitted encoder returned by processData().
xTrain, yTrain, xTest, OHE = processData()


In [None]:

def createModel(inputShape=(128, 128, 3), numClasses=7):
    """Build a classifier from an ImageNet ConvNeXtXLarge backbone and a dense head.

    Args:
        inputShape: image shape handed to the backbone as `input_shape`.
        numClasses: number of units in the final softmax layer.

    Returns:
        An uncompiled keras.Model going from the backbone's input to the
        softmax output.
    """
    backbone = keras.applications.ConvNeXtXLarge(
        include_top=False,
        weights='imagenet',
        input_shape=inputShape,
    )
    # Freeze the pretrained backbone so only the new head below is trainable.
    backbone.trainable = False

    pooled = keras.layers.GlobalAveragePooling2D()(backbone.output)
    hidden = keras.layers.Dense(128, activation='relu')(pooled)
    probabilities = keras.layers.Dense(numClasses, activation='softmax')(hidden)

    return keras.Model(inputs=backbone.input, outputs=probabilities)

# Size the output layer from the one-hot labels so it always matches the
# number of label columns in yTrain instead of relying on a hard-coded count.
model = createModel(numClasses=yTrain.shape[1])
model.compile(
    optimizer=keras.optimizers.Adam(1e-5),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/convnext/convnext_small_notop.h5
[1m198551472/198551472[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 0us/step


In [4]:
# Training settings gathered in one place; validation_split is the fraction
# of the training arrays that Keras sets aside for validation.
fitOptions = dict(
    batch_size=8,
    epochs=150,
    verbose=1,
    validation_split=0.2,
)
history = model.fit(xTrain, yTrain, **fitOptions)

Epoch 1/150
[1m40/69[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m2:55[0m 6s/step - accuracy: 0.3646 - loss: 1.8957

KeyboardInterrupt: 

In [None]:
# Plot every training metric next to its validation counterpart, one figure
# per metric.
for feature, trainValues in history.history.items():
    # Validation series are drawn together with their training metric, so
    # skip them here. Using startswith + continue (rather than a substring
    # test + break) keeps this independent of key order and of metric names
    # that merely contain 'val_'.
    if feature.startswith('val_'):
        continue

    plt.plot(trainValues, 'r', label=f'Training {feature.capitalize()}')

    # Some history entries have no validation counterpart (for example when
    # training ran without validation data); plot only what exists.
    valValues = history.history.get(f'val_{feature}')
    if valValues is not None:
        plt.plot(valValues, 'b', label=f'Validation {feature.capitalize()}')

    plt.title('Training and Validation ' + feature.capitalize())
    plt.xlabel('Epoch')
    plt.ylabel(feature.capitalize())
    plt.legend()
    plt.show()

In [None]:
from sklearn.metrics import f1_score

# Macro-averaged F1 over the images in xTrain: collapse the one-hot labels and
# the predicted probabilities to class indices, then compare them.
trueLabels = np.argmax(yTrain, axis=1)
predictedLabels = np.argmax(model.predict(xTrain), axis=1)
f1_score(trueLabels, predictedLabels, average='macro')

In [None]:
# Turn each row of predicted probabilities into a one-hot row (1 wherever the
# row's maximum sits, 0 elsewhere), then let the fitted encoder map those rows
# back to the original label values.
testProbs = model.predict(xTest)
testOneHot = np.where(testProbs == testProbs.max(axis=1, keepdims=True), 1, 0)
testPreds = OHE.inverse_transform(testOneHot)

In [6]:
# Second, independent listing of the 'test' directory: processData() built
# xTest from its own os.listdir('test') call, so pairing these names with
# testPreds relies on both calls returning the entries in the same order.
# NOTE(review): os.listdir documents its order as arbitrary — confirm the
# pairing holds on the target filesystem.
fileNames = os.listdir('test')
fileNames

['0306fa89.jpg',
 '0345e3ca.jpg',
 '0551a473.jpg',
 '06e1783d.jpg',
 '08b9981b.jpg',
 '0cfc6a1e.jpg',
 '100bd43a.jpg',
 '1081d46a.jpg',
 '10e27bf7.jpg',
 '11bc3308.jpg',
 '11dbafc4.jpg',
 '1432d91c.jpg',
 '1558450b.jpg',
 '163f0957.jpg',
 '167094c8.jpg',
 '16c511c0.jpg',
 '16d11d2d.jpg',
 '175fdb7a.jpg',
 '183485e0.jpg',
 '18a45994.jpg',
 '21f0f191.jpg',
 '226dc6ee.jpg',
 '2437c1aa.jpg',
 '247fb4df.jpg',
 '24f34b9e.jpg',
 '2735eb07.jpg',
 '277c7676.jpg',
 '27ca08cb.jpg',
 '2b5fa46a.jpg',
 '2ebc35e2.jpg',
 '30503f9d.jpg',
 '33b770d0.jpg',
 '33db0590.jpg',
 '34ad5737.jpg',
 '34e2f10c.jpg',
 '385082b9.jpg',
 '3b138401.jpg',
 '3be8b97b.jpg',
 '3c5436f0.jpg',
 '3f4cd7c8.jpg',
 '417f9c7d.jpg',
 '449c69af.jpg',
 '4527e608.jpg',
 '47385f1a.jpg',
 '49c7de85.jpg',
 '4a63e293.jpg',
 '4c9138a1.jpg',
 '508df02b.jpg',
 '50ce0b34.jpg',
 '5334f219.jpg',
 '5550a0fd.jpg',
 '56728cf9.jpg',
 '596f77ab.jpg',
 '5a59b831.jpg',
 '5b1a69df.jpg',
 '5b376777.jpg',
 '5d3b3719.jpg',
 '5e5865b3.jpg',
 '6059f28e.jpg

In [None]:
# Pair every test file name with its predicted label.
# zip() would silently drop the surplus entries on a length mismatch, which
# would produce an incomplete predictions file, so check explicitly.
if len(fileNames) != len(testPreds):
    raise ValueError(
        f"Got {len(fileNames)} file names but {len(testPreds)} predictions"
    )
# Materialise as a list: a bare zip object is a one-shot iterator, so any cell
# that consumes it would find it empty when re-run.
combined = list(zip(fileNames, testPreds))

In [None]:
import csv

# Write a header plus one "filename,label" row per (name, prediction) pair;
# pred[0] takes the first element of each prediction entry.
with open('predictions.csv', 'w', newline='') as outFile:
    csvWriter = csv.writer(outFile)
    csvWriter.writerow(['filename', 'label'])
    csvWriter.writerows([fileName, pred[0]] for fileName, pred in combined)