In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [None]:
# Side length, in pixels, that every image is resized to before use.
img_size = 100

# One output class per entry found directly under the training directory.
num_classes = len(os.listdir(path="data/train"))

In [None]:
def _load_image_folder(root_dir, size):
    """Load every readable image stored as ``root_dir/<class>/<file>``.

    Parameters
    ----------
    root_dir : str
        Directory whose immediate children are per-class sub-directories.
    size : int
        Each image is resized to ``size`` x ``size`` pixels.

    Returns
    -------
    images : list
        Resized images as returned by ``cv2.resize``.
    label_ids : list of int
        For each image, the index of its class within ``folder_names``.
    rel_paths : list of str
        ``"<class>/<file>"`` for each image, aligned with ``images``.
    folder_names : list of str
        Sorted names of the entries found directly under ``root_dir``.
    """
    folder_names = sorted(os.listdir(root_dir))
    images, label_ids, rel_paths = [], [], []
    for label_id, folder in enumerate(folder_names):
        folder_path = os.path.join(root_dir, folder)
        # os.listdir returns entries in arbitrary order; sort so that the
        # sample order (and the row order of any export) is reproducible.
        for file_name in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file_name)
            image = cv2.imread(file_path)
            if image is None:
                # cv2.imread reports failure by returning None instead of
                # raising; passing None on to cv2.resize would crash.
                print(f"Skipping unreadable image: {file_path}")
                continue
            images.append(cv2.resize(image, (size, size)))
            label_ids.append(label_id)
            rel_paths.append(f"{folder}/{file_name}")
    return images, label_ids, rel_paths, folder_names


# Training set: images plus the integer class index of each one.
train_images, train_label_ids, train_file_names, class_names = _load_image_folder(
    "data/train", img_size
)

# Test set: images plus their "<class>/<file>" identifiers for the export.
test_images, test_label_ids, file_names, test_class_names = _load_image_folder(
    "data/test", img_size
)

# Scale pixel values to [0, 1] and one-hot encode the training labels.
train_data = np.array(train_images).astype('float32') / 255.0
train_labels = to_categorical(train_label_ids, num_classes)
test_data = np.array(test_images).astype('float32') / 255.0

In [None]:
# Small CNN: two convolution + max-pooling stages, then a dense classifier
# head that ends in a softmax over `num_classes` outputs.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_size, img_size, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

In [None]:
# Categorical cross-entropy matches the one-hot labels passed to fit().
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train on the full training set for 10 epochs in mini-batches of 32.
model.fit(
    x=train_data,
    y=train_labels,
    batch_size=32,
    epochs=10,
)

In [None]:
# Per-class scores for every test image (one row per image).
predictions = model.predict(test_data)

# Map the highest-scoring column of each row back to its class name.
predicted_species = [
    class_names[class_idx] for class_idx in np.argmax(predictions, axis=1)
]

# Write one row per test image: its "<class>/<file>" id and predicted class.
predictions_df = pd.DataFrame(
    {
        'file': file_names,
        'species': predicted_species,
    }
)
predictions_df.to_csv('predictions.csv', index=False)