# NYBG DATA

In [None]:
! pip install tensorflow
! pip install numpy
! pip install matplotlib
! pip install pandas
! pip install scikit-learn
! pip install opencv-python

In [23]:
# Import necessary libraries
#pip3 install tensorflow
import tensorflow
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

In [24]:
filename_train = "./bttai-nybg-2024/BTTAIxNYBG-train.csv"
df_train = pd.read_csv(filename_train, header = 0)
filename_test = "./bttai-nybg-2024/BTTAIxNYBG-test.csv"
df_test = pd.read_csv(filename_test, header = 0)
filename_val = "./bttai-nybg-2024/BTTAIxNYBG-validation.csv"
df_val = pd.read_csv(filename_val, header = 0)


## Provided Example Starting Notebook

In [25]:
classes = list(df_train["classLabel"].unique())
classes

['occluded-specimens',
 'microscope-slides',
 'illustrations-color',
 'animal-specimens',
 'live-plants',
 'biocultural-specimens',
 'illustrations-gray',
 'mixed-pressed-specimens',
 'ordinary-pressed-specimens',
 'micrographs-transmission-light']

In [26]:
train_image_directory = "./bttai-nybg-2024/BTTAIxNYBG-train/BTTAIxNYBG-train"
validation_image_directory = "./bttai-nybg-2024/BTTAIxNYBG-validation/BTTAIxNYBG-validation"

In [27]:
# Data generators
datagen = ImageDataGenerator(rescale=1./255)


In [28]:
train_generator = datagen.flow_from_dataframe(
    dataframe=df_train,
    directory=train_image_directory,
    x_col='imageFile',
    y_col='classLabel',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

Found 81946 validated image filenames belonging to 10 classes.


In [29]:
validation_generator = datagen.flow_from_dataframe(
    dataframe=df_val,
    directory=validation_image_directory,
    x_col='imageFile',
    y_col='classLabel',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical')

Found 10244 validated image filenames belonging to 10 classes.


In [30]:
# Load pre-trained ResNet50 model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))




Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [31]:
# Freeze the layers of the base model
for layer in base_model.layers:
    layer.trainable = False

In [32]:
# Add custom layers on top
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(len(classes), activation='softmax')(x)

In [33]:
# Compile the model
model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])





In [34]:
# Train the model
history = model.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [36]:
df_train.columns

Index(['uniqueID', 'classLabel', 'classID', 'source', 'imageFile'], dtype='object')

In [39]:
df_test.columns

Index(['uniqueID', 'imageFile'], dtype='object')

In [41]:
test_datagen = ImageDataGenerator(rescale=1./255)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=df_test,
    directory="./bttai-nybg-2024/BTTAIxNYBG-test/BTTAIxNYBG-test",
    x_col="imageFile",
    y_col=None,
    target_size=(224, 224),
    batch_size=32,
    class_mode=None,
    shuffle=False)  # No need to shuffle test data


Found 30690 validated image filenames.


In [42]:
loss, accuracy = model.evaluate(test_generator)



In [43]:
print(f"Test loss: {loss}")
print(f"Test accuracy: {accuracy}")

Test loss: 0.0
Test accuracy: 0.0


In [44]:
predictions = model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=1)



In [45]:
submission_df = pd.DataFrame({
    'uniqueID': df_test['uniqueID'],
    'classID': predicted_classes
})

In [49]:
submission_df.head()

Unnamed: 0,uniqueID,classID
0,1,1
1,9,9
2,10,4
3,14,1
4,16,6


In [46]:
submission_df.to_csv('submission.csv', index=False)

In [47]:
! kaggle competitions submit -c bttai-nybg-2024 -f submission.csv -m "Starflowers-NYC (Using RV-10Epochs)"

Successfully submitted to BTTAI x NYBG Spring 2024 AI Studio



  0%|          | 0.00/273k [00:00<?, ?B/s]
  6%|▌         | 16.0k/273k [00:00<00:05, 50.1kB/s]
 70%|███████   | 192k/273k [00:00<00:00, 567kB/s]  
100%|██████████| 273k/273k [00:01<00:00, 278kB/s]


In [50]:
model.save("./models/resnet50GroundZero")

INFO:tensorflow:Assets written to: ./models/resnet50GroundZero\assets


INFO:tensorflow:Assets written to: ./models/resnet50GroundZero\assets
