In [1]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
import tensorflow as tf



In [2]:
train_df = pd.read_csv("BTTAIxNYBG-train.csv")
validate_df = pd.read_csv("BTTAIxNYBG-validation.csv")
test_df = pd.read_csv("BTTAIxNYBG-test.csv")

In [3]:
# Load dataset & Define image directory
train_image_directory = 'BTTAIxNYBG-train/BTTAIxNYBG-train/'
validate_image_directory = 'BTTAIxNYBG-validation/BTTAIxNYBG-validation/' 

In [4]:
split_train_df=train_df[:1000]

In [5]:
# Preprocessing function to load and process images
def train_load_and_preprocess_image(filename, target_size=(256, 256)):
    img_path = os.path.join(train_image_directory, filename)
    img = image.load_img(img_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Model expects a batch of images
    return img_array / 255.0  # Normalize to [0, 1]

In [6]:
# Preprocessing function to load and process images
def validate_load_and_preprocess_image(filename, target_size=(256, 256)):
    img_path = os.path.join(validate_image_directory, filename)
    img = image.load_img(img_path, target_size=target_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)  # Model expects a batch of images
    return img_array / 255.0  # Normalize to [0, 1]


In [7]:
# Apply preprocessing to all images
split_train_df['imageData'] = split_train_df['imageFile'].apply(train_load_and_preprocess_image)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  split_train_df['imageData'] = split_train_df['imageFile'].apply(train_load_and_preprocess_image)


In [8]:
validate_df['imageData'] = validate_df['imageFile'].apply(validate_load_and_preprocess_image)

In [9]:
# Split dataset into training and validation sets
### Note: This is a common step in ML training, but in this challenge, since the validation set is provided separately, there is no need to call this function to distinguish between validation and train set.
# train_df, validate_df = train_test_split(df, test_size=0.2, random_state=42)

# Data augmentation configuration for training
train_datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Note: No augmentation for validation data, only rescaling
validation_datagen = ImageDataGenerator(rescale=1./255)

datagen=ImageDataGenerator(rescale=1./255)

In [10]:
# Convert dataframe to a format suitable for the model training
def train_df_to_dataset(dataframe, datagen, batch_size=32):
    datagen.flow_from_dataframe(
        dataframe=dataframe,
        directory=train_image_directory,
        x_col='imageFile',
        y_col='classLabel',
        target_size=(256, 256),
        batch_size=batch_size,
        class_mode='categorical'  # Change this if not a multiclass classification
    )

def validate_df_to_dataset(dataframe, datagen, batch_size=32):
    datagen.flow_from_dataframe(
        dataframe=dataframe,
        directory=validate_image_directory,
        x_col='imageFile',
        y_col='classLabel',
        target_size=(256, 256),
        batch_size=batch_size,
        class_mode='categorical'  # Change this if not a multiclass classification
    )

In [11]:
# Create datasets for training and validation
train_dataset = train_df_to_dataset(split_train_df, datagen)
validation_dataset = validate_df_to_dataset(validate_df, datagen)

# This setup is now ready for training with model.fit using the train_dataset and validation_dataset

Found 1000 validated image filenames belonging to 10 classes.
Found 10244 validated image filenames belonging to 10 classes.


In [12]:
datagen=ImageDataGenerator(rescale=1./255.,validation_split=0.25)

In [13]:
train_generator=datagen.flow_from_dataframe(
    dataframe=split_train_df,
    directory=train_image_directory,
    x_col='imageFile',
    y_col='classLabel',
    subset="training",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",  
    target_size=(256, 256)
)

valid_generator=datagen.flow_from_dataframe(
    dataframe=validate_df,
    directory=validate_image_directory,
    x_col='imageFile',
    y_col='classLabel',
    subset="validation",
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=(256, 256)
)

test_datagen=ImageDataGenerator(rescale=1./255.)

Found 750 validated image filenames belonging to 10 classes.
Found 2561 validated image filenames belonging to 10 classes.


In [14]:
train_generator

<keras.src.preprocessing.image.DataFrameIterator at 0x17537ff40>

In [15]:
train_dataset

## CNN

In [31]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers

In [47]:
model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(256,256,3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
# model.compile(optimizers.rmsprop(lr=0.0001, decay=1e-6),loss="categorical_crossentropy",metrics=["accuracy"])
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_28 (Conv2D)          (None, 256, 256, 32)      320       
                                                                 
 activation_25 (Activation)  (None, 256, 256, 32)      0         
                                                                 
 conv2d_29 (Conv2D)          (None, 254, 254, 32)      9248      
                                                                 
 activation_26 (Activation)  (None, 254, 254, 32)      0         
                                                                 
 max_pooling2d_15 (MaxPooli  (None, 127, 127, 32)      0         
 ng2D)                                                           
                                                                 
 dropout_15 (Dropout)        (None, 127, 127, 32)      0         
                                                      

In [41]:
# model = tf.keras.models.Sequential([
#     tf.keras.layers.RandomRotation(.25, input_shape=[256,256,1]),
#     tf.keras.layers.Conv2D(64, 7, padding="same", activation='relu'),
#     tf.keras.layers.MaxPooling2D(2),
#     tf.keras.layers.Conv2D(128, 3, padding="same", activation='relu'),
#     tf.keras.layers.Conv2D(128, 3, padding="same", activation='relu'),
#     tf.keras.layers.MaxPooling2D(2),
#     # tf.keras.layers.Conv2D(256, 3, padding="same", activation='relu'),
#     # tf.keras.layers.Conv2D(256, 3, padding="same", activation='relu'),
#     # tf.keras.layers.MaxPooling2D(2),
#     tf.keras.layers.Flatten(),
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.Dense(64, activation='relu'),
#     tf.keras.layers.Dense(10, activation='softmax')

# ])
# model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 random_rotation_1 (RandomR  (None, 256, 256, 1)       0         
 otation)                                                        
                                                                 
 conv2d_21 (Conv2D)          (None, 256, 256, 64)      3200      
                                                                 
 max_pooling2d_11 (MaxPooli  (None, 128, 128, 64)      0         
 ng2D)                                                           
                                                                 
 conv2d_22 (Conv2D)          (None, 128, 128, 128)     73856     
                                                                 
 conv2d_23 (Conv2D)          (None, 128, 128, 128)     147584    
                                                                 
 max_pooling2d_12 (MaxPooli  (None, 64, 64, 128)      

In [51]:
model = Sequential()
model.add(Conv2D(256, kernel_size=(3,3), activation='relu',input_shape=(256,256,3)))
model.add(MaxPooling2D(2,2))
model.add(Conv2D(64, kernel_size=(5,5), activation='relu'))
model.add(MaxPooling2D(2,2))
model.add(Flatten())
model.add(Dense(16, activation='relu'))
model.add(Dense(3, activation='softmax'))

In [19]:
# STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
# STEP_SIZE_VALID=valid_generator.n//valid_generator.batch_size
# model.fit_generator(generator=train_generator,
#                     steps_per_epoch=STEP_SIZE_TRAIN,
#                     validation_data=valid_generator,
#                     validation_steps=STEP_SIZE_VALID,
#                     epochs=10
# )

In [52]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [21]:
# feature_list = ['uniqueID', 'classLabel', 'source', 'imageFile', 'imageData']
# target = validate_df['classID']

In [22]:
# y = validate_df['classID']
# X = validate_df[feature_list]

In [23]:
# X_train, y_train = validation_dataset.drop('classID', axis=1), validation_dataset['classID']
# X_test, y_test = train_dataset.drop('classID', axis=1), train_dataset['classID']

In [24]:
# y = train_df['classID']
# X = train_df.drop(['classID'], axis=1)

In [25]:
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=1234)

In [26]:
# from fastai import *
# from fastai.vision import *
# from fastai.metrics import error_rate
# from fastai.vision.all import *
# import os

In [27]:
# train_path="BTTAIxNYBG-train.csv"
# valid_path="BTTAIxNYBG-validation.csv"

In [28]:
# dls = datablock.dataloaders([train_path, valid_path], splitter=GrandparentSplitter(train_name='train', valid_name='valid'))

# learn = cnn_learner(data, models.resnet34, metrics=[accuracy], model_dir = Path('../kaggle/working'),path = Path("."))
# learn = vision_learner(validate_df, models.resnet34, metrics=[accuracy])

In [35]:
model.fit(train_dataset, epochs=10, validation_data=(validation_dataset), batch_size=32)

ValueError: Failed to find data adapter that can handle input: <class 'NoneType'>, <class 'NoneType'>

In [53]:
model.fit(train_generator, epochs=10, validation_data=(valid_generator), batch_size=32)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 197, in _run_module_as_main

  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/runpy.py", line 87, in _run_code

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/ipykernel_launcher.py", line 17, in <module>

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/traitlets/config/application.py", line 1046, in launch_instance

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/ipykernel/kernelapp.py", line 736, in start

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/base_events.py", line 596, in run_forever

  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/base_events.py", line 1890, in _run_once

  File "/Applications/Xcode.app/Contents/Developer/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/asyncio/events.py", line 80, in _run

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/ipykernel/kernelbase.py", line 516, in dispatch_queue

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/ipykernel/kernelbase.py", line 505, in process_one

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/ipykernel/kernelbase.py", line 412, in dispatch_shell

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/ipykernel/kernelbase.py", line 740, in execute_request

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/ipykernel/ipkernel.py", line 422, in do_execute

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/ipykernel/zmqshell.py", line 546, in run_cell

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/IPython/core/interactiveshell.py", line 3024, in run_cell

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/IPython/core/interactiveshell.py", line 3079, in _run_cell

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/IPython/core/interactiveshell.py", line 3284, in run_cell_async

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/IPython/core/interactiveshell.py", line 3466, in run_ast_nodes

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/IPython/core/interactiveshell.py", line 3526, in run_code

  File "/var/folders/t0/zr75ng851t33cs9hvh9_n1h00000gn/T/ipykernel_3106/631605186.py", line 1, in <module>

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/engine/training.py", line 1783, in fit

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/engine/training.py", line 1377, in train_function

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/engine/training.py", line 1360, in step_function

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/engine/training.py", line 1349, in run_step

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/engine/training.py", line 1127, in train_step

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/engine/training.py", line 1185, in compute_loss

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/engine/compile_utils.py", line 277, in __call__

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/losses.py", line 143, in __call__

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/losses.py", line 270, in call

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/losses.py", line 2454, in sparse_categorical_crossentropy

  File "/Users/maryhe/Library/Python/3.9/lib/python/site-packages/keras/src/backend.py", line 5777, in sparse_categorical_crossentropy

logits and labels must have the same first dimension, got logits shape [14,3] and labels shape [140]
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_7416]

In [None]:
#y_proba = model.predict(test_df)

In [None]:
#df[['uniqueID', 'classID']].to_csv("output.csv")
#y_pred.to_csv("o")