In [13]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
from tqdm import tqdm
from PIL import Image
    

In [8]:
# Per-split tables (columns seen in the preview below: id, caption, image_id, label),
# read in train / val / test order.
train, val, test = (
    pd.read_csv(csv_name) for csv_name in ('train.csv', 'val.csv', 'test.csv')
)

# Directories holding the image files for each split, relative to the notebook.
train_images_path, val_images_path, test_images_path = (
    './train_images',
    './val_images',
    './test_images',
)

In [6]:
# Sanity check: number of rows in each split, followed by the first training rows.
print(*map(len, (train, val, test)))
print(train.head())

10000 3000 2000
                                     id  \
0  417812c5-0ce4-499d-b97d-4d28827239bc   
1  5ac91fa3-55f2-4cb3-8c8f-ad84f78e6b36   
2  d2705b90-8347-4cab-a7a6-654540d9a489   
3  a3b33fe7-3085-4433-9c18-8814803891b4   
4  1514b0e4-0665-45bc-ab32-52fce326cc29   

                                             caption  image_id  label  
0  Wet elephants shake water onto people bathing ...    394330      0  
1       Two men holding tennis racquets on the court    130849      0  
2  A bird on a tree limb with mountains in the ba...    514790      0  
3  A kitchen and dining room are featured along w...    182096      0  
4     A fruit stand has various fruits on the table.     68788      1  


### Loading images...

In [17]:
# Read every .jpg in the three split folders into memory as an RGB pixel array.
# The result is a flat Python list (`image_data`) covering train, then val, then test.
#
# NOTE(review): rows of `image_data` are positional only; nothing here ties an
# array back to its file name / `image_id`. The folder listings also contain fewer
# files than the CSVs have rows (e.g. 9463 train files vs 10000 train rows in the
# recorded output), so confirm how features are meant to be joined to the tables.
folders = ["train_images", "val_images", "test_images"]
image_data = []
# NOTE(review): `size_not` and `grayscale` are never populated by this cell; they
# are kept only in case another cell still refers to them.
size_not = []
grayscale = []
for folder in folders:
    # os.listdir() returns entries in arbitrary order; sorting makes the row order
    # of `image_data` reproducible across runs and machines.
    files = sorted(f for f in os.listdir(folder) if f.endswith('.jpg'))
    for file_name in tqdm(files, desc = f"Loading images from folder {folder}"):
        file_path = os.path.join(folder, file_name)

        # Image.open() is lazy and keeps the file handle open; the context manager
        # releases it once the pixels have been copied out, so thousands of files
        # do not accumulate open descriptors.
        with Image.open(file_path) as img:
            # Normalise grayscale / palette / RGBA inputs to 3-channel RGB so that
            # every array has the same channel count.
            if img.mode != "RGB":
                img = img.convert("RGB")

            # np.array() forces the pixel data to be decoded while the file is open.
            img_arr = np.array(img)
        image_data.append(img_arr)
            


Loading images from folder train_images: 100%|██████████| 9463/9463 [00:02<00:00, 3241.18it/s]
Loading images from folder val_images: 100%|██████████| 2957/2957 [00:00<00:00, 3246.37it/s]
Loading images from folder test_images: 100%|██████████| 1978/1978 [00:00<00:00, 3232.63it/s]


Processed images shape: (14398, 100, 100, 3)


In [18]:
# Combine the per-image arrays into a single ndarray with the image index as the
# leading axis. np.stack() raises a ValueError when the images do not all share
# one shape (or when nothing was loaded), instead of producing a ragged/object
# array that would only fail later inside the model. Re-running this cell on the
# already-stacked array yields the same array.
image_data = np.stack(image_data)
print("Loaded images shape:", image_data.shape)

Loaded images shape: (14398, 100, 100, 3)


In [20]:
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPooling2D, Dropout

def build_model(input_shape=(100, 100, 3), feature_dim=1024):
    """Build a small CNN whose final layer is a dense feature vector.

    The network is two blocks of (Conv 'same' -> Conv 'valid' -> MaxPool 2x2 ->
    Dropout 0.25), with 32 and then 64 filters, followed by Flatten and a single
    ReLU Dense layer. There is no classification head: the Dense layer is the
    model output.

    This function only assembles the layers; it does not compile, train, or load
    weights.

    Args:
        input_shape: (height, width, channels) of one input image. Defaults to
            (100, 100, 3), the shape the notebook's loaded images have.
        feature_dim: Number of units in the final Dense layer, i.e. the length
            of the output vector per image. Defaults to 1024.

    Returns:
        An uncompiled keras ``Sequential`` model mapping a batch of images to a
        batch of ``feature_dim``-long vectors.
    """
    return Sequential([
        # Block 1: two 3x3 convolutions with 32 filters. Only the first one pads,
        # so the second trims one pixel from each border.
        Conv2D(32, kernel_size=3, padding='same', activation='relu', input_shape=input_shape),
        Conv2D(32, kernel_size=3, activation='relu'),
        MaxPooling2D(pool_size=2),
        # Drop 1/4 of the activations (active during training only).
        Dropout(0.25),

        # Block 2: same pattern with 64 filters.
        Conv2D(64, kernel_size=3, padding='same', activation='relu'),
        Conv2D(64, kernel_size=3, activation='relu'),
        MaxPooling2D(pool_size=2),
        Dropout(0.25),

        # Flatten the feature maps and project them to the feature vector
        # (1024 units by default).
        Flatten(),
        Dense(feature_dim, activation='relu'),
    ])

model = build_model()
model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 100, 100, 32)      896       
                                                                 
 conv2d_5 (Conv2D)           (None, 98, 98, 32)        9248      
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 49, 49, 32)        0         
 g2D)                                                            
                                                                 
 dropout_3 (Dropout)         (None, 49, 49, 32)        0         
                                                                 
 conv2d_6 (Conv2D)           (None, 49, 49, 64)        18496     
                                                                 
 conv2d_7 (Conv2D)           (None, 47, 47, 64)        36928     
                                                      

In [21]:
# build_model() ends at the Dense feature layer and has no classifier head, so the
# whole network is the feature extractor. Slicing off the last layer
# (`model.layers[:-1]`) would discard that Dense layer and return the much larger
# raw Flatten output instead of the dense feature vector.
# NOTE(review): no training or weight loading is visible before this cell, and the
# pixel values are passed in as loaded (no rescaling) - confirm both are intended.
feature_extractor = model
features = feature_extractor.predict(image_data)
print("Extracted features shape:", features.shape)

 97/450 [=====>........................] - ETA: 14s

KeyboardInterrupt: 

In [22]:

# Report how many GPUs TensorFlow can see (0 means everything runs on the CPU).
# Uses the stable tf.config API rather than the legacy `experimental` alias.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0
