In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import cv2
from tensorflow.keras.applications import VGG16, ResNet50
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split



2023-12-04 15:46:39.775473: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [16]:
gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

Num GPUs Available:  0


In [2]:
raw_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

In [3]:
image_size = (224, 224)
train_images = []
train_ids = []
for i in range(len(raw_data)):
    if os.path.isfile('downloaded_images/' + str(i) + '.jpg'):
        image = cv2.imread('downloaded_images/' + str(i) + '.jpg')
        image = cv2.resize(image, image_size)
        train_images.append(image)
        train_ids.append(i)
    elif os.path.isfile('downloaded_images/' + str(i) + '.jpeg'):
        image = cv2.imread('downloaded_images/' + str(i) + '.jpeg')
        image = cv2.resize(image, image_size)
        train_images.append(image)
        train_ids.append(i)
    elif os.path.isfile('downloaded_images/' + str(i) + '.png'):
        image = cv2.imread('downloaded_images/' + str(i) + '.png')
        image = cv2.resize(image, image_size)
        train_images.append(image)
        train_ids.append(i)
    else:
        # print('Image ' + str(i) + ' not found')
        continue

print('Number of images found: ' + str(len(train_images)))

train_images = np.array(train_images / 255.0)

Premature end of JPEG file
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG


Number of images found: 15096


TypeError: unsupported operand type(s) for /: 'list' and 'float'

In [12]:
# save the images as numpy array
np.save('train_images.npy', train_images)
np.save('train_ids.npy', train_ids)

In [5]:
train_images = np.load('train_images.npy')
train_ids = np.load('train_ids.npy')

In [6]:
train_labels = raw_data['price'].values
train_labels = train_labels[train_ids]
y_cat = to_categorical(train_labels, num_classes = 6)  # Adjust labels for categorical crossentropy

In [7]:
train_images.shape, y_cat.shape

((15096, 224, 224, 3), (15096, 6))

In [8]:
X_train, X_test, y_train, y_test = train_test_split(train_images, y_cat, test_size = 0.2, random_state = 42)

In [9]:
# Load and configure the VGG16 model
vgg16 = VGG16(weights='imagenet', include_top=False, input_tensor=Input(shape=(224, 224, 3)))
vgg16.trainable = False  # Freeze the layers

# Load and configure the ResNet50 model
resnet50 = ResNet50(weights='imagenet', include_top=False, input_tensor=Input(shape=(224, 224, 3)))
resnet50.trainable = False  # Freeze the layers

# Combine features from both models
combined = tf.keras.layers.concatenate([vgg16.output, resnet50.output])

# Add custom layers
flat = Flatten()(combined)
dense1 = Dense(256, activation='relu')(flat)
output = Dense(6, activation='softmax')(dense1)

# Create the final model
model = Model(inputs=[vgg16.input, resnet50.input], outputs=output)

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit([X_train, X_train], y_train, validation_split=0.2, epochs=10, batch_size=32)

# Evaluate the model
model.evaluate([X_test, X_test], y_test)

# Save the best model
model.save('model.h5')


Epoch 1/10

KeyboardInterrupt: 

Then, load the test dataset and apply the model to predict

In [None]:
# read the model
model = tf.keras.models.load_model('model.h5')

In [13]:
image_size = (224, 224)
test_images = []
test_ids = []
for i in range(len(test_data)):
    if os.path.isfile('test_images/' + str(i) + '.jpg'):
        image = cv2.imread('test_images/' + str(i) + '.jpg')
        image = cv2.resize(image, image_size)
        test_images.append(image)
        test_ids.append(i)
    elif os.path.isfile('test_images/' + str(i) + '.jpeg'):
        image = cv2.imread('test_images/' + str(i) + '.jpeg')
        image = cv2.resize(image, image_size)
        test_images.append(image)
        test_ids.append(i)
    elif os.path.isfile('test_images/' + str(i) + '.png'):
        image = cv2.imread('test_images/' + str(i) + '.png')
        image = cv2.resize(image, image_size)
        test_images.append(image)
        test_ids.append(i)
    else:
        # print('Image ' + str(i) + ' not found')
        continue

test_images = np.array(test_images / 255.0)

Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG
Invalid SOS parameters for sequential JPEG


In [14]:
# save the images as numpy array
np.save('test_images.npy', test_images)
np.save('test_ids.npy', test_ids)

In [None]:
test_images.shape, len(test_ids)

In [None]:
y_pred = model.predict([test_images, test_images])
y_pred = np.argmax(y_pred, axis=1)

In [None]:
# save the prediction to csv file
ids = test_data['id'].values

submission = pd.DataFrame({'id': ids,
                           'price': [None] * len(ids)})
for i in range(len(y_pred)):
    submission['price'][test_ids[i]] = y_pred[i]
for j in range(len(ids)):
    if submission['price'][j] is None:
        submission['price'][j] = 1

submission.to_csv('submission.csv', index=False)