In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

import os
from pathlib import Path

## Preprocessing the dataset

The image paths and labels are first loaded into a DataFrame; each image is then decoded into an array of RGB pixel values.

In [2]:
# Root folder of the dataset; every split listing lives directly inside it.
dataset_route = "../GroceryStoreDataset/dataset/"

# Listing files for each split, derived from the dataset root.
train_txt_route = dataset_route + "train.txt"
val_txt_route = dataset_route + "val.txt"
test_txt_route = dataset_route + "test.txt"

In [13]:
def process_txt(route):
    """Read a headerless, comma-separated split listing into a DataFrame.

    Parameters
    ----------
    route : path or file-like
        Location of the listing file passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Columns ``route``, ``fine``, ``coarse`` and ``tensor``. A fourth
        name is supplied so that a ``tensor`` column exists up front; when
        the file has only three fields per row it is filled with NaN.
    """
    column_names = ["route", "fine", "coarse", "tensor"]
    return pd.read_csv(route, sep=",", header=None, names=column_names)

In [14]:
# Load the training split listing and preview its first rows.
df = process_txt(train_txt_route)
df.head()

Unnamed: 0,route,fine,coarse,tensor
0,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,
1,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,
2,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,
3,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,
4,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,


In [15]:
# (rows, columns) of the training listing.
df.shape

(2640, 4)

In [16]:
# Load the validation split listing and preview its first rows.
df_val = process_txt(val_txt_route)
df_val.head()

Unnamed: 0,route,fine,coarse,tensor
0,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,
1,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,
2,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,
3,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,
4,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,


In [17]:
# Load the test split listing and preview its first rows.
df_test = process_txt(test_txt_route)
df_test.head()

Unnamed: 0,route,fine,coarse,tensor
0,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,
1,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,
2,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,
3,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,
4,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,


## From images to pixels

ResNet-style CNNs take 224x224-pixel inputs. Therefore, we must:

1. resize the images
2. convert them into data the CNN can process

In [68]:
def resize_image(route, size=(224,224)):
    """Read a JPEG file, resize it and scale its pixel values by 1/255.

    Parameters
    ----------
    route : str
        Path of the JPEG image to load.
    size : tuple of int, optional
        Target ``(height, width)`` handed to ``tf.image.resize``.
        Defaults to ``(224, 224)``.

    Returns
    -------
    tf.Tensor
        The resized 3-channel image divided by 255.0.
    """
    img = tf.io.read_file(route)
    # Force three channels so every image has the same depth.
    img = tf.image.decode_jpeg(img, channels=3)
    # Honour the ``size`` argument; it used to be ignored in favour of a
    # hard-coded [224, 224].
    img_resized = tf.image.resize(img, size)
    img_normalised = img_resized / 255.0 # normalise data to improve performance and acc
    return img_normalised

In [69]:
# Load and preprocess every training image; each result is stored in the "tensor" column.
df["tensor"] = df["route"].apply(lambda x: resize_image(os.path.join(dataset_route, x)))
df.head()

Unnamed: 0,route,fine,coarse,tensor
0,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,"(((tf.Tensor(0.07058824, shape=(), dtype=float..."
1,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,"(((tf.Tensor(0.5783479, shape=(), dtype=float3..."
2,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,"(((tf.Tensor(0.34684873, shape=(), dtype=float..."
3,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,"(((tf.Tensor(0.74612814, shape=(), dtype=float..."
4,train/Fruit/Apple/Golden-Delicious/Golden-Deli...,0,0,"(((tf.Tensor(0.394208, shape=(), dtype=float32..."


In [70]:
# Load and preprocess every validation image; each result is stored in the "tensor" column.
df_val["tensor"] = df_val["route"].apply(lambda x: resize_image(os.path.join(dataset_route, x)))
df_val.head()

Unnamed: 0,route,fine,coarse,tensor
0,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,"(((tf.Tensor(0.44126683, shape=(), dtype=float..."
1,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,"(((tf.Tensor(0.13513376, shape=(), dtype=float..."
2,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,"(((tf.Tensor(0.15215617, shape=(), dtype=float..."
3,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,"(((tf.Tensor(0.056168094, shape=(), dtype=floa..."
4,val/Fruit/Apple/Golden-Delicious/Golden-Delici...,0,0,"(((tf.Tensor(0.6362045, shape=(), dtype=float3..."


In [71]:
# Load and preprocess every test image; each result is stored in the "tensor" column.
df_test["tensor"] = df_test["route"].apply(lambda x: resize_image(os.path.join(dataset_route, x)))
df_test.head()

Unnamed: 0,route,fine,coarse,tensor
0,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,"(((tf.Tensor(0.5455182, shape=(), dtype=float3..."
1,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,"(((tf.Tensor(0.5507003, shape=(), dtype=float3..."
2,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,"(((tf.Tensor(0.2863658, shape=(), dtype=float3..."
3,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,"(((tf.Tensor(0.42406806, shape=(), dtype=float..."
4,test/Fruit/Apple/Golden-Delicious/Golden-Delic...,0,0,"(((tf.Tensor(0.29791948, shape=(), dtype=float..."


## ResNet-34 - overfitted asf

No es útil porque hay demasiado pocos datos y demasiadas capas. **El overfitting ocurre por cosas como estas**. Siempre hay que buscar la cantidad de capas justa y necesaria (lo que además reduce el gasto y el consumo computacional).

Mejor probamos con **MobileNetV2**.

[Link](https://www.analyticsvidhya.com/blog/2021/08/how-to-code-your-resnet-from-scratch-in-tensorflow/)

In [59]:
from tensorflow.keras.utils import to_categorical

# Stack the per-row image tensors of each split into a single array.
X_train, X_val, X_test = (
    np.stack(frame["tensor"].values) for frame in (df, df_val, df_test)
)

# The fine labels are assumed to be integers; one-hot encode them over 81 classes.
y_train, y_val, y_test = (
    to_categorical(np.array(frame["fine"].values), num_classes=81)
    for frame in (df, df_val, df_test)
)

X_train.shape

(2640, 224, 224, 3)

In [15]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras import layers, models

# The labels are one-hot encoded from the "fine" column with num_classes=81,
# so the softmax head must emit 81 probabilities. The previous value of 43
# made categorical_crossentropy fail on a target/output shape mismatch.
NUM_CLASSES = 81

# Pretrained convolutional base without its ImageNet classification head.
# NOTE(review): the images are scaled to [0, 1] upstream, while the Keras docs
# pair VGG16 ImageNet weights with `vgg16.preprocess_input` - confirm which
# preprocessing is intended.
base_model = VGG16(input_shape=(224, 224, 3), include_top=False, weights='imagenet')
base_model.trainable = False  # Freeze the base layers to limit overfitting

model = models.Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(NUM_CLASSES, activation='softmax')
])

model.compile(
    optimizer='adam',  # Recommended optimizer
    loss='categorical_crossentropy',  # Multi-class classification with one-hot targets
    metrics=['accuracy']  # Track accuracy to evaluate performance
)

model.summary()

In [None]:
# Train for 10 epochs, validating on the held-out validation split after each one.
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))  # note: this run overfitted heavily

In [28]:
model.save('model.keras')  # Save the model to a .keras file