# Object Classifier

## Process images

### Extract data from .7z files

**Instructions**

1. Download the dataset from the following link: https://www.kaggle.com/c/cifar-10
2. Place the downloaded `cifar-10.zip` archive in a directory named `data` (the cells below read `data/cifar-10.zip` and extract it).
3. Run this notebook.


In [2]:
from zipfile import ZipFile

# Unpack the Kaggle download into data/. The context manager closes the archive
# even if extraction raises part-way through.
with ZipFile('data/cifar-10.zip') as dataset:
    dataset.extractall(path='data')
print("Dataset extracted successfully")

Dataset extracted successfully


In [3]:
import py7zr

# Unpack the train and test image archives into data/. Each archive is opened in a
# `with` block so its handle is released even when extraction fails.
with py7zr.SevenZipFile('data/train.7z', mode='r') as archive:
    archive.extractall(path='data')
train_file_location='data/train'

with py7zr.SevenZipFile('data/test.7z', mode='r') as archive:
    archive.extractall(path='data')
test_file_location='data/test'


In [1]:
#imports
import os
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from sklearn.model_selection import train_test_split

In [2]:
# Sanity check on the extraction step: count the entries in the training image folder.
filenames = os.listdir(os.path.join("data", "train"))
print(len(filenames))

50000


### Process Data

In [3]:
def get_image_as_array(path):
    """Load the image at `path` and return its pixels as a NumPy array.

    Returns:
        The pixel array, or None when the file cannot be opened or decoded, so
        that callers can drop the failed rows afterwards.
    """
    try:
        # The context manager releases the file handle once the pixels are copied out.
        with Image.open(path) as image:
            return np.array(image)
    except Exception:
        # Best-effort load. Unlike a bare `except:`, this lets KeyboardInterrupt and
        # SystemExit through, so a long loading loop can still be interrupted.
        return None

# Read the label table, then attach each row's decoded PNG (file named after its id).
# Rows whose image fails to load get None in the 'image' column.
df = pd.read_csv('data/trainLabels.csv')
df['image'] = df['id'].map(lambda x: get_image_as_array(f'data/train/{x}.png'))





In [4]:
# Preview the first rows to confirm each id/label was paired with a decoded pixel array.
df.head()

Unnamed: 0,id,label,image
0,1,frog,"[[[59, 62, 63], [43, 46, 45], [50, 48, 43], [6..."
1,2,truck,"[[[154, 177, 187], [126, 137, 136], [105, 104,..."
2,3,truck,"[[[255, 255, 255], [253, 253, 253], [253, 253,..."
3,4,deer,"[[[28, 25, 10], [37, 34, 19], [38, 35, 20], [4..."
4,5,automobile,"[[[170, 180, 198], [168, 178, 196], [177, 185,..."


In [5]:
# Discard rows whose image failed to load (get_image_as_array returned None for them).
df = df.dropna(subset=['image'])

In [6]:
# Row count after dropping failed loads; fewer rows than label entries would mean
# some images could not be read.
df.shape

(50000, 3)

In [7]:
# Distinct class names, in order of first appearance in the label column.
object_classes = pd.unique(df['label'])
print(object_classes)

['frog' 'truck' 'deer' 'automobile' 'bird' 'horse' 'ship' 'cat' 'dog'
 'airplane']


In [8]:

# For reference: number of samples per class.
entries_per_class = df['label'].value_counts()
print(entries_per_class) # classes are evenly represented, so no class rebalancing is needed

label
frog          5000
truck         5000
deer          5000
automobile    5000
bird          5000
horse         5000
ship          5000
cat           5000
dog           5000
airplane      5000
Name: count, dtype: int64


In [9]:
# Give every class name an integer id, numbered by its position in object_classes.
labels_map = {class_name: class_id for class_id, class_name in enumerate(object_classes)}

print(labels_map)

{'frog': 0, 'truck': 1, 'deer': 2, 'automobile': 3, 'bird': 4, 'horse': 5, 'ship': 6, 'cat': 7, 'dog': 8, 'airplane': 9}


In [10]:
# Encode labels as integer ids and rescale pixel values by 1/255.
#
# Guard against re-running this cell: a second pass would map the already-numeric
# labels to NaN and divide the pixels by 255 again, silently corrupting df. The check
# runs before anything is modified, so a failed guard leaves df untouched.
unmapped_labels = set(df['label'].unique()) - set(labels_map)
if unmapped_labels:
    raise ValueError(
        f"Labels {sorted(unmapped_labels, key=str)} are not keys of labels_map; "
        "this cell may already have been run. Rebuild df before running it again."
    )

df['label'] = df['label'].map(labels_map)
df['image'] = df['image']/255
df.shape

(50000, 3)

In [11]:
# Positional 80/20 split: the first 80% of rows train the model, the rest are held out.
# No shuffling is applied here.
num_rows_80_percent = int(len(df) * 0.8)
train, test = df.iloc[:num_rows_80_percent], df.iloc[num_rows_80_percent:]

for split in (train, test):
    print(split.shape)


(40000, 3)
(10000, 3)


In [12]:
# Preview the training split: labels are now integer ids and pixels are floats.
train.head()

Unnamed: 0,id,label,image
0,1,0,"[[[0.23137254901960785, 0.24313725490196078, 0..."
1,2,1,"[[[0.6039215686274509, 0.6941176470588235, 0.7..."
2,3,1,"[[[1.0, 1.0, 1.0], [0.9921568627450981, 0.9921..."
3,4,2,"[[[0.10980392156862745, 0.09803921568627451, 0..."
4,5,3,"[[[0.6666666666666666, 0.7058823529411765, 0.7..."


In [13]:
# Separate the inputs (image column) from the targets (label column) for each split.
X_train, y_train = train['image'], train['label']
X_test, y_test = test['image'], test['label']






In [14]:
# Combine the per-row image arrays into one dense float32 array per split
# (shape: (N, 32, 32, 3)).
X_train = np.stack(X_train).astype(np.float32)
X_test = np.stack(X_test).astype(np.float32)

# Targets become flat int32 vectors (shape: (N,)).
y_train = np.array(y_train, dtype=np.int32)
y_test = np.array(y_test, dtype=np.int32)

# Report the final shapes handed to the model.
print("X_train.shape: ", X_train.shape)
print("y_train.shape: ", y_train.shape)

print("X_test.shape: ", X_test.shape)
print("y_test.shape: ", y_test.shape)





X_train.shape:  (40000, 32, 32, 3)
y_train.shape:  (40000,)
X_test.shape:  (10000, 32, 32, 3)
y_test.shape:  (10000,)


## Create Neural Net

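We start with a simple fully connected baseline: each image is flattened and passed through one 64-unit ReLU layer, followed by a softmax output over the classes.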

In [15]:
import tensorflow as tf
from tensorflow import keras





#### Setting up layers

In [16]:
num_of_classes = len(object_classes)

# Baseline network: flatten the 32x32x3 image, one hidden ReLU layer, softmax output
# with one unit per class.
inputs = keras.Input(shape=(32, 32, 3))
flattened = keras.layers.Flatten()(inputs)
hidden = keras.layers.Dense(64, activation='relu')(flattened)
outputs = keras.layers.Dense(num_of_classes, activation='softmax')(hidden)

model = keras.Model(inputs=inputs, outputs=outputs)

In [17]:
# Configure training for the baseline: Adam optimizer, accuracy as the reported metric.
# The targets are integer class ids, hence the sparse variant of cross-entropy.
model.compile(
    optimizer='adam', 
    loss='sparse_categorical_crossentropy', # The data is not one hot encoded
    metrics=['accuracy'])

In [18]:
# Train the baseline for 10 epochs, holding out 20% of the training arrays for validation.
# NOTE(review): per the Keras docs, validation_split takes the held-out samples from the
# end of the arrays, before shuffling — confirm that suits this data ordering.
history=model.fit(X_train,y_train, validation_split=0.2, epochs=10)

Epoch 1/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 935us/step - accuracy: 0.2449 - loss: 2.0717 - val_accuracy: 0.3244 - val_loss: 1.9064
Epoch 2/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 874us/step - accuracy: 0.3469 - loss: 1.8245 - val_accuracy: 0.3735 - val_loss: 1.7683
Epoch 3/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 837us/step - accuracy: 0.3716 - loss: 1.7528 - val_accuracy: 0.3823 - val_loss: 1.7360
Epoch 4/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 932us/step - accuracy: 0.3768 - loss: 1.7350 - val_accuracy: 0.3681 - val_loss: 1.7610
Epoch 5/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 960us/step - accuracy: 0.3921 - loss: 1.6986 - val_accuracy: 0.3853 - val_loss: 1.7352
Epoch 6/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 934us/step - accuracy: 0.3992 - loss: 1.6756 - val_accuracy: 0.3835 - val_loss: 1.7394
Epoc

We can see that the accuracy of the model is really low: validation accuracy stays below 40% in the epochs shown above.

## Using Transfer Learning

In [19]:
# NOTE(review): this prints only the History object's repr (see the recorded output);
# the per-epoch metrics live in history.history — consider displaying those instead.
print(history)

<keras.src.callbacks.history.History object at 0x38a3fb4c0>


In [20]:
from tensorflow.keras import Sequential, models, layers, optimizers
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.applications.resnet50 import ResNet50




In [21]:
# ResNet50 with ImageNet weights and without its classification head (include_top=False).
# The 256x256 input size matches the 32x32 images after the three 2x upsampling layers
# placed in front of this base when the model is assembled.
convolutional_base = ResNet50(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 0us/step


In [23]:
# Transfer-learning classifier: upsample the images, extract features with the
# pretrained base, then classify with a small dense head.
model = models.Sequential([
    # Three 2x upsampling steps take 32x32 inputs to 256x256 (32 -> 64 -> 128 -> 256),
    # the input size the convolutional base was built with.
    layers.UpSampling2D((2, 2)),
    layers.UpSampling2D((2, 2)),
    layers.UpSampling2D((2, 2)),
    convolutional_base,  # pretrained ResNet50 feature extractor
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),  # to prevent overfitting
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(num_of_classes, activation='softmax'),
])




In [29]:
# RMSprop with a learning rate of 2e-5 — presumably kept small so the pretrained weights
# change gradually during fine-tuning. Sparse cross-entropy because labels are integer ids.
model.compile(optimizer=optimizers.RMSprop(learning_rate=2e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [30]:
# Train for 5 epochs with 10% of the training arrays held out for validation.
# Each epoch took roughly an hour in the recorded run.
# NOTE(review): X_test / y_test are never passed to model.evaluate in the visible
# notebook, so the reported accuracy is validation accuracy only.
history = model.fit(X_train, y_train,  validation_split=0.1, epochs=5)

Epoch 1/5
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4197s[0m 4s/step - accuracy: 0.2461 - loss: 2.2264 - val_accuracy: 0.6595 - val_loss: 1.1433
Epoch 2/5
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3974s[0m 4s/step - accuracy: 0.5240 - loss: 1.4019 - val_accuracy: 0.8450 - val_loss: 0.5678
Epoch 3/5
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3677s[0m 3s/step - accuracy: 0.6806 - loss: 0.9753 - val_accuracy: 0.8928 - val_loss: 0.3875
Epoch 4/5
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3704s[0m 3s/step - accuracy: 0.7912 - loss: 0.6614 - val_accuracy: 0.9087 - val_loss: 0.3913
Epoch 5/5
[1m1125/1125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3667s[0m 3s/step - accuracy: 0.8560 - loss: 0.4837 - val_accuracy: 0.9162 - val_loss: 0.4114


## Building a Predictive System
Upload an image to classify using the trained model.

In [32]:
# Upload and predict on a single image (32x32x3 normalized)
import io
from PIL import Image
import numpy as np
import ipywidgets as widgets
from IPython.display import display, clear_output

# Build the inverse mapping (class id -> class name) from the existing `labels_map`.
try:
    inv_labels_map = {v: k for k, v in labels_map.items()}
except NameError as e:
    # Only a missing name matches the message below; chaining keeps the original
    # error visible in the traceback.
    raise RuntimeError("labels_map is not defined above. Please run the data prep cells first.") from e

# Output area that captures whatever the prediction callback prints or displays.
out = widgets.Output()

# Single-file upload control restricted to PNG/JPEG extensions.
uploader = widgets.FileUpload(
    accept='.png,.jpg,.jpeg',
    multiple=False,
    description='Upload image'
)

# Button that triggers the prediction; its click handler is attached later in this cell.
predict_btn = widgets.Button(
    description='Predict',
    button_style='primary',
    tooltip='Run prediction on the uploaded image'
)

# NOTE(review): img_preview is never added to the layout or updated in the visible code
# (the preview is shown with display(pil_img) instead) — confirm whether it is needed.
img_preview = widgets.Image(format='png')


def preprocess_image(img: "Image.Image", size: "tuple[int, int]" = (32, 32)) -> "np.ndarray":
    """Turn a PIL image into a single-item, normalized input batch for the model.

    Args:
        img: Source image in any mode; it is converted to RGB first.
        size: Target size handed to ``img.resize`` as (width, height). Defaults to
            (32, 32), the size of the training images.

    Returns:
        A float32 array with a leading batch axis of length 1 and pixel values
        divided by 255.
    """
    rgb = img.convert('RGB')
    resized = rgb.resize(size)
    pixels = np.array(resized).astype('float32') / 255.0
    # The model predicts on batches, so add a leading batch axis.
    return np.expand_dims(pixels, axis=0)


def _get_uploaded_content(upload_widget: widgets.FileUpload):
    v = upload_widget.value
    # ipywidgets <8: dict-like {filename: {content: bytes, ...}}
    if isinstance(v, dict):
        first = next(iter(v.values()))
        return first.get('content', None)
    # ipywidgets >=8: tuple/list of UploadedFile (dict-like or object with .content)
    if isinstance(v, (tuple, list)) and len(v) > 0:
        first = v[0]
        if isinstance(first, dict):
            return first.get('content', None)
        # Bunch/SimpleNamespace-like with attribute access
        return getattr(first, 'content', None)
    return None


def on_predict_clicked(_):
    """Button callback: classify the uploaded image and report the result in `out`.

    Everything printed or displayed here is routed to the `out` widget. The function
    returns early, with a message, when no file is uploaded, when the upload has no
    readable content, when the image cannot be decoded, or when `model` is undefined.

    Args:
        _: The clicked button (unused).
    """
    with out:
        clear_output()
        if not uploader.value:
            print('Please upload an image first.')
            return
        file_content = _get_uploaded_content(uploader)
        if file_content is None:
            print('Could not read uploaded file content. Please try another image.')
            return
        try:
            pil_img = Image.open(io.BytesIO(file_content))
            # Image.open is lazy: pixel data is only decoded on first use. Force the
            # decode here so truncated or corrupt files are reported by this handler
            # instead of raising later during display/convert.
            pil_img.load()
        except Exception as e:
            print('Failed to read image:', e)
            return

        # Show preview
        display(pil_img)

        # Preprocess
        x = preprocess_image(pil_img)

        # Ensure model is available
        try:
            m = model
        except NameError:
            print('Model is not defined. Please run the training cells first.')
            return

        # Predict
        preds = m.predict(x)
        # Expect one row of class probabilities for the single uploaded image.
        if preds.ndim == 2 and preds.shape[0] == 1:
            probs = preds[0]
            pred_idx = int(np.argmax(probs))
            # Fall back to a generic name if the id is missing from the label map.
            pred_label = inv_labels_map.get(pred_idx, f'class_{pred_idx}')
            confidence = float(probs[pred_idx])
            print(f'Predicted: {pred_label} (confidence: {confidence:.3f})')
        else:
            print('Unexpected prediction output shape:', preds.shape)


# Run on_predict_clicked every time the Predict button is pressed.
predict_btn.on_click(on_predict_clicked)

# Stack the upload control, the button and the output area vertically, then render them.
ui = widgets.VBox([uploader, predict_btn, out])
display(ui)

VBox(children=(FileUpload(value=(), accept='.png,.jpg,.jpeg', description='Upload image'), Button(button_style…