<a href="https://colab.research.google.com/github/dmitriy-iliyov/data-science/blob/main/neural-network/lab_4/notebook/lab_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Mount Google Drive into the Colab VM's filesystem at /content/drive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
os.makedirs('/root/.kaggle', exist_ok=True)

!cp kaggle.json /root/.kaggle/

!chmod 600 /root/.kaggle/kaggle.json

import kagglehub
dataset_dir = '/drive/MyDrive/data/tiny-imagenet'
os.makedirs(dataset_dir, exist_ok=True)

!kaggle datasets download -d akash2sharma/tiny-imagenet -p {dataset_dir}

!unzip -q {dataset_dir}/tiny-imagenet.zip -d {dataset_dir}

cp: cannot stat 'kaggle.json': No such file or directory
chmod: cannot access '/root/.kaggle/kaggle.json': No such file or directory
Dataset URL: https://www.kaggle.com/datasets/akash2sharma/tiny-imagenet
License(s): unknown
Downloading tiny-imagenet.zip to /drive/MyDrive/data/tiny-imagenet
 95% 451M/474M [00:03<00:00, 162MB/s]
100% 474M/474M [00:03<00:00, 126MB/s]


In [None]:
import tensorflow as tf
from tensorflow import keras
import os

def custom_image_generator(labels_map, images_dir, num_classes=200,
                           image_size=(224, 224), batch_size=32):
    """Build a batched ``tf.data.Dataset`` of (image, one-hot label) pairs.

    Class indices are assigned by sorting the distinct class names found in
    ``labels_map``, so index ``i`` corresponds to the ``i``-th name in sorted
    order.

    Args:
        labels_map: Mapping of image file name -> class name.
        images_dir: Directory that contains the image files named in
            ``labels_map``.
        num_classes: Depth of the one-hot label vectors.
        image_size: ``(height, width)`` every decoded image is resized to.
        batch_size: Number of samples per batch.

    Returns:
        A dataset yielding ``(images, labels)`` batches, where images are
        decoded as 3-channel JPEGs and resized to ``image_size`` and labels
        are one-hot vectors of length ``num_classes``.

    Raises:
        ValueError: If ``labels_map`` is empty, or if it contains more
            distinct classes than ``num_classes``.
    """
    if not labels_map:
        raise ValueError('labels_map is empty: there are no images to load')

    class_names = sorted(set(labels_map.values()))
    # tf.one_hot encodes an out-of-range index as an all-zero row instead of
    # raising, so a too-small num_classes would silently corrupt the labels.
    if len(class_names) > num_classes:
        raise ValueError(
            f'labels_map contains {len(class_names)} distinct classes, '
            f'but num_classes is {num_classes}'
        )
    class_to_index = {name: index for index, name in enumerate(class_names)}

    # Keys and values of a dict iterate in the same order, so paths and labels
    # stay aligned element by element.
    image_paths = [os.path.join(images_dir, filename) for filename in labels_map]
    labels = [class_to_index[class_name] for class_name in labels_map.values()]

    def load_image(image_path, label):
        """Read, decode and resize one image and one-hot encode its label."""
        image = tf.io.read_file(image_path)
        image = tf.image.decode_jpeg(image, channels=3)
        image = tf.image.resize(image, list(image_size))
        label = tf.one_hot(label, num_classes)
        return image, label

    print(len(image_paths))
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    dataset = dataset.map(load_image, num_parallel_calls=tf.data.AUTOTUNE)
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

def get_code_name_map(line):
    """Return the first two tab-separated fields of ``line`` as a pair.

    Newline characters are removed from the second field; any fields after
    the second are ignored.
    """
    fields = line.split('\t')
    first, second = fields[0], fields[1]
    return first, second.replace('\n', '')

def get_classes_code_name_map(names_path):
    """Read a tab-separated text file into a dict.

    Each line contributes one entry: its first field is the key and its
    second field is the value (as parsed by ``get_code_name_map``). When a
    key appears more than once, the last occurrence wins.
    """
    with open(names_path) as source:
        return dict(get_code_name_map(row) for row in source)

def get_class_codes(names_path):
    """Return every line of ``names_path`` with surrounding whitespace stripped.

    Blank lines are kept and appear as empty strings in the result.
    """
    with open(names_path) as source:
        return [row.strip() for row in source]

def create_datasets(home_dir='/drive/MyDrive/data/tiny-imagenet/tiny-imagenet-200'):
    """Create the training, validation and test datasets.

    The training and validation sets are an 85% / 15% split of the images in
    ``<home_dir>/train``. The test set is built from ``<home_dir>/val/images``
    with labels taken from ``<home_dir>/val/val_annotations.txt``.

    The ``class_names`` attribute of the training and validation datasets is
    replaced with the human-readable names from ``<home_dir>/words.txt``, in
    the same order as the class codes reported by the loader, so that
    ``class_names[i]`` describes label index ``i``. Codes missing from
    ``words.txt`` are kept as-is.

    Args:
        home_dir: Root directory of the extracted dataset.

    Returns:
        A ``(train_dataset, val_dataset, test_dataset)`` tuple.
    """
    all_class_path = os.path.join(home_dir, 'words.txt')
    train_dataset_path = os.path.join(home_dir, 'train')
    test_dataset_path = os.path.join(home_dir, 'val', 'images')
    test_annotations_path = os.path.join(home_dir, 'val', 'val_annotations.txt')
    height = 224
    width = 224
    batch_size = 32

    # Both subsets must share the split fraction and seed, otherwise the
    # training and validation samples could overlap.
    split_kwargs = dict(
        image_size=(height, width),  # Keras expects (height, width)
        batch_size=batch_size,
        label_mode='categorical',
        validation_split=.15,
        seed=341,
    )
    train_dataset = tf.keras.utils.image_dataset_from_directory(
        train_dataset_path, subset='training', **split_kwargs)
    val_dataset = tf.keras.utils.image_dataset_from_directory(
        train_dataset_path, subset='validation', **split_kwargs)

    # The loader's class_names give the class codes in label-index order, so
    # translating them in place keeps names aligned with the one-hot labels.
    class_codes = list(train_dataset.class_names)
    code_to_name = get_classes_code_name_map(all_class_path)
    class_names = [code_to_name.get(code, code) for code in class_codes]

    train_dataset.class_names = class_names
    val_dataset.class_names = class_names

    class_imgname_code_map = get_classes_code_name_map(test_annotations_path)
    test_dataset = custom_image_generator(class_imgname_code_map,
                                          test_dataset_path,
                                          num_classes=len(class_codes))

    print(train_dataset.class_names)

    return train_dataset, val_dataset, test_dataset

In [None]:
import time
import tensorflow as tf
import numpy as np
from keras import Sequential, Input
from keras.layers import Conv2D, Flatten, Dense, MaxPooling2D, ZeroPadding2D, Dropout
from keras.optimizers import Adam
from matplotlib import pyplot as plt


class AlexNet:
    """AlexNet-style convolutional classifier built with Keras.

    The network takes 224x224 RGB images and ends in a 200-way softmax. It is
    compiled with Adam (learning rate 1e-4), categorical cross-entropy loss
    and an accuracy metric.
    """

    def __init__(self):
        """Build, summarize and compile the model."""
        self.model = Sequential([
            Input((224, 224, 3)),
            Conv2D(96, (11, 11), strides=4, padding='valid', activation='relu'),
            MaxPooling2D(pool_size=(3, 3), strides=2),
            ZeroPadding2D(padding=(2, 2)),
            Conv2D(256, (5, 5), strides=1, padding='valid', activation='relu'),
            MaxPooling2D(pool_size=(3, 3), strides=2),
            Conv2D(384, (3, 3), padding='same', activation='relu'),
            Conv2D(384, (3, 3), padding='same', activation='relu'),
            Conv2D(256, (3, 3), padding='same', activation='relu'),
            MaxPooling2D(pool_size=(3, 3), strides=2),
            Flatten(),
            Dense(4096, activation='relu'),
            Dropout(0.5),
            Dense(4096, activation='relu'),
            Dropout(0.5),
            Dense(200, activation='softmax')
        ])

        # Directory the trained model is saved to by fit().
        self.store_path = '/content/drive/MyDrive/'
        # Filled in by fit() from the training dataset.
        self.class_names = []

        # summary() prints the table itself; wrapping it in print() only adds
        # a stray "None" line after it.
        self.model.summary()

        self.model.compile(optimizer=Adam(learning_rate=1e-4),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])

    def fit(self, train_dataset, val_dataset, epochs=20):
        """Train the model, save it to ``store_path`` and plot the history.

        Args:
            train_dataset: Dataset of (image, one-hot label) batches used for
                training. Its ``class_names`` attribute, when present, is
                stored on this object.
            val_dataset: Dataset evaluated at the end of every epoch.
            epochs: Number of epochs to train for.

        Returns:
            The Keras ``History`` object returned by ``model.fit``.
        """
        # Not every dataset carries class_names (e.g. one built by hand), so
        # fall back to an empty list instead of failing before training.
        self.class_names = getattr(train_dataset, 'class_names', [])
        start = time.time()
        history = self.model.fit(train_dataset, validation_data=val_dataset, epochs=epochs)
        execution_time = time.time() - start
        # store_path already ends with '/', so strip it to avoid a '//' path.
        self.model.save(self.store_path.rstrip('/') + '/lab_4_model.keras')
        self.plot_history(history, epochs, execution_time)
        return history

    def evaluate(self, test_dataset):
        """Evaluate the model on ``test_dataset`` and print loss and accuracy.

        Returns:
            A ``(loss, accuracy)`` tuple.
        """
        loss, accuracy = self.model.evaluate(test_dataset)
        print(f"Loss: {loss}, Accuracy: {accuracy}")
        return loss, accuracy

    # Backward-compatible alias for the original, misspelled method name.
    evaluete = evaluate

    @staticmethod
    def plot_history(history, epochs, execution_time):
        """Plot training/validation accuracy and loss side by side.

        Args:
            history: Object whose ``history`` dict holds the ``accuracy``,
                ``val_accuracy``, ``loss`` and ``val_loss`` series.
            epochs: Requested number of epochs. Kept for interface
                compatibility; the x-axis is derived from the number of
                epochs actually recorded, so a run that stopped early still
                plots correctly.
            execution_time: Training duration in seconds, shown in the title.
        """
        metrics = history.history
        epoch_axis = range(1, len(metrics['loss']) + 1)

        plt.figure(figsize=(12, 4))

        plt.subplot(1, 2, 1)
        plt.plot(epoch_axis, metrics['accuracy'], label='Training Accuracy')
        plt.plot(epoch_axis, metrics['val_accuracy'], label='Validation Accuracy')
        plt.xlabel('Epochs')
        plt.ylabel('Accuracy')
        plt.title(f'Accuracy (Execution Time: {execution_time:.2f} seconds)')
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(epoch_axis, metrics['loss'], label='Training Loss')
        plt.plot(epoch_axis, metrics['val_loss'], label='Validation Loss')
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.title('Loss')
        plt.legend()

        plt.show()

In [None]:
# Build the training, validation and test datasets.
train_dataset, val_dataset, test_dataset = create_datasets()

Found 100000 files belonging to 200 classes.
Using 85000 files for training.
Found 100000 files belonging to 200 classes.
Using 15000 files for validation.


In [None]:
# Build the network, then train it for 20 epochs inside a '/GPU:0' device scope.
alex_net_model = AlexNet()
with tf.device('/GPU:0'):
    alex_net_model.fit(train_dataset, val_dataset, epochs=20)

In [None]:
# Evaluate the trained model on the test dataset.
alex_net_model.evaluate(test_dataset)