In [None]:
import os
import urllib.request
import zipfile

import shutil

from tensorflow.keras.applications import MobileNet
from tensorflow.keras import models
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras import optimizers
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping

# Data

## Download data

In [5]:

url = "https://storage.yandexcloud.net/academy.ai/cat-and-dog.zip"
zip_path = "cat-and-dog.zip"

# Skip the transfer when the archive is already on disk, so that
# "Restart & Run All" does not re-download it every time.
if not os.path.exists(zip_path):
    # Download under a temporary name and rename afterwards: an interrupted
    # transfer then never leaves a truncated file under the final name.
    partial_path = zip_path + ".part"
    urllib.request.urlretrieve(url, partial_path)
    os.replace(partial_path, zip_path)

('cat-and-dog.zip', <http.client.HTTPMessage at 0x30983ddd0>)

In [6]:
# Directory that receives the unpacked contents of the downloaded archive.
extract_path = "./temp"
os.makedirs(extract_path, exist_ok=True)

# Unpack every member of the archive into extract_path.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

## Train / validation / test split

In [15]:
# Source images (one sub-directory per class) and the root of the split dataset.
IMAGE_PATH = './temp/training_set/training_set/'
BASE_DIR = './dataset/'

# Only sub-directories are treated as classes; stray files in IMAGE_PATH
# (e.g. OS metadata files) would otherwise be picked up as class names.
CLASS_LIST = sorted(
    entry for entry in os.listdir(IMAGE_PATH)
    if os.path.isdir(os.path.join(IMAGE_PATH, entry))
)
CLASS_COUNT = len(CLASS_LIST)

# Start from an empty dataset directory so files from earlier runs cannot linger.
if os.path.exists(BASE_DIR):
    shutil.rmtree(BASE_DIR)

os.mkdir(BASE_DIR)

train_dir = os.path.join(BASE_DIR, 'train')
validation_dir = os.path.join(BASE_DIR, 'validation')
test_dir = os.path.join(BASE_DIR, 'test')

os.mkdir(train_dir)
os.mkdir(validation_dir)
os.mkdir(test_dir)

In [16]:
def create_dataset(
    img_path: str,
    new_path: str,
    class_name: str,
    start_index: int,
    end_index: int
) -> None:
    """Copy a slice of one class's files into a split directory.

    Files in ``img_path/class_name`` are listed in sorted order and the
    entries ``[start_index:end_index]`` are copied to ``new_path/class_name``.

    Args:
        img_path: Directory containing one sub-directory per class.
        new_path: Split directory (train / validation / test) to copy into.
        class_name: Name of the class sub-directory to process.
        start_index: Index of the first file of the slice (inclusive).
        end_index: Index one past the last file of the slice (exclusive).
    """
    src_path = os.path.join(img_path, class_name)
    dst_path = os.path.join(new_path, class_name)

    # os.listdir returns entries in arbitrary order. Sorting makes the slice
    # boundaries stable, so separate calls for train/validation/test produce
    # disjoint, reproducible subsets.
    class_files = sorted(os.listdir(src_path))

    # exist_ok lets the function be called again for the same destination.
    os.makedirs(dst_path, exist_ok=True)

    for fname in class_files[start_index:end_index]:
        src = os.path.join(src_path, fname)
        dst = os.path.join(dst_path, fname)
        shutil.copyfile(src, dst)

In [17]:
# Split every class 70% / 15% / 15% into train, validation and test.
for class_name in CLASS_LIST:
    total_count = len(os.listdir(os.path.join(IMAGE_PATH, class_name)))

    # Slice boundaries, expressed as indices into the class's file list.
    train_end = int(total_count * 0.7)
    val_end = int(total_count * 0.85)

    split_plan = (
        (train_dir, 0, train_end),
        (validation_dir, train_end, val_end),
        (test_dir, val_end, total_count),
    )
    for target_dir, first, last in split_plan:
        create_dataset(IMAGE_PATH, target_dir, class_name, first, last)

# Model

## Create model

In [36]:
# Spatial size of the images fed to the network, and the number of
# output classes of the softmax head built in model_maker.
IMG_WIDTH, IMG_HEIGHT = 160, 160
NUM_CLASSES = 2

In [37]:
def model_maker():
    """Build a classifier made of a frozen MobileNet backbone and a new dense head.

    The backbone is created without its top classification layers and is
    frozen, so only the head (global average pooling -> Dense(64, relu) ->
    Dropout(0.5) -> Dense(NUM_CLASSES, softmax)) has trainable weights.

    Returns:
        An uncompiled ``models.Model`` taking images of shape
        ``(IMG_HEIGHT, IMG_WIDTH, 3)`` and producing ``NUM_CLASSES``
        softmax probabilities.
    """
    # Keras image tensors are laid out as (height, width, channels).
    input_shape = (IMG_HEIGHT, IMG_WIDTH, 3)

    base_model = MobileNet(include_top=False, input_shape=input_shape)

    # Freeze the whole backbone in one step; this propagates to all of its layers.
    base_model.trainable = False

    # Named `inputs` rather than `input` to avoid shadowing the Python builtin.
    inputs = Input(shape=input_shape)
    features = base_model(inputs)
    features = GlobalAveragePooling2D()(features)
    features = Dense(64, activation='relu')(features)
    features = Dropout(0.5)(features)
    predictions = Dense(NUM_CLASSES, activation='softmax')(features)

    return models.Model(inputs=inputs, outputs=predictions)

In [38]:
# Build the classifier: frozen MobileNet backbone plus the new dense head from model_maker.
model = model_maker()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet/mobilenet_1_0_160_tf_no_top.h5
[1m17225924/17225924[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [39]:
# Print the model's layer-by-layer summary.
model.summary()

## Image data generator

In [40]:
# Training images are rescaled to [0, 1] and randomly augmented
# (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)
# Validation/test images are only rescaled, never augmented.
test_datagen = ImageDataGenerator(rescale=1./255)

# target_size is (height, width); it is taken from the same constants the
# model input uses so the two cannot drift apart.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=20,
    class_mode='categorical'
)

validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=20,
    class_mode='categorical',
    shuffle=False  # fixed order: validation batches are identical every epoch
)

Found 5603 images belonging to 2 classes.
Found 1201 images belonging to 2 classes.


## Compile

In [None]:
# Use the Adam optimizer (as recommended in the assignment).
# When fine-tuning pretrained models, a learning_rate between 1e-4 and 2e-4 is typically used.
# The metric is named 'acc', so its validation counterpart is reported as 'val_acc'.
model.compile(loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=1e-4),
    metrics=['acc']
)

## Training

In [None]:
# EarlyStopping halts training automatically once validation accuracy stops improving.
early_stopping = EarlyStopping(
    monitor='val_acc',  # track validation accuracy (the metric is compiled as 'acc')
    mode='max',  # accuracy must increase; stated explicitly instead of relying on name-based inference
    patience=5,  # wait 5 epochs without improvement
    restore_best_weights=True,  # roll back to the weights of the best epoch
    verbose=1
)

history = model.fit(
    train_generator,
    epochs=20,  # reduced from 30 to 20 (the model reaches 95%+ by the 3rd epoch)
    validation_data=validation_generator,
    callbacks=[early_stopping]  # enable early stopping
)

Epoch 1/30
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 97ms/step - acc: 0.7077 - loss: 0.5814 - val_acc: 0.9151 - val_loss: 0.2648
Epoch 2/30
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 98ms/step - acc: 0.8376 - loss: 0.3719 - val_acc: 0.9442 - val_loss: 0.1753
Epoch 3/30
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 101ms/step - acc: 0.8810 - loss: 0.2892 - val_acc: 0.9609 - val_loss: 0.1355
Epoch 4/30
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 106ms/step - acc: 0.8986 - loss: 0.2490 - val_acc: 0.9642 - val_loss: 0.1117
Epoch 5/30
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 109ms/step - acc: 0.9109 - loss: 0.2232 - val_acc: 0.9700 - val_loss: 0.0974
Epoch 6/30
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 112ms/step - acc: 0.9147 - loss: 0.2030 - val_acc: 0.9709 - val_loss: 0.0875
Epoch 7/30
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32

In [46]:
# Test images go through the same rescale-only pipeline as validation.
# target_size is (height, width), taken from the constants used for the model input.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=20,
    class_mode='categorical',
    shuffle=False  # keep file order so batches line up with test_generator.classes
)

Found 1201 images belonging to 2 classes.


In [None]:
# Evaluate on the whole test set (1201 images / batch size 20 = ~61 steps).
test_loss, test_acc = model.evaluate(test_generator)
# Prints the final test-set accuracy as a fraction and as a percentage (message text is in Russian).
print(f'\nФинальная точность на тестовой выборке: {test_acc:.4f} ({test_acc*100:.2f}%)')

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 75ms/step - acc: 0.9650 - loss: 0.0768
