In [None]:
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8432401a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-25T08:58:31.750832Z",
     "iopub.status.busy": "2024-11-25T08:58:31.750501Z",
     "iopub.status.idle": "2024-11-25T08:58:36.261641Z",
     "shell.execute_reply": "2024-11-25T08:58:36.260703Z"
    },
    "papermill": {
     "duration": 4.517253,
     "end_time": "2024-11-25T08:58:36.263737",
     "exception": false,
     "start_time": "2024-11-25T08:58:31.746484",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import sys",
    "sys.path.append('/kaggle/input/alexnet/pytorch/baseline/1')",
    "import torch",
    "import torch.nn as nn",
    "from torchvision import datasets",
    "from baseline.model import AlexNetBaseline, init_params",
    "from baseline.data_transforms import prepreprocess, get_preprocess, get_train_augment",
    "from baseline.train import train",
    "import matplotlib.pyplot as plt",
    "from baseline.eval import top1_k "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "032c844d",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-25T08:58:36.269899Z",
     "iopub.status.busy": "2024-11-25T08:58:36.269547Z",
     "iopub.status.idle": "2024-11-25T08:58:36.279850Z",
     "shell.execute_reply": "2024-11-25T08:58:36.278988Z"
    },
    "papermill": {
     "duration": 0.015032,
     "end_time": "2024-11-25T08:58:36.281477",
     "exception": false,
     "start_time": "2024-11-25T08:58:36.266445",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda"
     ]
    }
   ],
   "source": [
    "# Seed PyTorch's global RNG before any later cell draws from it.",
    "torch.manual_seed(0)",
    "",
    "# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.",
    "if torch.cuda.is_available():",
    "    device = torch.device(\"cuda\")",
    "else:",
    "    device = torch.device(\"cpu\")",
    "print(device)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "38441630",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-25T08:58:36.287362Z",
     "iopub.status.busy": "2024-11-25T08:58:36.286736Z",
     "iopub.status.idle": "2024-11-25T08:59:35.700665Z",
     "shell.execute_reply": "2024-11-25T08:59:35.699599Z"
    },
    "papermill": {
     "duration": 59.419023,
     "end_time": "2024-11-25T08:59:35.702780",
     "exception": false,
     "start_time": "2024-11-25T08:58:36.283757",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-320.tgz to datasets/imagenette/imagenette2-320.tgz"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 341663724/341663724 [00:10<00:00, 33389013.26it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Extracting datasets/imagenette/imagenette2-320.tgz to datasets/imagenette",
      "Mean: [119.00100708007812, 116.99090576171875, 109.69341278076172]",
      "Covar: tensor([[19262.6914, 18478.0020, 17268.9180],",
      "        [18478.0020, 18648.8633, 17702.2598],",
      "        [17268.9180, 17702.2598, 17810.1914]], device='cuda:0')",
      "Eigen values (sqrt): tensor([[ 15.6041],",
      "        [ 35.3883],",
      "        [232.8646]])",
      "Eigen vectors: tensor([[-0.4275, -0.6883,  0.5861],",
      "        [ 0.8118, -0.0070,  0.5839],",
      "        [-0.3978,  0.7254,  0.5618]])",
      "Train: 9469",
      "Validation: 1963",
      "Test: 1962"
     ]
    }
   ],
   "source": [
    "import os",
    "",
    "# Some torchvision releases raise a RuntimeError when download=True is passed and the",
    "# extracted folder is already present, so only request a download when it is missing.",
    "# This keeps the cell re-runnable in the same session.",
    "need_download = not os.path.isdir('datasets/imagenette/imagenette2-320')",
    "",
    "# First pass over the training split, loaded with `prepreprocess`; it is handed to",
    "# get_preprocess / get_train_augment, whose results are used as transforms below.",
    "train_dataset = datasets.Imagenette(",
    "    'datasets/imagenette', split='train', size='320px', download=need_download, transform=prepreprocess)",
    "preprocess = get_preprocess(train_dataset)",
    "train_augment = get_train_augment(train_dataset)",
    "",
    "# Rebuild the training split with the training-time augmentation attached.",
    "train_dataset = datasets.Imagenette(",
    "    'datasets/imagenette', split='train', size='320px', transform=train_augment)",
    "",
    "# Halve the official 'val' split into validation and test subsets. A dedicated seeded",
    "# generator makes the partition reproducible and independent of how much of the",
    "# global RNG stream was consumed before this line.",
    "val_dataset = datasets.Imagenette(",
    "    'datasets/imagenette', split='val', size='320px', transform=preprocess)",
    "val_dataset, test_dataset = torch.utils.data.random_split(",
    "    val_dataset, [0.5, 0.5], generator=torch.Generator().manual_seed(0))",
    "",
    "print(f'Train: {len(train_dataset)}')",
    "print(f'Validation: {len(val_dataset)}')",
    "print(f'Test: {len(test_dataset)}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9182e039",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-25T08:59:35.717579Z",
     "iopub.status.busy": "2024-11-25T08:59:35.717266Z",
     "iopub.status.idle": "2024-11-25T08:59:35.732223Z",
     "shell.execute_reply": "2024-11-25T08:59:35.731384Z"
    },
    "papermill": {
     "duration": 0.023591,
     "end_time": "2024-11-25T08:59:35.733833",
     "exception": false,
     "start_time": "2024-11-25T08:59:35.710242",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Size the network by the number of classes in the training set.",
    "num_classes = len(train_dataset.classes)",
    "model = AlexNetBaseline(num_classes)",
    "",
    "# Wrap the model in DataParallel only when more than one CUDA device is visible.",
    "gpu_count = torch.cuda.device_count()",
    "if gpu_count > 1:",
    "    print(f'Using {gpu_count} GPUs.')",
    "    model = nn.DataParallel(model)",
    "",
    "model.to(device)",
    "init_params(model)  # initialize parameters",
    "",
    "# torch.compile is switched off for now, so `compiled_model` is just another name for `model`.",
    "compiled_model = model",
    "# if torch.cuda.is_available():",
    "#     compiled_model = torch.compile(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "cd07bd77",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-25T08:59:35.746653Z",
     "iopub.status.busy": "2024-11-25T08:59:35.746383Z",
     "iopub.status.idle": "2024-11-25T10:48:11.475438Z",
     "shell.execute_reply": "2024-11-25T10:48:11.474297Z"
    },
    "papermill": {
     "duration": 6515.751479,
     "end_time": "2024-11-25T10:48:11.491321",
     "exception": false,
     "start_time": "2024-11-25T08:59:35.739842",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/200, Cost: 2.3125381, CV_Error: 90.22%, lr: 0.01, Time: 33s",
      "Epoch 2/200, Cost: 2.3037463, CV_Error: 90.27%, lr: 0.01, Time: 32s",
      "Epoch 3/200, Cost: 2.3020736, CV_Error: 90.27%, lr: 0.01, Time: 33s",
      "Epoch 4/200, Cost: 2.2999391, CV_Error: 87.06%, lr: 0.01, Time: 34s",
      "Epoch 5/200, Cost: 2.2879239, CV_Error: 87.77%, lr: 0.01, Time: 33s",
      "Epoch 6/200, Cost: 2.2548936, CV_Error: 83.70%, lr: 0.01, Time: 33s",
      "Epoch 7/200, Cost: 2.1856481, CV_Error: 79.37%, lr: 0.01, Time: 32s",
      "Epoch 8/200, Cost: 2.1057905, CV_Error: 79.22%, lr: 0.01, Time: 33s",
      "Epoch 9/200, Cost: 2.0159132, CV_Error: 81.46%, lr: 0.01, Time: 33s",
      "Epoch 10/200, Cost: 1.8882560, CV_Error: 77.74%, lr: 0.01, Time: 33s",
      "Epoch 11/200, Cost: 1.7569191, CV_Error: 72.29%, lr: 0.01, Time: 34s",
      "Epoch 12/200, Cost: 1.5829008, CV_Error: 63.12%, lr: 0.01, Time: 34s",
      "Epoch 13/200, Cost: 1.5781820, CV_Error: 69.38%, lr: 0.01, Time: 32s",
      "Epoch 14/200, Cost: 1.4549547, CV_Error: 73.97%, lr: 0.01, Time: 33s",
      "Epoch 15/200, Cost: 1.3495400, CV_Error: 65.05%, lr: 0.01, Time: 33s",
      "Epoch 16/200, Cost: 1.2605954, CV_Error: 60.21%, lr: 0.01, Time: 33s",
      "Epoch 17/200, Cost: 1.1571791, CV_Error: 51.40%, lr: 0.01, Time: 32s",
      "Epoch 18/200, Cost: 1.0988334, CV_Error: 56.85%, lr: 0.01, Time: 33s",
      "Epoch 19/200, Cost: 1.1250413, CV_Error: 57.26%, lr: 0.01, Time: 32s",
      "Epoch 20/200, Cost: 1.0301573, CV_Error: 58.07%, lr: 0.01, Time: 33s",
      "Epoch 21/200, Cost: 0.9612606, CV_Error: 55.68%, lr: 0.01, Time: 32s",
      "Epoch 22/200, Cost: 0.9339173, CV_Error: 52.98%, lr: 0.01, Time: 32s",
      "Epoch 23/200, Cost: 0.8998632, CV_Error: 57.16%, lr: 0.01, Time: 33s",
      "Epoch 24/200, Cost: 0.8660297, CV_Error: 52.22%, lr: 0.01, Time: 33s",
      "Epoch 25/200, Cost: 0.8340977, CV_Error: 57.87%, lr: 0.01, Time: 33s",
      "Epoch 26/200, Cost: 0.8184806, CV_Error: 50.23%, lr: 0.01, Time: 32s",
      "Epoch 27/200, Cost: 0.8015968, CV_Error: 47.94%, lr: 0.01, Time: 33s",
      "Epoch 28/200, Cost: 0.7378986, CV_Error: 53.03%, lr: 0.01, Time: 33s",
      "Epoch 29/200, Cost: 0.7087646, CV_Error: 47.58%, lr: 0.01, Time: 33s",
      "Epoch 30/200, Cost: 0.7151677, CV_Error: 39.28%, lr: 0.01, Time: 33s",
      "Epoch 31/200, Cost: 0.6838276, CV_Error: 50.33%, lr: 0.01, Time: 33s",
      "Epoch 32/200, Cost: 0.6388513, CV_Error: 47.83%, lr: 0.01, Time: 33s",
      "Epoch 33/200, Cost: 0.6189668, CV_Error: 50.48%, lr: 0.01, Time: 32s",
      "Epoch 34/200, Cost: 0.6128574, CV_Error: 53.44%, lr: 0.01, Time: 33s",
      "Epoch 35/200, Cost: 0.5829689, CV_Error: 46.92%, lr: 0.01, Time: 32s",
      "Epoch 36/200, Cost: 0.5521603, CV_Error: 45.49%, lr: 0.01, Time: 32s",
      "Epoch 37/200, Cost: 0.5380176, CV_Error: 44.57%, lr: 0.01, Time: 32s",
      "Epoch 38/200, Cost: 0.5441541, CV_Error: 52.06%, lr: 0.01, Time: 33s",
      "Epoch 39/200, Cost: 0.5021203, CV_Error: 36.42%, lr: 0.01, Time: 32s",
      "Epoch 40/200, Cost: 0.4738961, CV_Error: 45.44%, lr: 0.01, Time: 33s",
      "Epoch 41/200, Cost: 0.4750730, CV_Error: 46.77%, lr: 0.01, Time: 32s",
      "Epoch 42/200, Cost: 0.4539771, CV_Error: 47.58%, lr: 0.01, Time: 33s",
      "Epoch 43/200, Cost: 0.4325072, CV_Error: 46.71%, lr: 0.01, Time: 33s",
      "Epoch 44/200, Cost: 0.4249929, CV_Error: 44.93%, lr: 0.01, Time: 33s",
      "Epoch 45/200, Cost: 0.4261384, CV_Error: 52.93%, lr: 0.01, Time: 33s",
      "Epoch 46/200, Cost: 0.4013754, CV_Error: 36.93%, lr: 0.01, Time: 32s",
      "Epoch 47/200, Cost: 0.4026439, CV_Error: 48.70%, lr: 0.01, Time: 33s",
      "Epoch 48/200, Cost: 0.3437405, CV_Error: 40.70%, lr: 0.01, Time: 32s",
      "Epoch 49/200, Cost: 0.3463016, CV_Error: 38.56%, lr: 0.01, Time: 32s",
      "Epoch 50/200, Cost: 0.3572978, CV_Error: 48.90%, lr: 0.01, Time: 33s",
      "Epoch 51/200, Cost: 0.3476186, CV_Error: 58.99%, lr: 0.01, Time: 33s",
      "Epoch 52/200, Cost: 0.3365831, CV_Error: 37.60%, lr: 0.01, Time: 32s",
      "Epoch 53/200, Cost: 0.3204740, CV_Error: 38.36%, lr: 0.01, Time: 32s",
      "Epoch 54/200, Cost: 0.3074481, CV_Error: 35.40%, lr: 0.01, Time: 33s",
      "Epoch 55/200, Cost: 0.3045584, CV_Error: 46.82%, lr: 0.01, Time: 33s",
      "Epoch 56/200, Cost: 0.3098075, CV_Error: 34.59%, lr: 0.01, Time: 32s",
      "Epoch 57/200, Cost: 0.2681993, CV_Error: 51.45%, lr: 0.01, Time: 34s",
      "Epoch 58/200, Cost: 0.2664452, CV_Error: 48.14%, lr: 0.01, Time: 32s",
      "Epoch 59/200, Cost: 0.2892412, CV_Error: 42.23%, lr: 0.01, Time: 31s",
      "Epoch 60/200, Cost: 0.2835053, CV_Error: 39.94%, lr: 0.01, Time: 31s",
      "Epoch 61/200, Cost: 0.2585662, CV_Error: 49.62%, lr: 0.01, Time: 31s",
      "Epoch 62/200, Cost: 0.2529770, CV_Error: 40.96%, lr: 0.01, Time: 32s",
      "Epoch 63/200, Cost: 0.2529048, CV_Error: 42.84%, lr: 0.01, Time: 32s",
      "Epoch 64/200, Cost: 0.2102599, CV_Error: 36.12%, lr: 0.01, Time: 33s",
      "Epoch 65/200, Cost: 0.2045751, CV_Error: 40.96%, lr: 0.01, Time: 33s",
      "Epoch 66/200, Cost: 0.2240526, CV_Error: 40.96%, lr: 0.01, Time: 32s",
      "Epoch 67/200, Cost: 0.2136811, CV_Error: 38.05%, lr: 0.01, Time: 31s",
      "Epoch 68/200, Cost: 0.2056436, CV_Error: 42.94%, lr: 0.01, Time: 33s",
      "Epoch 69/200, Cost: 0.1906363, CV_Error: 34.64%, lr: 0.01, Time: 33s",
      "Epoch 70/200, Cost: 0.2000475, CV_Error: 36.02%, lr: 0.01, Time: 32s",
      "Epoch 71/200, Cost: 0.2045865, CV_Error: 43.56%, lr: 0.01, Time: 32s",
      "Epoch 72/200, Cost: 0.2040630, CV_Error: 44.78%, lr: 0.01, Time: 32s",
      "Epoch 73/200, Cost: 0.1855266, CV_Error: 35.35%, lr: 0.01, Time: 33s",
      "Epoch 74/200, Cost: 0.1936981, CV_Error: 37.90%, lr: 0.01, Time: 34s",
      "Epoch 75/200, Cost: 0.1794009, CV_Error: 40.65%, lr: 0.01, Time: 33s",
      "Epoch 76/200, Cost: 0.1880051, CV_Error: 37.14%, lr: 0.01, Time: 33s",
      "Epoch 77/200, Cost: 0.1617485, CV_Error: 36.88%, lr: 0.001, Time: 33s",
      "Epoch 78/200, Cost: 0.0816751, CV_Error: 36.22%, lr: 0.001, Time: 33s",
      "Epoch 79/200, Cost: 0.0613137, CV_Error: 35.61%, lr: 0.001, Time: 31s",
      "Epoch 80/200, Cost: 0.0560483, CV_Error: 35.81%, lr: 0.001, Time: 33s",
      "Epoch 81/200, Cost: 0.0523893, CV_Error: 34.74%, lr: 0.001, Time: 32s",
      "Epoch 82/200, Cost: 0.0510474, CV_Error: 33.32%, lr: 0.001, Time: 32s",
      "Epoch 83/200, Cost: 0.0439766, CV_Error: 33.77%, lr: 0.001, Time: 32s",
      "Epoch 84/200, Cost: 0.0437850, CV_Error: 34.28%, lr: 0.001, Time: 32s",
      "Epoch 85/200, Cost: 0.0394006, CV_Error: 34.34%, lr: 0.001, Time: 31s",
      "Epoch 86/200, Cost: 0.0373130, CV_Error: 33.88%, lr: 0.001, Time: 33s",
      "Epoch 87/200, Cost: 0.0363150, CV_Error: 32.25%, lr: 0.001, Time: 33s",
      "Epoch 88/200, Cost: 0.0328606, CV_Error: 32.45%, lr: 0.001, Time: 32s",
      "Epoch 89/200, Cost: 0.0339982, CV_Error: 33.72%, lr: 0.001, Time: 34s",
      "Epoch 90/200, Cost: 0.0322544, CV_Error: 32.91%, lr: 0.001, Time: 33s",
      "Epoch 91/200, Cost: 0.0333226, CV_Error: 33.37%, lr: 0.001, Time: 33s",
      "Epoch 92/200, Cost: 0.0292787, CV_Error: 33.16%, lr: 0.001, Time: 34s",
      "Epoch 93/200, Cost: 0.0326523, CV_Error: 32.86%, lr: 0.001, Time: 32s",
      "Epoch 94/200, Cost: 0.0293884, CV_Error: 31.94%, lr: 0.001, Time: 33s",
      "Epoch 95/200, Cost: 0.0287785, CV_Error: 32.86%, lr: 0.001, Time: 32s",
      "Epoch 96/200, Cost: 0.0299588, CV_Error: 32.65%, lr: 0.001, Time: 32s",
      "Epoch 97/200, Cost: 0.0296566, CV_Error: 31.94%, lr: 0.001, Time: 32s",
      "Epoch 98/200, Cost: 0.0251622, CV_Error: 32.60%, lr: 0.001, Time: 32s",
      "Epoch 99/200, Cost: 0.0266469, CV_Error: 32.65%, lr: 0.001, Time: 31s",
      "Epoch 100/200, Cost: 0.0296289, CV_Error: 30.67%, lr: 0.001, Time: 32s",
      "Epoch 101/200, Cost: 0.0280027, CV_Error: 30.87%, lr: 0.001, Time: 31s",
      "Epoch 102/200, Cost: 0.0269076, CV_Error: 31.84%, lr: 0.001, Time: 33s",
      "Epoch 103/200, Cost: 0.0224279, CV_Error: 31.64%, lr: 0.001, Time: 31s",
      "Epoch 104/200, Cost: 0.0259758, CV_Error: 32.30%, lr: 0.001, Time: 33s",
      "Epoch 105/200, Cost: 0.0267977, CV_Error: 31.23%, lr: 0.001, Time: 33s",
      "Epoch 106/200, Cost: 0.0204235, CV_Error: 30.57%, lr: 0.001, Time: 32s",
      "Epoch 107/200, Cost: 0.0251831, CV_Error: 30.16%, lr: 0.001, Time: 32s",
      "Epoch 108/200, Cost: 0.0209157, CV_Error: 30.87%, lr: 0.001, Time: 33s",
      "Epoch 109/200, Cost: 0.0225866, CV_Error: 30.46%, lr: 0.001, Time: 32s",
      "Epoch 110/200, Cost: 0.0243596, CV_Error: 30.21%, lr: 0.001, Time: 33s",
      "Epoch 111/200, Cost: 0.0237045, CV_Error: 30.26%, lr: 0.001, Time: 32s",
      "Epoch 112/200, Cost: 0.0208692, CV_Error: 30.11%, lr: 0.001, Time: 32s",
      "Epoch 113/200, Cost: 0.0185920, CV_Error: 31.33%, lr: 0.001, Time: 33s",
      "Epoch 114/200, Cost: 0.0209067, CV_Error: 31.43%, lr: 0.001, Time: 33s",
      "Epoch 115/200, Cost: 0.0197980, CV_Error: 31.89%, lr: 0.001, Time: 32s",
      "Epoch 116/200, Cost: 0.0223032, CV_Error: 31.33%, lr: 0.001, Time: 33s",
      "Epoch 117/200, Cost: 0.0179135, CV_Error: 30.31%, lr: 0.001, Time: 32s",
      "Epoch 118/200, Cost: 0.0184089, CV_Error: 30.62%, lr: 0.001, Time: 32s",
      "Epoch 119/200, Cost: 0.0188293, CV_Error: 30.31%, lr: 0.001, Time: 31s",
      "Epoch 120/200, Cost: 0.0192200, CV_Error: 31.23%, lr: 0.001, Time: 31s",
      "Epoch 121/200, Cost: 0.0175145, CV_Error: 30.11%, lr: 0.001, Time: 31s",
      "Epoch 122/200, Cost: 0.0196941, CV_Error: 30.57%, lr: 0.001, Time: 32s",
      "Epoch 123/200, Cost: 0.0182248, CV_Error: 29.34%, lr: 0.001, Time: 32s",
      "Epoch 124/200, Cost: 0.0187441, CV_Error: 28.94%, lr: 0.001, Time: 32s",
      "Epoch 125/200, Cost: 0.0202789, CV_Error: 28.17%, lr: 0.001, Time: 31s",
      "Epoch 126/200, Cost: 0.0207930, CV_Error: 28.58%, lr: 0.001, Time: 31s",
      "Epoch 127/200, Cost: 0.0138290, CV_Error: 28.48%, lr: 0.001, Time: 33s",
      "Epoch 128/200, Cost: 0.0153396, CV_Error: 29.70%, lr: 0.001, Time: 32s",
      "Epoch 129/200, Cost: 0.0164280, CV_Error: 28.88%, lr: 0.001, Time: 33s",
      "Epoch 130/200, Cost: 0.0198171, CV_Error: 29.60%, lr: 0.001, Time: 33s",
      "Epoch 131/200, Cost: 0.0172772, CV_Error: 28.94%, lr: 0.001, Time: 33s",
      "Epoch 132/200, Cost: 0.0160241, CV_Error: 29.29%, lr: 0.001, Time: 33s",
      "Epoch 133/200, Cost: 0.0160546, CV_Error: 27.71%, lr: 0.001, Time: 33s",
      "Epoch 134/200, Cost: 0.0152533, CV_Error: 28.73%, lr: 0.001, Time: 33s",
      "Epoch 135/200, Cost: 0.0128192, CV_Error: 28.22%, lr: 0.001, Time: 32s",
      "Epoch 136/200, Cost: 0.0111941, CV_Error: 28.43%, lr: 0.001, Time: 33s",
      "Epoch 137/200, Cost: 0.0136428, CV_Error: 29.60%, lr: 0.001, Time: 33s",
      "Epoch 138/200, Cost: 0.0166378, CV_Error: 29.70%, lr: 0.001, Time: 32s",
      "Epoch 139/200, Cost: 0.0118950, CV_Error: 28.32%, lr: 0.001, Time: 32s",
      "Epoch 140/200, Cost: 0.0128071, CV_Error: 29.29%, lr: 0.001, Time: 33s",
      "Epoch 141/200, Cost: 0.0132051, CV_Error: 29.70%, lr: 0.001, Time: 34s",
      "Epoch 142/200, Cost: 0.0147718, CV_Error: 28.83%, lr: 0.001, Time: 33s",
      "Epoch 143/200, Cost: 0.0144989, CV_Error: 29.14%, lr: 0.001, Time: 33s",
      "Epoch 144/200, Cost: 0.0141741, CV_Error: 28.58%, lr: 0.001, Time: 32s",
      "Epoch 145/200, Cost: 0.0134972, CV_Error: 29.24%, lr: 0.001, Time: 33s",
      "Epoch 146/200, Cost: 0.0119325, CV_Error: 28.12%, lr: 0.001, Time: 33s",
      "Epoch 147/200, Cost: 0.0120695, CV_Error: 27.87%, lr: 0.001, Time: 33s",
      "Epoch 148/200, Cost: 0.0125638, CV_Error: 29.09%, lr: 0.001, Time: 32s",
      "Epoch 149/200, Cost: 0.0121954, CV_Error: 28.63%, lr: 0.001, Time: 33s",
      "Epoch 150/200, Cost: 0.0165301, CV_Error: 30.11%, lr: 0.001, Time: 33s",
      "Epoch 151/200, Cost: 0.0108751, CV_Error: 28.99%, lr: 0.001, Time: 32s",
      "Epoch 152/200, Cost: 0.0115930, CV_Error: 29.24%, lr: 0.001, Time: 34s",
      "Epoch 153/200, Cost: 0.0117384, CV_Error: 30.21%, lr: 0.001, Time: 32s",
      "Epoch 154/200, Cost: 0.0131828, CV_Error: 29.70%, lr: 0.0001, Time: 34s",
      "Epoch 155/200, Cost: 0.0158562, CV_Error: 29.55%, lr: 0.0001, Time: 34s",
      "Epoch 156/200, Cost: 0.0114737, CV_Error: 29.34%, lr: 0.0001, Time: 32s",
      "Epoch 157/200, Cost: 0.0097768, CV_Error: 29.24%, lr: 0.0001, Time: 33s",
      "Epoch 158/200, Cost: 0.0130835, CV_Error: 29.24%, lr: 0.0001, Time: 33s",
      "Epoch 159/200, Cost: 0.0123469, CV_Error: 29.60%, lr: 0.0001, Time: 34s",
      "Epoch 160/200, Cost: 0.0120264, CV_Error: 29.65%, lr: 0.0001, Time: 33s",
      "Epoch 161/200, Cost: 0.0104946, CV_Error: 29.70%, lr: 0.0001, Time: 33s",
      "Epoch 162/200, Cost: 0.0111695, CV_Error: 29.65%, lr: 0.0001, Time: 33s",
      "Epoch 163/200, Cost: 0.0104817, CV_Error: 29.50%, lr: 0.0001, Time: 32s",
      "Epoch 164/200, Cost: 0.0120591, CV_Error: 29.44%, lr: 0.0001, Time: 33s",
      "Epoch 165/200, Cost: 0.0124911, CV_Error: 29.70%, lr: 0.0001, Time: 33s",
      "Epoch 166/200, Cost: 0.0096801, CV_Error: 29.55%, lr: 0.0001, Time: 33s",
      "Epoch 167/200, Cost: 0.0104629, CV_Error: 29.80%, lr: 0.0001, Time: 33s",
      "Epoch 168/200, Cost: 0.0094652, CV_Error: 29.70%, lr: 0.0001, Time: 32s",
      "Epoch 169/200, Cost: 0.0113712, CV_Error: 29.50%, lr: 0.0001, Time: 33s",
      "Epoch 170/200, Cost: 0.0109475, CV_Error: 29.44%, lr: 0.0001, Time: 33s",
      "Epoch 171/200, Cost: 0.0100959, CV_Error: 29.44%, lr: 0.0001, Time: 34s",
      "Epoch 172/200, Cost: 0.0115478, CV_Error: 29.50%, lr: 0.0001, Time: 32s",
      "Epoch 173/200, Cost: 0.0091278, CV_Error: 29.34%, lr: 0.0001, Time: 33s",
      "Epoch 174/200, Cost: 0.0102946, CV_Error: 29.34%, lr: 0.0001, Time: 33s",
      "Epoch 175/200, Cost: 0.0082141, CV_Error: 29.19%, lr: 1e-05, Time: 33s",
      "Epoch 176/200, Cost: 0.0118415, CV_Error: 29.19%, lr: 1e-05, Time: 32s",
      "Epoch 177/200, Cost: 0.0102403, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 178/200, Cost: 0.0116898, CV_Error: 29.34%, lr: 1e-05, Time: 32s",
      "Epoch 179/200, Cost: 0.0093001, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 180/200, Cost: 0.0122835, CV_Error: 29.29%, lr: 1e-05, Time: 33s",
      "Epoch 181/200, Cost: 0.0109188, CV_Error: 29.29%, lr: 1e-05, Time: 32s",
      "Epoch 182/200, Cost: 0.0097938, CV_Error: 29.29%, lr: 1e-05, Time: 33s",
      "Epoch 183/200, Cost: 0.0129526, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 184/200, Cost: 0.0116027, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 185/200, Cost: 0.0110961, CV_Error: 29.34%, lr: 1e-05, Time: 32s",
      "Epoch 186/200, Cost: 0.0101216, CV_Error: 29.39%, lr: 1e-05, Time: 32s",
      "Epoch 187/200, Cost: 0.0103787, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 188/200, Cost: 0.0111020, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 189/200, Cost: 0.0102259, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 190/200, Cost: 0.0099187, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 191/200, Cost: 0.0093359, CV_Error: 29.34%, lr: 1e-05, Time: 32s",
      "Epoch 192/200, Cost: 0.0096578, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 193/200, Cost: 0.0093149, CV_Error: 29.34%, lr: 1e-05, Time: 32s",
      "Epoch 194/200, Cost: 0.0101977, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 195/200, Cost: 0.0090860, CV_Error: 29.34%, lr: 1e-05, Time: 33s",
      "Epoch 196/200, Cost: 0.0122377, CV_Error: 29.34%, lr: 1.0000000000000002e-06, Time: 32s",
      "Epoch 197/200, Cost: 0.0108341, CV_Error: 29.34%, lr: 1.0000000000000002e-06, Time: 32s",
      "Epoch 198/200, Cost: 0.0112793, CV_Error: 29.34%, lr: 1.0000000000000002e-06, Time: 32s",
      "Epoch 199/200, Cost: 0.0091388, CV_Error: 29.34%, lr: 1.0000000000000002e-06, Time: 33s",
      "Epoch 200/200, Cost: 0.0087097, CV_Error: 29.34%, lr: 1.0000000000000002e-06, Time: 33s",
      "Training time: 6515.723425865173"
     ]
    }
   ],
   "source": [
    "# Settings for this run, gathered in one place and forwarded to train() by keyword.",
    "train_settings = dict(",
    "    train_dataset=train_dataset,",
    "    cv_dataset=val_dataset,",
    "    batch_size=128,",
    "    num_epochs=200,",
    "    initial_lr=0.01,",
    "    num_workers=3,",
    ")",
    "",
    "# train() hands back three values, unpacked here in the order it returns them.",
    "costs, val_error_rates, learning_rates = train(compiled_model, **train_settings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "4e351b14",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-25T10:48:11.521476Z",
     "iopub.status.busy": "2024-11-25T10:48:11.521113Z",
     "iopub.status.idle": "2024-11-25T10:48:12.009425Z",
     "shell.execute_reply": "2024-11-25T10:48:12.008469Z"
    },
    "papermill": {
     "duration": 0.505776,
     "end_time": "2024-11-25T10:48:12.011377",
     "exception": false,
     "start_time": "2024-11-25T10:48:11.505601",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# nn.DataParallel prefixes every state_dict key with 'module.'. Save the wrapped",
    "# network's own weights so the checkpoint loads into a bare AlexNetBaseline whether",
    "# or not training ran on several GPUs.",
    "model_to_save = model.module if isinstance(model, nn.DataParallel) else model",
    "torch.save(model_to_save.state_dict(), 'baseline_imagenette.model.pt')",
    "",
    "# The two transform pipelines are pickled as whole Python objects by torch.save.",
    "torch.save(preprocess, 'baseline_imagenette.preprocess.pt')",
    "torch.save(train_augment, 'baseline_imagenette.train_augment.pt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "579b2a97",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\ADMIN\\AppData\\Local\\Temp\\ipykernel_23112\\571566078.py:1: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.",
      "  preprocess = torch.load('baseline_imagenette.preprocess.pt', map_location=torch.device('cpu'))",
      "C:\\Users\\ADMIN\\AppData\\Local\\Temp\\ipykernel_23112\\571566078.py:11: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.",
      "  compiled_model.load_state_dict(torch.load('baseline_imagenette.model.pt', map_location=torch.device('cpu')))"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<All keys matched successfully>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE(review): this file holds a pickled Python object (the transform pipeline), so it",
    "# cannot be read with weights_only=True. Unpickling can execute arbitrary code; only",
    "# load it from a source you trust. map_location='cpu' keeps any tensors inside it",
    "# loadable on a machine without a GPU.",
    "preprocess = torch.load(",
    "    '/kaggle/input/trained_imagenette_baseline/pytorch/baseline/1/baseline_imagenette.preprocess.pt',",
    "    map_location='cpu', weights_only=False)",
    "",
    "val_dataset = datasets.Imagenette(",
    "    'datasets/imagenette', split='val', size='320px', transform=preprocess)",
    "# Seeded generator: without it the split is drawn from the global RNG and the test",
    "# subset (and therefore the reported metrics) would change from run to run.",
    "val_dataset, test_dataset = torch.utils.data.random_split(",
    "    val_dataset, [0.5, 0.5], generator=torch.Generator().manual_seed(0))",
    "train_eval_dataset = datasets.Imagenette(",
    "    'datasets/imagenette', split='train', size='320px', transform=preprocess)",
    "",
    "compiled_model = AlexNetBaseline(len(train_eval_dataset.classes))",
    "# Inference mode for evaluation; a no-op if the network has no train-only layers.",
    "compiled_model.eval()",
    "# NOTE(review): the model stays on the CPU here; move it with .to(device) if the",
    "# evaluation helper is expected to run on the GPU - confirm against baseline.eval.",
    "",
    "# map_location='cpu' lets a checkpoint written on a GPU load on a CPU-only machine;",
    "# weights_only=True restricts unpickling to tensors and plain containers.",
    "compiled_model.load_state_dict(torch.load(",
    "    '/kaggle/input/trained_imagenette_baseline/pytorch/baseline/1/baseline_imagenette.model.pt',",
    "    map_location='cpu', weights_only=True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3d27f64",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-11-25T10:48:12.042910Z",
     "iopub.status.busy": "2024-11-25T10:48:12.042079Z",
     "iopub.status.idle": "2024-11-25T10:48:23.084616Z",
     "shell.execute_reply": "2024-11-25T10:48:23.083316Z"
    },
    "papermill": {
     "duration": 11.060347,
     "end_time": "2024-11-25T10:48:23.086130",
     "exception": true,
     "start_time": "2024-11-25T10:48:12.025783",
     "status": "failed"
    },
    "tags": []
   },
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[10], line 1\u001b[0m\u001b[1;32m----> 1\u001b[0m test_top1, test_top3 \u001b[38;5;241m=\u001b[39m \u001b[43mtop1_k\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcompiled_model\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_dataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTest Top 1: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtest_top1\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)\u001b[0;32m      3\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mTest Top 3: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtest_top3\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m)",
      "File \u001b[1;32me:\\projects\\Python\\alexnet-cv-team7\\baseline\\eval.py:57\u001b[0m, in \u001b[0;36mtop1_k\u001b[1;34m(model, dataset, k, device)\u001b[0m\u001b[0;32m     55\u001b[0m images \u001b[38;5;241m=\u001b[39m images\u001b[38;5;241m.\u001b[39mto(device)\u001b[0;32m     56\u001b[0m labels \u001b[38;5;241m=\u001b[39m labels\u001b[38;5;241m.\u001b[39mto(device)\u001b[1;32m---> 57\u001b[0m prediction \u001b[38;5;241m=\u001b[39m \u001b[43mpredict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mimages\u001b[49m\u001b[43m)\u001b[49m\u001b[0;32m     59\u001b[0m t1_predicted_labels \u001b[38;5;241m=\u001b[39m prediction\u001b[38;5;241m.\u001b[39margmax(\u001b[38;5;241m1\u001b[39m)\u001b[0;32m     60\u001b[0m t1_trues \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39meq(labels, t1_predicted_labels)\u001b[38;5;241m.\u001b[39mcount_nonzero()\u001b[38;5;241m.\u001b[39mitem()",
      "File \u001b[1;32me:\\projects\\Python\\alexnet-cv-team7\\baseline\\eval.py:20\u001b[0m, in \u001b[0;36mpredict\u001b[1;34m(model, X)\u001b[0m\u001b[0;32m     18\u001b[0m res: torch\u001b[38;5;241m.\u001b[39mTensor \u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39msoftmax(model(\u001b[38;5;28mnext\u001b[39m(X)), dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[0;32m     19\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m crop \u001b[38;5;129;01min\u001b[39;00m X:\u001b[1;32m---> 20\u001b[0m     res \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m F\u001b[38;5;241m.\u001b[39msoftmax(\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcrop\u001b[49m\u001b[43m)\u001b[49m, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[0;32m     21\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\u001b[38;5;241m/\u001b[39m\u001b[38;5;241m10\u001b[39m",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\u001b[0;32m   1734\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\u001b[0;32m   1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\u001b[1;32m-> 1736\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\u001b[0;32m   1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\u001b[0;32m   1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\u001b[0;32m   1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\u001b[0;32m   1745\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\u001b[0;32m   1746\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\u001b[1;32m-> 1747\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[0;32m   1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\u001b[0;32m   1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()",
      "File \u001b[1;32me:\\projects\\Python\\alexnet-cv-team7\\baseline\\model.py:103\u001b[0m, in \u001b[0;36mAlexNetBaseline.forward\u001b[1;34m(self, X)\u001b[0m\u001b[0;32m     95\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, X: torch\u001b[38;5;241m.\u001b[39mTensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor:\u001b[0;32m     96\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\u001b[0;32m     97\u001b[0m \u001b[38;5;124;03m    Args:\u001b[39;00m\u001b[0;32m     98\u001b[0m \u001b[38;5;124;03m        X (torch.Tensor): m x 3 x 224 x 224\u001b[39;00m\u001b[1;32m   (...)\u001b[0m\u001b[0;32m    101\u001b[0m \u001b[38;5;124;03m        torch.Tensor: raw logits output (m x num_classes)\u001b[39;00m\u001b[0;32m    102\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\u001b[1;32m--> 103\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnetwork\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m)\u001b[49m",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\u001b[0;32m   1734\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\u001b[0;32m   1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\u001b[1;32m-> 1736\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\u001b[0;32m   1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\u001b[0;32m   1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\u001b[0;32m   1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\u001b[0;32m   1745\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\u001b[0;32m   1746\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\u001b[1;32m-> 1747\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[0;32m   1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\u001b[0;32m   1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\container.py:250\u001b[0m, in \u001b[0;36mSequential.forward\u001b[1;34m(self, input)\u001b[0m\u001b[0;32m    248\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m):\u001b[0;32m    249\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\u001b[1;32m--> 250\u001b[0m         \u001b[38;5;28minput\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\u001b[0;32m    251\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28minput\u001b[39m",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\u001b[0;32m   1734\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\u001b[0;32m   1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\u001b[1;32m-> 1736\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\u001b[0;32m   1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\u001b[0;32m   1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\u001b[0;32m   1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\u001b[0;32m   1745\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\u001b[0;32m   1746\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\u001b[1;32m-> 1747\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[0;32m   1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\u001b[0;32m   1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\container.py:250\u001b[0m, in \u001b[0;36mSequential.forward\u001b[1;34m(self, input)\u001b[0m\u001b[0;32m    248\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m):\u001b[0;32m    249\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m:\u001b[1;32m--> 250\u001b[0m         \u001b[38;5;28minput\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[43mmodule\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\u001b[0;32m    251\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28minput\u001b[39m",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\module.py:1736\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\u001b[0;32m   1734\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)  \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\u001b[0;32m   1735\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\u001b[1;32m-> 1736\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\\module.py:1747\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\u001b[0;32m   1742\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\u001b[0;32m   1743\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\u001b[0;32m   1744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\u001b[0;32m   1745\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\u001b[0;32m   1746\u001b[0m         \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\u001b[1;32m-> 1747\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\u001b[0;32m   1749\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\u001b[0;32m   1750\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\modules\ormalization.py:63\u001b[0m, in \u001b[0;36mLocalResponseNorm.forward\u001b[1;34m(self, input)\u001b[0m\u001b[0;32m     62\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\u001b[1;32m---> 63\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlocal_response_norm\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43malpha\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbeta\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mk\u001b[49m\u001b[43m)\u001b[49m",
      "File \u001b[1;32me:\\dev\\anaconda3\\envs\\torch\\Lib\\site-packages\\torch\n\\functional.py:2996\u001b[0m, in \u001b[0;36mlocal_response_norm\u001b[1;34m(input, size, alpha, beta, k)\u001b[0m\u001b[0;32m   2994\u001b[0m     div \u001b[38;5;241m=\u001b[39m div\u001b[38;5;241m.\u001b[39mview(sizes[\u001b[38;5;241m0\u001b[39m], \u001b[38;5;241m1\u001b[39m, sizes[\u001b[38;5;241m1\u001b[39m], sizes[\u001b[38;5;241m2\u001b[39m], \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\u001b[0;32m   2995\u001b[0m     div \u001b[38;5;241m=\u001b[39m pad(div, (\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m0\u001b[39m, size \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m, (size \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m))\u001b[1;32m-> 2996\u001b[0m     div \u001b[38;5;241m=\u001b[39m \u001b[43mavg_pool3d\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdiv\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43msize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstride\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39msqueeze(\u001b[38;5;241m1\u001b[39m)\u001b[0;32m   2997\u001b[0m     div \u001b[38;5;241m=\u001b[39m div\u001b[38;5;241m.\u001b[39mview(sizes)\u001b[0;32m   2998\u001b[0m div \u001b[38;5;241m=\u001b[39m div\u001b[38;5;241m.\u001b[39mmul(alpha)\u001b[38;5;241m.\u001b[39madd(k)\u001b[38;5;241m.\u001b[39mpow(beta)",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "test_top1, test_top3 = top1_k(compiled_model, test_dataset, k=3)",
    "print(f'Test Top 1: {test_top1}')",
    "print(f'Test Top 3: {test_top3}')",
    "",
    "val_top1, val_top3 = top1_k(compiled_model, val_dataset, k=3)",
    "print(f'Val Top 1: {val_top1}')",
    "print(f'Val Top 3: {val_top3}')",
    "",
    "train_eval_dataset = datasets.Imagenette(",
    "    'datasets/imagenette', split='train', size='320px', transform=preprocess)",
    "train_top1, train_top3 = top1_k(compiled_model, train_eval_dataset, k=3)",
    "print(f'Val Top 1: {train_top1}')",
    "print(f'Val Top 3: {train_top3}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "701c4a6c",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2024-11-25T08:32:29.369185Z",
     "iopub.status.idle": "2024-11-25T08:32:29.369634Z",
     "shell.execute_reply": "2024-11-25T08:32:29.369470Z",
     "shell.execute_reply.started": "2024-11-25T08:32:29.369452Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "plt.plot(costs)",
    "plt.xlabel(\"Epoch\")",
    "plt.title('Costs')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3cb629cf",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2024-11-25T08:32:29.370878Z",
     "iopub.status.idle": "2024-11-25T08:32:29.371216Z",
     "shell.execute_reply": "2024-11-25T08:32:29.371028Z",
     "shell.execute_reply.started": "2024-11-25T08:32:29.371015Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "plt.plot(val_error_rates)",
    "plt.xlabel(\"Epoch\")",
    "plt.title('Cross Validation Error Rates')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eeba2247",
   "metadata": {
    "execution": {
     "iopub.status.busy": "2024-11-25T08:32:29.372491Z",
     "iopub.status.idle": "2024-11-25T08:32:29.372830Z",
     "shell.execute_reply": "2024-11-25T08:32:29.372703Z",
     "shell.execute_reply.started": "2024-11-25T08:32:29.372688Z"
    },
    "papermill": {
     "duration": null,
     "end_time": null,
     "exception": null,
     "start_time": null,
     "status": "pending"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "plt.plot(learning_rates)",
    "plt.xlabel(\"Epoch\")",
    "plt.title('Learning Rates')"
   ]
  }
 ],
 "metadata": {
  "kaggle": {
   "accelerator": "gpu",
   "dataSources": [
    {
     "isSourceIdPinned": true,
     "modelId": 173987,
     "modelInstanceId": 151535,
     "sourceId": 177880,
     "sourceType": "modelInstanceVersion"
    }
   ],
   "dockerImageVersionId": 30786,
   "isGpuEnabled": true,
   "isInternetEnabled": true,
   "language": "python",
   "sourceType": "notebook"
  },
  "kernelspec": {
   "display_name": "torch",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.3"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 6595.208725,
   "end_time": "2024-11-25T10:48:24.522209",
   "environment_variables": {},
   "exception": true,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2024-11-25T08:58:29.313484",
   "version": "2.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}