<a href="https://colab.research.google.com/github/anand-therattil/machine_learning/blob/main/Transfer_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries.
# The download loop splits each entry on ':' and URL-decodes the second part.
# NOTE(review): the embedded URL carries X-Goog-Date=20240505T132817Z and
# X-Goog-Expires=259200, so it is presumably a signed link that stops working
# after that window -- regenerate it if the download fails.
DATA_SOURCE_MAPPING = 'animal-data:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4707547%2F7995592%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240505%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240505T132817Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D160f001b66b988452d3e4ee1370dbb161f81452ea1de9346001266d002d9fc75ff128c6dde30a807c8927bf86f6939cbbde00c8510782bb82e7f961210a43afbefaefc63d06de19b444581c791cf4e44956e8f76d12a1051d1c2ffe4dfe7b2c57e39e5d0f70902a27b7ecf76512f4b4375fc7432300bf72146f32f67f83b7898ed81fefb25eb96c0a866ac1a388ee1513532f1a44452f9b173e062b0c41ab90331204095f3696444bc105c500dc70a632038ac6cbfdfc889e72f5fd98b403088472ea97a87762512e41c54191b87d5be6ff91d4df7fd9152672e856375404ec9a74ca613c478b6784aac2ae6a55dc8752bddfd5ed6f7d63614f6ba195e1476de'

# Directory under which every data source is extracted (one sub-directory each).
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created next to the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# Not referenced anywhere else in the visible code.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source into a temporary file and unpack it
# into its own sub-directory of KAGGLE_INPUT_PATH.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is "<directory>:<percent-encoded URL>". Split on the first
    # colon only, so a stray ':' later in the entry cannot break unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The Content-Length header can be missing; in that case the total
            # is unknown and the progress bar simply stays empty instead of
            # crashing on int(None).
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            # iter(callable, sentinel) keeps reading until read() returns b''.
            for chunk in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(chunk)
                tfile.write(chunk)
                done = int(50 * dl / total_bytes) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
            # Push buffered bytes to the file and rewind so the archive readers
            # below see the complete download from its first byte.
            tfile.flush()
            tfile.seek(0)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Read from the open handle rather than reopening by name, and
                # bind the archive to its own name so the `tarfile` module is
                # not overwritten for later iterations.
                with tarfile.open(fileobj=tfile) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is a subclass of OSError, so it must be handled first.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import torch
import torch.nn as nn
from torchvision import transforms, models


In [None]:
# Root directory that is scanned recursively for dataset files.
data_path = '/kaggle/input/animal-data/animal_data/'
# Class name -> integer class index, numbered in the order listed here.
# NOTE(review): an earlier comment described files as "Target_***.jpgs"; the
# dataset class in this file derives the label from the parent directory
# name, so the file naming itself is not verified here.
target_mapping = {
    class_name: class_index
    for class_index, class_name in enumerate((
        "Horse", "Lion", "Dog", "Bear", "Bird", "Tiger",
        "Kangaroo", "Elephant", "Zebra", "Cow", "Panda",
        "Giraffe", "Dolphin", "Cat", "Deer",
    ))
}
# Full path of every file found below data_path, in os.walk order.
data = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(data_path)
    for name in names
]

In [None]:
# Preprocessing applied to every image: resize to 120x120, random
# flip/rotation augmentation, conversion to a float tensor in [0, 1], and
# finally per-channel normalization.
# The mean/std values are the ImageNet statistics documented for torchvision's
# pretrained ResNet weights; the pretrained backbone expects inputs scaled
# this way.
transform = transforms.Compose([
    transforms.Resize((120, 120)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    transforms.RandomRotation(90),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Image dataset whose label is the name of each file's parent directory.

    Args:
        all_images: Sequence of image file paths. The directory that directly
            contains a file must be a key of the module-level
            ``target_mapping``.
        transforms: Optional callable applied to the loaded PIL image; its
            result is returned as the sample.
    """

    def __init__(self, all_images, transforms=None):
        self.all_images = all_images
        self.transforms = transforms

    def __len__(self):
        """Return the number of image paths in the dataset."""
        return len(self.all_images)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, target)``.

        Raises:
            KeyError: If the parent directory name is not in
                ``target_mapping``.
        """
        image_path = self.all_images[idx]
        # Always hand out 3-channel images: grayscale, palette or RGBA files
        # would otherwise produce tensors with a different channel count,
        # which cannot be batched together or fed to the RGB model.
        # The context manager closes the file handle; convert() returns an
        # independent, fully loaded copy.
        with Image.open(image_path) as source_image:
            image = source_image.convert("RGB")

        if self.transforms is not None:
            image = self.transforms(image)

        # Class name = directory that directly contains the file.
        label = os.path.basename(os.path.dirname(image_path))
        target = target_mapping[label]
        return image, target

In [None]:
# Wrap the collected file paths in the dataset and serve them in shuffled
# batches of 16 using 4 worker processes and pinned host memory.
Custom_Dataset = CustomDataset(data, transforms=transform)
loader_options = dict(batch_size=16, num_workers=4, shuffle=True, pin_memory=True)
train_dl = torch.utils.data.DataLoader(Custom_Dataset, **loader_options)

# Use the GPU when one is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

# ResNet-18 with torchvision's default pretrained weights.
model = models.resnet18(weights='DEFAULT')

# Input width of the existing final fully connected layer.
total_in_feature = model.fc.in_features
print(total_in_feature)


In [None]:
# Freeze the pretrained backbone. Only the new head is handed to the optimizer
# below, so gradients for the other layers would be computed and never used.
for backbone_param in model.parameters():
    backbone_param.requires_grad_(False)

# Replace the final fully connected layer with a small classifier head that
# has one output per class in target_mapping. The freshly created layers have
# requires_grad=True by default, so they remain trainable.
model.fc = nn.Sequential(
    nn.Linear(total_in_feature, 256),
    nn.ReLU(),
    nn.Linear(256, len(target_mapping)),
)

model.to(device)
criterion = nn.CrossEntropyLoss()
# Only the parameters of the new head are optimized.
optimizer = torch.optim.SGD(model.fc.parameters(), lr=0.001)

In [None]:
num_epochs = 100
n_total_steps = len(train_dl)  # batches per epoch, shown in the progress line
train_loss = []  # mean batch loss of each finished epoch

# Make sure layers such as BatchNorm are in training mode, even if the model
# was switched to eval mode earlier in the session.
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    for i, (images, target) in enumerate(train_dl):
        images = images.to(device)
        target = target.to(device)

        output = model(images)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() yields a plain Python float without keeping the graph alive.
        batch_loss = loss.item()
        epoch_loss += batch_loss
        if (i + 1) % 100 == 0:
            print(f'epoch {epoch+1} / {num_epochs}, step {i+1}/{n_total_steps}, loss={batch_loss}')

    # Record the epoch average; max() guards against an empty loader.
    train_loss.append(epoch_loss / max(n_total_steps, 1))


In [None]:
# Run the model on a single dataset sample and print the raw scores, the true
# class index and the predicted class index.
# NOTE: the sample goes through the dataset's own transform, so any random
# augmentation configured there is applied to it as well.
sample_index = 1000
img, tgt = Custom_Dataset[sample_index]
img = img.unsqueeze(0)  # add the batch dimension the model expects

img = img.to(device)
print(img.shape)

# Inference must run in eval mode: in training mode BatchNorm would normalize
# with the statistics of this one-image batch and also update its running
# averages. Call model.train() again before any further training.
model.eval()
with torch.no_grad():  # no gradients are needed for a prediction
    prd = model(img)
_, predictions = torch.max(prd, 1)
print(prd)
print(tgt)
print(predictions)