# resnet classify image

#### Remove ResNet's final classification head, add a 9-neuron classification head, and fine-tune only that last layer on 45 pictures

In [None]:
# Colab-only step: attach Google Drive so its files are reachable under /content/drive
from google.colab import drive

drive.mount("/content/drive")


Mounted at /content/drive


In [None]:
# Make the dataset folder on the mounted Drive the working directory.
# Later cells save/load checkpoints through relative paths, so they resolve here.
%cd /content/drive/My Drive/smalldata

# List the folder contents to confirm the class sub-folders are present
!ls


/content/drive/My Drive/smalldata
cat  chicken  cow  dog	donkey	frog  lion  monkey  resnet_finetuned.pth  sheep


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the RNG so batch shuffling and the new head's initialisation are repeatable
torch.manual_seed(42)

# Data transformation: fixed resize plus the ImageNet normalisation constants
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load your dataset
# Assuming dataset structure: /content/drive/My Drive/smalldata/{class_name}/{image_files}
data_dir = "/content/drive/My Drive/smalldata"
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Load pre-trained ResNet. `pretrained=True` is deprecated; the explicit enum
# below selects the same ImageNet-1k V1 checkpoint.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze the backbone: only the new head is optimised, so there is no reason to
# compute (and keep) gradients for every pre-trained layer.
for param in model.parameters():
    param.requires_grad = False

# Modify the classification head (one output per class folder found on disk).
# The freshly created layer has requires_grad=True by default.
num_classes = len(dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Loss function and optimizer (head parameters only)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-4)

# Finetune the model
num_epochs = 5  # Adjust as needed
for epoch in range(num_epochs):
    # Keep the frozen backbone in eval mode so its BatchNorm running statistics
    # are not overwritten by batches of at most 8 images; only the head trains.
    model.eval()
    model.fc.train()

    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Metrics (loss is a batch mean, so weight it by the batch size)
        running_loss += loss.item() * inputs.size(0)
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / len(dataset)
    epoch_acc = correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

# Save the finetuned model
torch.save(model.state_dict(), "resnet_finetuned.pth")

# Load the model for inference.
# map_location keeps the load working when the device differs from the one used
# for saving; weights_only=True restricts unpickling to tensors and silences the
# FutureWarning emitted by a bare torch.load().
state_dict = torch.load("resnet_finetuned.pth", map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model.eval();  # trailing ';' stops the notebook from printing the whole model repr


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 221MB/s]


Epoch 1/5, Loss: 2.2448, Accuracy: 0.1556
Epoch 2/5, Loss: 2.1746, Accuracy: 0.1778
Epoch 3/5, Loss: 2.0817, Accuracy: 0.2444
Epoch 4/5, Loss: 2.0339, Accuracy: 0.2889
Epoch 5/5, Loss: 1.9554, Accuracy: 0.3778


  model.load_state_dict(torch.load("resnet_finetuned.pth"))


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [None]:
import torch
from torchvision import models, transforms
from PIL import Image

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Class names mapping (from ImageFolder)
# Reuses the ordering of the training dataset so that output index i maps to the
# class the head was trained on. NOTE: `dataset` comes from the training cell.
class_names = dataset.classes  # Use dataset.classes from training

# Load the trained model.
# weights=None builds the architecture without downloading ImageNet weights:
# every parameter is overwritten by the fine-tuned checkpoint anyway.
model = models.resnet50(weights=None)
# torch.nn is reached through `torch`, which this cell imports itself
model.fc = torch.nn.Linear(model.fc.in_features, len(class_names))
# map_location lets a checkpoint saved on GPU load on a CPU-only runtime;
# weights_only=True restricts unpickling to tensors and silences the FutureWarning.
state_dict = torch.load("resnet_finetuned.pth", map_location=device, weights_only=True)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

# Transformation for inference (same as used during training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def predict_image(image_path):
    """Classify a single image file and return the predicted class name.

    Uses the notebook-level ``transform``, ``model``, ``device`` and
    ``class_names`` objects as they are defined at call time.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        The entry of ``class_names`` at the index of the largest output logit.
    """
    # Open inside a context manager so the file handle is released immediately;
    # convert() returns an independent RGB copy that outlives the `with` block.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    # Preprocess, add the batch dimension and move to the model's device
    input_tensor = transform(image).unsqueeze(0).to(device)

    # Perform inference without tracking gradients
    with torch.no_grad():
        output = model(input_tensor)

    # Index of the max logit along the class dimension -> class name
    predicted_idx = output.argmax(dim=1).item()
    return class_names[predicted_idx]




  model.load_state_dict(torch.load("resnet_finetuned.pth"))


The predicted animal is: cat


In [None]:
# Example: classify one uploaded image (swap in the path of your own file)
image_path = "/content/test.jpeg"
predicted_animal = predict_image(image_path=image_path)
print(f"The predicted animal is: {predicted_animal}")

The predicted animal is: cat


In [None]:
import os

# Path to your test dataset
test_dir = "/content/drive/My Drive/testdog"

# Collect all image files in the directory.
# - The extension check is case-insensitive, so files such as "photo.JPG" are
#   not silently skipped.
# - os.listdir() returns entries in arbitrary order; sorting keeps the list (and
#   every result printed from it) stable across runs.
image_extensions = ('.jpg', '.png', '.jpeg')
test_images = [
    os.path.join(test_dir, img)
    for img in sorted(os.listdir(test_dir))
    if img.lower().endswith(image_extensions)
]
print(f"Found {len(test_images)} images in the test dataset.")


Found 26 images in the test dataset.


In [None]:
# Classify every test image, collecting (file name, predicted class) pairs
# and echoing each prediction as it is made
results = []

for img_path in test_images:
    predicted_animal = predict_image(img_path)
    record = (os.path.basename(img_path), predicted_animal)
    results.append(record)
    print(f"Image: {os.path.basename(img_path)}, Predicted Animal: {predicted_animal}")


Image: n02085782_126.jpg, Predicted Animal: dog
Image: n02091134_9.jpg, Predicted Animal: donkey
Image: n02085782_143.jpg, Predicted Animal: dog
Image: n02085782_80.jpg, Predicted Animal: dog
Image: n02085782_82.jpg, Predicted Animal: dog
Image: n02088364_1384.jpg, Predicted Animal: donkey
Image: n02087046_81.jpg, Predicted Animal: cow
Image: n02091134_125.jpg, Predicted Animal: cow
Image: n02085936_352.jpg, Predicted Animal: dog
Image: n02085620_382.jpg, Predicted Animal: dog
Image: n02085620_431.jpg, Predicted Animal: cat
Image: n02086240_30.jpg, Predicted Animal: dog
Image: n02088364_959.jpg, Predicted Animal: monkey
Image: n02087046_133.jpg, Predicted Animal: frog
Image: n02086240_9.jpg, Predicted Animal: dog
Image: n02085936_420.jpg, Predicted Animal: cat
Image: n02086240_34.jpg, Predicted Animal: dog
Image: n02091134_39.jpg, Predicted Animal: monkey
Image: n02088364_876.jpg, Predicted Animal: dog
Image: n02085936_338.jpg, Predicted Animal: cow
Image: n02088364_1128.jpg, Predicted

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the RNG so shuffling, the random augmentations and the new head's
# initialisation are repeatable
torch.manual_seed(42)

# Data transformation with augmentation
transform = transforms.Compose([
    transforms.RandomResizedCrop(224),  # Random cropping and resizing
    transforms.RandomHorizontalFlip(),  # Random horizontal flipping
    transforms.RandomRotation(15),     # Random rotation
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Color jitter
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Load your dataset
data_dir = "/content/drive/My Drive/smalldata"
dataset = datasets.ImageFolder(root=data_dir, transform=transform)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Load pre-trained ResNet. `pretrained=True` is deprecated; the explicit enum
# below selects the same ImageNet-1k V1 checkpoint.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze the backbone: only the new head is optimised, so there is no reason to
# compute (and keep) gradients for every pre-trained layer.
for param in model.parameters():
    param.requires_grad = False

# Modify the classification head (one output per class folder found on disk).
# The freshly created layer has requires_grad=True by default.
num_classes = len(dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

# Loss function and optimizer (head parameters only)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-4)

# Training loop
num_epochs = 5  # Adjust as needed
for epoch in range(num_epochs):
    # Keep the frozen backbone in eval mode so its BatchNorm running statistics
    # are not overwritten by batches of at most 8 images; only the head trains.
    model.eval()
    model.fc.train()

    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in dataloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Metrics (loss is a batch mean, so weight it by the batch size)
        running_loss += loss.item() * inputs.size(0)
        predicted = outputs.argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += labels.size(0)

    epoch_loss = running_loss / len(dataset)
    epoch_acc = correct / total
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

# Save the finetuned model
torch.save(model.state_dict(), "resnet_finetuned_with_augmentation.pth")


Epoch 1/5, Loss: 2.2768, Accuracy: 0.1111
Epoch 2/5, Loss: 2.2047, Accuracy: 0.2000
Epoch 3/5, Loss: 2.1215, Accuracy: 0.2222
Epoch 4/5, Loss: 2.0915, Accuracy: 0.2444
Epoch 5/5, Loss: 2.0747, Accuracy: 0.2667


In [None]:
import torch
from torchvision import transforms
from PIL import Image

# `models` is required to rebuild the architecture; import it in this cell
# instead of relying on a name left behind by an earlier cell.
from torchvision import models

# Device configuration (defined here so the cell does not depend on earlier state)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Class names mapping (from ImageFolder)
# NOTE: `dataset` comes from the training cell; its ordering defines which
# output index corresponds to which folder name.
class_names = dataset.classes  # This maps indices to folder names

# Load the trained model.
# weights=None builds the architecture without downloading ImageNet weights:
# every parameter is overwritten by the fine-tuned checkpoint anyway.
model = models.resnet50(weights=None)
# torch.nn is reached through `torch`, which this cell imports itself
model.fc = torch.nn.Linear(model.fc.in_features, len(class_names))
# map_location lets a checkpoint saved on GPU load on a CPU-only runtime;
# weights_only=True restricts unpickling to tensors and silences the FutureWarning.
state_dict = torch.load(
    "resnet_finetuned_with_augmentation.pth", map_location=device, weights_only=True
)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

# Transformation for the input image: deterministic resize plus the same
# normalisation as training. The random augmentations are train-time only and
# are intentionally not applied at inference.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def predict_image(image_path):
    """Classify a single image file and return the predicted class name.

    Uses the notebook-level ``transform``, ``model``, ``device`` and
    ``class_names`` objects as they are defined at call time.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        The entry of ``class_names`` at the index of the largest output logit.
    """
    # Open inside a context manager so the file handle is released immediately;
    # convert() returns an independent RGB copy that outlives the `with` block.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    # Preprocess, add the batch dimension and move to the model's device
    input_tensor = transform(image).unsqueeze(0).to(device)

    # Perform inference without tracking gradients
    with torch.no_grad():
        output = model(input_tensor)

    # Index of the max logit along the class dimension -> class name
    predicted_idx = output.argmax(dim=1).item()
    return class_names[predicted_idx]



  model.load_state_dict(torch.load("resnet_finetuned_with_augmentation.pth"))


In [None]:
# Re-run the test set through the currently loaded model, keeping
# (file name, predicted class) pairs and printing each prediction
results = []

for img_path in test_images:
    predicted_animal = predict_image(img_path)
    record = (os.path.basename(img_path), predicted_animal)
    results.append(record)
    print(f"Image: {os.path.basename(img_path)}, Predicted Animal: {predicted_animal}")

Image: n02085782_126.jpg, Predicted Animal: donkey
Image: n02091134_9.jpg, Predicted Animal: cow
Image: n02085782_143.jpg, Predicted Animal: donkey
Image: n02085782_80.jpg, Predicted Animal: donkey
Image: n02085782_82.jpg, Predicted Animal: donkey
Image: n02088364_1384.jpg, Predicted Animal: dog
Image: n02087046_81.jpg, Predicted Animal: chicken
Image: n02091134_125.jpg, Predicted Animal: chicken
Image: n02085936_352.jpg, Predicted Animal: chicken
Image: n02085620_382.jpg, Predicted Animal: cat
Image: n02085620_431.jpg, Predicted Animal: monkey
Image: n02086240_30.jpg, Predicted Animal: chicken
Image: n02088364_959.jpg, Predicted Animal: cat
Image: n02087046_133.jpg, Predicted Animal: chicken
Image: n02086240_9.jpg, Predicted Animal: cat
Image: n02085936_420.jpg, Predicted Animal: cat
Image: n02086240_34.jpg, Predicted Animal: donkey
Image: n02091134_39.jpg, Predicted Animal: cat
Image: n02088364_876.jpg, Predicted Animal: dog
Image: n02085936_338.jpg, Predicted Animal: chicken
Image: 

# classify audio

In [None]:
# https://huggingface.co/ardneebwar/wav2vec2-animal-sounds-finetuned-hubert-finetuned-animals
import librosa
import torch
from transformers import HubertForSequenceClassification, Wav2Vec2FeatureExtractor

# Choose the execution device up front so the model can be placed on it once loaded
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hub id of the fine-tuned checkpoint; both the feature extractor and the
# classifier are loaded from it
model_name = "ardneebwar/wav2vec2-animal-sounds-finetuned-hubert-finetuned-animals"
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_name)
model = HubertForSequenceClassification.from_pretrained(model_name)

# Move the classifier to the device and switch it to evaluation mode
model.to(device)
model.eval()

# Function to predict the class of an audio file
def predict_audio_class(audio_file, feature_extractor, model, device):
    """Classify one audio file and return the predicted label.

    Args:
        audio_file: Path of the audio file, passed to ``librosa.load``.
        feature_extractor: Callable that turns the waveform into model inputs;
            its result must expose ``input_values``.
        model: Classifier whose output exposes ``logits`` and whose
            ``config.id2label`` maps class ids to label names.
        device: Device the input tensor is moved to before inference.

    Returns:
        The ``model.config.id2label`` entry for the highest-scoring class.
    """
    # Resample to the rate the extractor declares rather than a hard-coded value,
    # so loading and feature extraction can never disagree; 16000 (the previous
    # constant) is kept as the fallback when no rate is exposed.
    target_sr = getattr(feature_extractor, "sampling_rate", None) or 16000

    # Load and preprocess the audio file
    speech, _ = librosa.load(audio_file, sr=target_sr)
    features = feature_extractor(speech, return_tensors="pt", sampling_rate=target_sr)
    input_values = features.input_values.to(device)

    # Predict without tracking gradients
    with torch.no_grad():
        logits = model(input_values).logits

    # Highest-scoring class id -> class name
    predicted_id = torch.argmax(logits, dim=-1).item()
    return model.config.id2label[predicted_id]

# Classify a sample clip; point audio_file_path at another recording to try a different file
audio_file_path = "/content/cat_1.wav"
predicted_class = predict_audio_class(
    audio_file=audio_file_path,
    feature_extractor=feature_extractor,
    model=model,
    device=device,
)
print(f"Predicted class: {predicted_class}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/213 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.95k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/378M [00:00<?, ?B/s]

Predicted class: pig


In [None]:
# Classify a second sample clip with the same extractor and model
audio_file_path = "/content/cat_2.wav"
predicted_class = predict_audio_class(
    audio_file=audio_file_path,
    feature_extractor=feature_extractor,
    model=model,
    device=device,
)
print(f"Predicted class: {predicted_class}")

Predicted class: pig
