<a href="https://colab.research.google.com/github/akarighattam/akarighattam/blob/main/Arthropod_Identification_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


### Arthropod Dataset

In [None]:
#install and import fastbook
!pip install fastai==2.0.15
!pip install fastai2==0.0.30
!pip install fastcore==1.0.16
!pip install -Uqq fastbook
import fastbook
from fastbook import *

#import libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image, ImageOps
#mount google drive to access images for dataset
from google.colab import drive
%matplotlib inline
drive.mount('/content/drive')

In [None]:
#load the resized spider photos for training and collect them, with their labels, in lists
train_spiders_folder = '/content/drive/Shareddrives/Image Dataset for Aranya/Arthropod Photos/Train Dataset/Spiders'
train_spiders_resized = '/content/drive/Shareddrives/Image Dataset for Aranya/Resized Arthropod Photos/Train Dataset/Spiders'

train_list_spiders = []
train_labels_spiders = []

#the file names come from the original folder, but the pixels are read from the
#resized folder, so every original photo must have a resized copy with the same name
#(disabled resize step, presumably how those copies were produced:
#   resize_photo = Image.open(os.path.join(train_spiders_folder, file_name))
#   resize_photo = ImageOps.pad(resize_photo, (300, 300), color='black')
#   resize_photo.save(f"{train_spiders_resized}/{file_name}") )
for file_name in os.listdir(train_spiders_folder):
  resized_path = os.path.join(train_spiders_resized, file_name)
  #each image is wrapped in a one-element list before being stored
  train_list_spiders.append([mpimg.imread(resized_path)])
  train_labels_spiders.append("Spiders")

In [None]:
#load the resized moth photos for training and collect them, with their labels, in lists
train_moths_folder = '/content/drive/Shareddrives/Image Dataset for Aranya/Arthropod Photos/Train Dataset/Moths'
train_moths_resized = '/content/drive/Shareddrives/Image Dataset for Aranya/Resized Arthropod Photos/Train Dataset/Moths'

train_list_moths = []
train_labels_moths = []

#the file names come from the original folder, but the pixels are read from the
#resized folder, so every original photo must have a resized copy with the same name
#(disabled resize step, presumably how those copies were produced:
#   resize_photo = Image.open(os.path.join(train_moths_folder, file_name))
#   resize_photo = ImageOps.pad(resize_photo, (300, 300), color='black')
#   resize_photo.save(f"{train_moths_resized}/{file_name}") )
for file_name in os.listdir(train_moths_folder):
  resized_path = os.path.join(train_moths_resized, file_name)
  #each image is wrapped in a one-element list before being stored
  train_list_moths.append([mpimg.imread(resized_path)])
  train_labels_moths.append("Moths")

In [None]:
#load the resized spider photos for validation and collect them, with their labels, in lists
valid_spiders_folder = '/content/drive/Shareddrives/Image Dataset for Aranya/Arthropod Photos/Validation Dataset/Spiders'
valid_spiders_resized = '/content/drive/Shareddrives/Image Dataset for Aranya/Resized Arthropod Photos/Validation Dataset/Spiders'

valid_list_spiders = []
valid_labels_spiders = []

#the file names come from the original folder, but the pixels are read from the
#resized folder, so every original photo must have a resized copy with the same name
#(disabled resize step, presumably how those copies were produced:
#   resize_photo = Image.open(os.path.join(valid_spiders_folder, file_name))
#   resize_photo = ImageOps.pad(resize_photo, (300, 300), color='black')
#   resize_photo.save(f"{valid_spiders_resized}/{file_name}") )
for file_name in os.listdir(valid_spiders_folder):
  resized_path = os.path.join(valid_spiders_resized, file_name)
  #each image is wrapped in a one-element list before being stored
  valid_list_spiders.append([mpimg.imread(resized_path)])
  valid_labels_spiders.append("Spiders")

In [None]:
#load the resized moth photos for validation and collect them, with their labels, in lists
valid_moths_folder = '/content/drive/Shareddrives/Image Dataset for Aranya/Arthropod Photos/Validation Dataset/Moths'
valid_moths_resized = '/content/drive/Shareddrives/Image Dataset for Aranya/Resized Arthropod Photos/Validation Dataset/Moths'

valid_list_moths = []
valid_labels_moths = []

#the file names come from the original folder, but the pixels are read from the
#resized folder, so every original photo must have a resized copy with the same name
#(disabled resize step, presumably how those copies were produced:
#   resize_photo = Image.open(os.path.join(valid_moths_folder, file_name))
#   resize_photo = ImageOps.pad(resize_photo, (300, 300), color='black')
#   resize_photo.save(f"{valid_moths_resized}/{file_name}") )
for file_name in os.listdir(valid_moths_folder):
  resized_path = os.path.join(valid_moths_resized, file_name)
  #each image is wrapped in a one-element list before being stored
  valid_list_moths.append([mpimg.imread(resized_path)])
  valid_labels_moths.append("Moths")

In [None]:
#turn each list of images into one float32 numpy array
spiders_array_train = np.asarray(train_list_spiders, dtype=np.float32)
moths_array_train = np.asarray(train_list_moths, dtype=np.float32)
spiders_array_valid = np.asarray(valid_list_spiders, dtype=np.float32)
moths_array_valid = np.asarray(valid_list_moths, dtype=np.float32)

#wrap the numpy arrays as tensors
spiders_train = tensor(spiders_array_train)
moths_train = tensor(moths_array_train)
spiders_valid = tensor(spiders_array_valid)
moths_valid = tensor(moths_array_valid)

#train_x: spiders first, then moths, one flattened row per image
#(300*300 pixels multiplied by 3 for the rgb channels)
train_x = torch.cat((spiders_train, moths_train), dim=0).view(-1, 300*300*3)

#train_y: one label per row of train_x, in the same order (1 = spider, 0 = moth),
#shaped as a column so it lines up with one prediction per image
train_y = tensor(
  [1]*train_labels_spiders.count("Spiders") + [0]*train_labels_moths.count("Moths")
)[:, None]

#valid_x: same layout as train_x, built from the validation images
valid_x = torch.cat((spiders_valid, moths_valid), dim=0).view(-1, 300*300*3)

#valid_y: labels for valid_x (1 = spider, 0 = moth) as a column
valid_y = tensor(
  [1]*valid_labels_spiders.count("Spiders") + [0]*valid_labels_moths.count("Moths")
)[:, None]

In [None]:
#check that the dimensions are correct: each *_x was flattened with .view(-1, 300*300*3),
#so it should be (number of images, 270000); each *_y was unsqueezed to (number of images, 1)
train_x.shape, train_y.shape, valid_x.shape, valid_y.shape

In [None]:
#training dataset: a list of (image, label) tuples, one per row of train_x
training_dataset = [*zip(train_x, train_y)]
#unpack the first sample (fails early if the training set is empty)
x, y = training_dataset[0]

#validation dataset: a list of (image, label) tuples, one per row of valid_x
validation_dataset = [*zip(valid_x, valid_y)]

In [None]:
#create randomly initialized, trainable parameters
def initialize(size, variance = 1.0):
  """Return a gradient-tracking tensor of scaled standard-normal samples.

  size: shape passed straight to torch.randn (an int or a tuple).
  variance: factor each sample is multiplied by. Despite the name, the samples
    are scaled by this value directly, so it acts as a scale factor rather
    than a true variance.
  """
  scaled_noise = torch.randn(size).mul(variance)
  return scaled_noise.requires_grad_()

#one weight per input value (300*300 pixels multiplied by 3 for rgb), single output column
weights = initialize((300*300*3, 1))
#a single bias term added to every prediction
bias = initialize(1)

In [None]:
#define loss function (this loss function only works for 2 classifications)
def loss_function(predictions, actual_values):
  """Return the mean distance between sigmoid(predictions) and the 0/1 targets.

  predictions: raw model outputs; they are squashed with a sigmoid here.
  actual_values: targets, where 1 marks the positive class and anything else
    is treated as the negative class.
  """
  probabilities = torch.sigmoid(predictions)
  is_positive = actual_values == 1
  #positive samples are penalised by how far the probability is below 1,
  #all other samples by how far it is above 0
  per_sample_error = torch.where(is_positive, 1 - probabilities, probabilities)
  return per_sample_error.mean()

In [None]:
#define DataLoader for training dataset (batches of up to 25 samples, shuffling requested)
training_dataloader = DataLoader(training_dataset, batch_size = 25, shuffle=True)
#pull out the first batch
#NOTE(review): x_batch and y_batch are not read again at module level in the visible
#code; the functions that use these names receive them as arguments or loop variables
x_batch, y_batch = first(training_dataloader)

#define DataLoader for validation dataset
#NOTE(review): shuffle=True is also requested here; validation only averages over all
#samples, so shuffling looks unnecessary - confirm before changing
validation_dataloader = DataLoader(validation_dataset, batch_size = 25, shuffle=True)

In [None]:
#define a function to calculate the gradient
def calculate_gradient(x_batch, y_batch, model):
  """Run one forward pass on a batch and backpropagate its loss.

  x_batch: inputs handed to model.
  y_batch: targets handed to loss_function together with the model output.
  model: callable that maps x_batch to predictions.

  Returns nothing; the gradients produced by backward() are the result.
  """
  batch_loss = loss_function(model(x_batch), y_batch)
  batch_loss.backward()

In [None]:
#define optimizer that updates the weights
class OptimizerClass:
  """Minimal gradient-descent optimizer.

  parameters: iterable of tensors to update; it is copied into a list so a
    generator can be passed safely.
  learning_rate: factor the gradient is multiplied by before it is subtracted.
  """
  def __init__(self, parameters, learning_rate):
    self.parameters, self.learning_rate = list(parameters), learning_rate
  def step(self, *args, **kwargs):
    """Subtract learning_rate * gradient from every parameter that has a gradient."""
    for p in self.parameters:
      #a parameter that has not taken part in a backward pass has grad None;
      #skip it instead of failing with AttributeError
      if p.grad is None:
        continue
      p.data -= p.grad.data*self.learning_rate
  def zero_gradient(self, *args, **kwargs):
    """Reset the accumulated gradients in place so the next backward pass starts from zero."""
    for p in self.parameters:
      #nothing to reset when no gradient has been computed yet
      if p.grad is not None:
        p.grad.zero_()

In [None]:
#function that trains the model for one epoch
def train_for_epoch(model):
  """Run one pass over training_dataloader, updating the parameters after every batch.

  model: callable that maps a batch of inputs to predictions.

  Reads the module-level names training_dataloader, parameters and learning_rate.
  """
  #the optimizer keeps no state between batches, so build it once per epoch
  #instead of rebuilding an identical object for every batch
  optimizer = OptimizerClass(parameters, learning_rate)
  for x_batch, y_batch in training_dataloader:
    calculate_gradient(x_batch, y_batch, model)
    optimizer.step()
    #clear the gradients so the next batch does not accumulate on top of this one
    optimizer.zero_gradient()

In [None]:
#define learning rate and parameters variable
#(both are read as module-level names by train_for_epoch when it builds the optimizer)
learning_rate = 100.
#the tensors the optimizer updates: the weight matrix and the bias
parameters = [weights, bias]

In [None]:
#function to check accuracy between training batches
def batch_accuracy(x_batch, y_batch):
  """Return, per sample, 1.0 where the thresholded prediction equals the target, else 0.0.

  x_batch: raw model outputs for a batch (not the input images); a sigmoid is
    applied here and values above 0.5 count as a positive prediction.
  y_batch: targets compared element-wise against the thresholded predictions.

  The result has one entry per sample; it is not averaged here.
  """
  predicted_positive = x_batch.sigmoid().gt(0.5)
  hits = predicted_positive.eq(y_batch)
  return hits.float()

In [None]:
#function that validates the model for one epoch
def validate_for_epoch(model):
  """Return the model's accuracy over validation_dataloader, rounded to 4 decimals.

  model: callable that maps a batch of inputs to predictions.

  The result is a fraction between 0 and 1 (correct predictions / all samples).
  """
  accuracy = [batch_accuracy(model(x_batch), y_batch) for x_batch, y_batch in validation_dataloader]
  #batch_accuracy returns one value per sample, so the batches are joined with cat:
  #stack needs every batch to have the same shape and raises when the last batch
  #is smaller than the others; for equally sized batches the mean is unchanged
  return round(torch.cat(accuracy).mean().item(), 4)

In [None]:
#function that trains the model
def train_model(model, epochs):
  """Train for the given number of epochs, printing the validation accuracy after each.

  model: callable that maps a batch of inputs to predictions.
  epochs: how many training passes to run.

  The accuracies are printed on one line, separated by spaces.
  """
  for _ in range(epochs):
    train_for_epoch(model)
    epoch_accuracy = validate_for_epoch(model)
    print(epoch_accuracy, end = ' ')

In [None]:
#define prediction function
def predict(x_batch):
  """Return the linear model's raw output for a batch of flattened images.

  x_batch: batch of inputs that is matrix-multiplied with the module-level weights.

  The weighted sum plus bias is divided by 270000, which equals 300*300*3,
  the number of values in one flattened image.
  """
  linear_output = x_batch.matmul(weights) + bias
  return linear_output / 270000

In [None]:
#Train the model and print its validation accuracy after every epoch.
#Each printed value is a fraction between 0 and 1 rounded to 4 decimals (not a percent):
#the share of validation images whose classification was predicted correctly
print("Prediction:")
train_model(predict, 401)


In [None]:
#function that prints the predicted classification and the confidence in percent
def identify(path):
  """Show the image at path and print what the model predicts for it.

  path: location of the image file to classify.

  The image is padded to 300x300 on a black background, flattened and passed
  through predict; the sigmoid of the output is rounded to 4 decimals.
  Above 0.7 is reported as a spider, below 0.3 as a moth, and anything else
  as no arthropod detected. Nothing is returned.
  """
  print("Spider = 1, Moth = 0")
  photo = PILImage(PILImage.create(path))
  photo.show()
  padded = ImageOps.pad(photo, (300, 300), color='black')
  #flatten to a single row so it matches the layout predict expects
  pixels = tensor(np.array(padded).astype(np.float32)).view(-1, 300*300*3)
  probability = round(predict(pixels).sigmoid().item(), 4)
  #the raw value is printed in every case, before the verdict
  print("Prediction value:", probability)
  if probability>0.7:
    label, confidence = "Spider:", probability
  elif probability<0.3:
    #a low spider probability is reported as the complementary moth confidence
    label, confidence = "Moth:", 1-probability
  else:
    print("Cannot detect any arthropods in the image.")
    return
  print(label, (str(int(round(confidence, 2)*100)))+"%", "likely")

#paths of the test photos on Google Drive (two spiders and two moths, going by the file names)
orbweaver = '/content/drive/MyDrive/Arthropod Identification App - Test Photos/Western Spotted Orbweaver (Neoscona oaxacensis)-1.jpg'
idia_moth = '/content/drive/MyDrive/Arthropod Identification App - Test Photos/American Idia Moth.jpg'
jumping_spider = '/content/drive/MyDrive/Arthropod Identification App - Test Photos/Flea Jumping Spider.jpg'
carpet_moth = '/content/drive/MyDrive/Arthropod Identification App - Test Photos/Bent-line Carpet Moth.jpg'

#classify one of the test photos; swap in another variable above to try a different image
identify(idia_moth)
