In [1]:
# Imports here
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch import nn, optim
from torchvision import datasets, transforms,  models
from collections import OrderedDict
from PIL import Image
import seaborn as sns
import numpy as np
import time
from PIL import Image
import glob
from torch.autograd import Variable

In [None]:
# Both splits live under the same dataset root; build each path from it.
segregated_root = '../input/segregated-leaves-images/Segregated Data/'
train_dir = segregated_root + 'training data/'
valid_dir = segregated_root + 'validation data/'


In [None]:
# Preprocessing pipelines for the training and validation datasets.
# Both pipelines normalise with the same per-channel mean and std.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Training: random rotation, random resized crop and random horizontal flip
# are applied before converting to a tensor and normalising.
training_transforms = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

# Validation: fixed resize followed by a centre crop, then tensor + normalise.
validation_transforms = transforms.Compose([
    transforms.Resize(255),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

In [None]:
# Load the datasets with ImageFolder

# The training set goes through the augmenting `training_transforms` pipeline,
# the validation set through `validation_transforms`.
# (The validation line previously referenced `validation_or_testing_transforms`,
# a name that is never defined in this notebook, so the cell raised NameError.)
train_data = datasets.ImageFolder(train_dir, transform=training_transforms)
validation_data = datasets.ImageFolder(valid_dir, transform=validation_transforms)


In [None]:
# Wrap the image datasets (with their transforms) in DataLoaders.
loader_batch_size = 64

# Training loader: shuffling is enabled.
train_dataloader = torch.utils.data.DataLoader(
    train_data,
    batch_size=loader_batch_size,
    shuffle=True,
)

# Validation loader: same batch size, default (unshuffled) order.
validation_dataloader = torch.utils.data.DataLoader(
    validation_data,
    batch_size=loader_batch_size,
)

In [None]:
# Label mapping: read the cat_to_name JSON file into a Python object.
import json

label_map_path = '../input/cat-to-name/cat_to_name.json'
with open(label_map_path, 'r') as mapping_file:
    cat_to_name = json.load(mapping_file)

## Training

In [None]:
# Build and train your network

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Import the pre-trained DenseNet-121 model.
model = models.densenet121(pretrained=True)

# Alternative pre-trained backbones (left disabled):
#model=models.resnet50(pretrained=True)
#models.vgg16(pretrained=True)

In [None]:
# Freeze every parameter of the pre-trained model so that no gradients
# are computed for them (we do not backprop through these weights).
for weight in model.parameters():
    weight.requires_grad = False

In [None]:
# Feed-forward head: 1024 inputs -> 256 hidden units -> 38 log-probabilities.
head_layers = [
    nn.Linear(1024, 256),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(256, 38),
    nn.LogSoftmax(dim=1),
]
classifier = nn.Sequential(*head_layers)

# Replace the pre-trained model's classifier with the feed-forward head.
model.classifier = classifier

# The head emits log-probabilities (LogSoftmax), so negative log-likelihood is the loss.
criterion = nn.NLLLoss()

# Only the classifier's parameters are given to the optimizer;
# the feature parameters are frozen.
optimizer = optim.Adam(model.classifier.parameters(), lr=0.003)

# Move the whole model to the selected device (trailing ';' hides the cell output).
model.to(device);

In [None]:
# Display the length of the training DataLoader (number of batches per epoch).
len(train_dataloader)

In [None]:
# Train the classifier head and evaluate on the validation set after each epoch.
#
# Per-epoch mean losses are appended to `train_losses` / `validation_losses`.
epochs = 10
train_losses, validation_losses = [], []
for e in range(epochs):

    # --- training pass -----------------------------------------------------
    # Switch to training mode at the START of every epoch: if this cell is
    # re-run after the prediction cell (which leaves the model in eval mode),
    # dropout would otherwise stay disabled for the whole first epoch.
    model.train()
    running_loss = 0
    for inputs, labels in train_dataloader:

        # Move input and label tensors to the default device
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        # Call the module itself (not .forward) so registered hooks run.
        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # --- validation pass ---------------------------------------------------
    validation_loss = 0
    correct_predictions = 0
    samples_seen = 0
    model.eval()
    with torch.no_grad():
        for inputs, labels in validation_dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            logps = model(inputs)
            validation_loss += criterion(logps, labels).item()

            # Count correct samples instead of averaging per-batch means:
            # the last batch is usually smaller, so a mean of batch means
            # would over-weight its samples.
            predicted_class = logps.argmax(dim=1)
            correct_predictions += (predicted_class == labels).sum().item()
            samples_seen += labels.size(0)

    train_losses.append(running_loss/len(train_dataloader))
    validation_losses.append(validation_loss/len(validation_dataloader))
    validation_accuracy = correct_predictions/samples_seen

    print("Epoch: {}/{}.. ".format(e+1, epochs),
          "Training Loss: {:.3f}.. ".format(running_loss/len(train_dataloader)),
          "Validation Loss: {:.3f}.. ".format(validation_loss/len(validation_dataloader)),
          "Validation Accuracy: {:.3f}".format(validation_accuracy))


## Predictions:

In [None]:
def process_image(img):
    """Turn a PIL image into a normalised, channel-first array for the model.

    Steps: convert to RGB, scale so the shorter side is 256 pixels, take a
    224x224 centre crop, scale pixel values to [0, 1], normalise each channel
    with the mean/std below, and move the channel axis to the front.

    Parameters
    ----------
    img : PIL.Image.Image
        Source image. It is not modified; all work happens on a converted copy.

    Returns
    -------
    numpy.ndarray
        Float array of shape (3, 224, 224).
    """
    # Force three channels: grayscale / RGBA / CMYK files would otherwise give
    # an array that cannot be normalised with a 3-element mean/std.
    # convert() returns a new image, so the in-place thumbnail() below no
    # longer mutates the caller's object.
    img = img.convert('RGB')

    # Resize: shrink so that the shorter side becomes 256 (aspect ratio kept).
    if img.size[0] > img.size[1]:
        img.thumbnail((10000, 256))
    else:
        img.thumbnail((256, 10000))

    # thumbnail() only ever shrinks. Enlarge images whose shorter side is still
    # below 256 so the centre crop never reaches outside the picture.
    short_side = min(img.width, img.height)
    if short_side < 256:
        scale = 256 / short_side
        img = img.resize((max(256, round(img.width * scale)),
                          max(256, round(img.height * scale))))

    # Crop: 224x224 window centred in the image, as integer pixel offsets
    # (PIL's origin is the top-left corner, so the box is left/top/right/bottom).
    left = int(round((img.width - 224) / 2))
    top = int(round((img.height - 224) / 2))
    img = img.crop((left, top, left + 224, top + 224))

    # Normalize
    img = np.array(img)/255
    mean = np.array([0.485, 0.456, 0.406]) #provided mean
    std = np.array([0.229, 0.224, 0.225]) #provided std
    img = (img - mean)/std

    # Move color channels to first dimension as expected by PyTorch
    img = img.transpose((2, 0, 1))

    return img

In [None]:
# Iterate through the test folder and make a prediction for every image.
# The image name and its predicted label are saved in a dictionary.

image_and_labels = {}
# Sorted so the dictionary (and anything built from it) has a reproducible order;
# glob itself returns files in arbitrary order.
filenames = sorted(glob.glob('../input/testdata/TestFiles/*.jpg'))

# Evaluation mode (disables dropout); set once instead of on every iteration.
model.eval()

for image_path in filenames:

    # Image name: last path component without its extension. This does not
    # depend on the directory depth and keeps names that contain dots intact.
    image_num = image_path.split('/')[-1].rsplit('.', 1)[0]

    # Processing: the context manager closes the file handle once the pixel
    # data has been turned into an array.
    with Image.open(image_path) as image:
        img = process_image(image)
    img = torch.from_numpy(img).type(torch.FloatTensor)
    img.unsqueeze_(0)  # add the batch dimension
    batch = img.to(device)

    # Prediction: no gradients are needed for inference.
    with torch.no_grad():
        probs = torch.exp(model(batch))
    top_probs, top_labs = probs.topk(1)

    # Adding to the dictionary.
    # NOTE(review): the stored value is the index of the winning model output,
    # not a class-folder name or a cat_to_name entry. If the consumer expects
    # folder names, map it through train_data.class_to_idx — confirm.
    image_and_labels[image_num] = str(top_labs.item())

## Saving Output

In [None]:
# Build a two-column DataFrame from the dictionary: one row per
# (image name, predicted label) pair.
prediction_rows = [(name, label) for name, label in image_and_labels.items()]
df_img_lab = pd.DataFrame(prediction_rows)
df_img_lab.head()

In [None]:
# Write the predictions DataFrame to a CSV file, without the index column.
predictions_csv = 'predicted_labels_final.csv'
df_img_lab.to_csv(predictions_csv, index=False)