In [1]:
from torchvision.datasets import ImageFolder
from torchvision import transforms, models
from torch.utils import data
import torch
import numpy as np
import sklearn
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
import pandas as pd
import matplotlib.pyplot as plt
from os import listdir
from skimage.io import imread
from numpy.random import poisson, normal, beta, choice
import time
from copy import deepcopy, copy
from PIL import Image
import bagnets.pytorchnet

In [2]:
seed = 42

np.random.seed(seed)                                                                       
torch.manual_seed(seed) 

img_transform = transforms.Compose([transforms.Resize(255),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1])]
                                   )
dataset = ImageFolder(root='/Users/jlakkis/Downloads/plant-seedlings-classification/train', transform=img_transform)

print(dataset.classes)

['Black-grass', 'Charlock', 'Cleavers', 'Common Chickweed', 'Common wheat', 'Fat Hen', 'Loose Silky-bent', 'Maize', 'Scentless Mayweed', 'Shepherds Purse', 'Small-flowered Cranesbill', 'Sugar beet']


In [3]:
"""
 This is a function that takes in an image, and a dispersion parameter
 p and n define the size of the mean pixel patch and the number of patches
 replaces pixel with random noise in randomly selected patches of the image
"""

def patchnoiser(image, theta, p , n):
    x, y, c = image.shape
    means = torch.sum(image, dim=(0,1))/x/y
    
    for i in range(n):
        px = min(poisson(lam = p, size=None),x)
        py = min(poisson(lam = p, size=None),y)
        
        max_x = x + 1 - px
        max_y = y + 1 - py
        
        x_range = list(range(max_x))
        y_range = list(range(max_y))
        
        patch1_x = choice(x_range)
        patch1_y = choice(y_range)
        
        def pixeltonoise(x):
            if(x<10 **(-6)):
                return x
            if(x > 1 - 10**(-6)):
                return x

            a = (1 - theta)/theta * x
            b = (1 - theta)/theta * (1 - x)

            return(beta(a, b))
    
        pixeltonoise = np.vectorize(pixeltonoise)
        
        noise0 = pixeltonoise(float(means[0]) * np.ones(shape = (px, py)) )
        noise1 = pixeltonoise(float(means[1]) * np.ones(shape = (px, py)) )
        noise2 = pixeltonoise(float(means[2]) * np.ones(shape = (px, py)) )
        
        noise0 = torch.from_numpy(noise0)
        noise1 = torch.from_numpy(noise1)
        noise2 = torch.from_numpy(noise2)
        
        image[patch1_x:(patch1_x+px), patch1_y:(patch1_y+py),0] = noise0
        image[patch1_x:(patch1_x+px), patch1_y:(patch1_y+py),1] = noise1
        image[patch1_x:(patch1_x+px), patch1_y:(patch1_y+py),2] = noise2
    
    return(image)

"""
 This is a function that takes in an image, and a dispersion parameter
 replaces each pixel with a noisy version of that pixel 
"""

def uniformnoiser(image, theta):
    x, y, c = image.shape
    
    def pixeltonoise(x):
        if(x<10 **(-6)):
            return x
        if(x > 1 - 10**(-6)):
            return x
        
        a = (1 - theta)/theta * x
        b = (1 - theta)/theta * (1 - x)
        
        return(beta(a, b))
    
    pixeltonoise = np.vectorize(pixeltonoise)
    
    image = np.asarray(image)
    image = pixeltonoise(image)
    image = torch.from_numpy(image)
    
    return(image)

"""
 This is a function that takes in an image, and a dispersion parameter
 p and n define the size of the mean pixel patch and the number of patches
 replaces pixel with random noise in randomly selected patches of the image
"""

def patchswap(image, p, n):
    x, y, c = image.shape
    
    for i in range(n):
        px = min(poisson(lam = p, size=None),x)
        py = min(poisson(lam = p, size=None),y)
        
        max_x = x + 1 - px
        max_y = y + 1 - py
        
        x_range = list(range(max_x))
        y_range = list(range(max_y))
        
        patch1_x = choice(x_range)
        patch1_y = choice(y_range)
        patch1 = deepcopy(image[patch1_x:(patch1_x + px),patch1_y:(patch1_y + py),0:3])

        patch2_x = choice(x_range)
        patch2_y = choice(y_range)
        patch2 = deepcopy(image[patch2_x:(patch2_x + px),patch2_y:(patch2_y + py),0:3])
        
        image[patch1_x:(patch1_x + px),patch1_y:(patch1_y + py),0:3] = patch2
        image[patch2_x:(patch2_x + px),patch2_y:(patch2_y + py),0:3] = patch1
    
    return(image)

In [4]:
"""A pytorch dataset class that retrieves and modifies images in real time"""
"""
    Corruption Modes:
        1. None: return clean resized image
        2. patchswap: swap patches of pixels
        3. noiseypatch: Randomly select patches of pixels and replace them with randomly distributed noise
        4. uniformnoise: For each pixel, replace that pixel's color values with noisy versions of that 
            pixel's color values
"""
class tgenerator(data.Dataset):

    def __init__(self, IDlink):
        'Initialization'
        self.link = IDlink
        self.list_IDs = listdir(IDlink)
        self.mode = 'None'
        self.imageview = True
        self.transformer = transforms.Compose([transforms.Resize(255),
                                    transforms.CenterCrop(224),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1])])
    
    def __len__(self):
        'Denotes the total number of samples'
        return len(self.list_IDs)

    def viewmode(self):
        '''Switch the shaping of the tensors. For CNN models, call this method in order
        to ensure that the first dimension of the image tensor is the channel'''
        self.imageview = not self.imageview
    
    def setmode_none(self):
        self.mode = 'none'
    
    def setmode_patchswap(self,p = 1, n = 0):
        self.mode = 'patchswap'
        self.p = p
        self.n = n
        
    def setmode_noiseypatch(self, theta = 1/4, p = 1, n = 0):
        self.mode = 'noiseypatch'
        self.theta = theta
        self.p = p
        self.n = n
        
    def setmode_uniformnoise(self, theta = 1/4):
        self.mode = 'uniformnoise'
        self.theta = theta
        
    def setparams(self, resize=255, centercrop=224):
        self.size = size
        self.transformer = transforms.Compose([transforms.Resize(resize),
                                    transforms.CenterCrop(centercrop),
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1])]
        )
    
    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        ID = self.list_IDs[index]

        # Load data and get label
        X = Image.open(self.link + '/' + ID)
        X = self.transformer(X)
        X = X.permute(1,2,0)
        
        if(self.mode == 'patchswap'):
            X = patchswap(X, self.p, self.n)
            
        if(self.mode == 'noiseypatch'):
            X = patchnoiser(X, self.theta, self.p, self.n)
            
        if(self.mode == 'uniformnoise'):
            X = uniformnoiser(X, self.theta)

        if(not self.imageview):
            X = X.permute(2,0,1)
            
        return X

In [5]:
PATH = "/Users/jlakkis/Downloads/trained_bagnet.pkl"
bagnet = bagnets.pytorchnet.bagnet17(pretrained=False, num_classes = 12)
bagnet.load_state_dict(torch.load(PATH, map_location=torch.device('cpu')))
bagnet.eval() 

BagNet(
  (conv1): Conv2d(3, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.001, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(2, 2), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (1): BatchNorm2d(256,

In [6]:
index = list(range(794))
columns=['file','species']
df_ = pd.DataFrame(index = index, columns=columns)
df_ = df_.fillna(0)

In [7]:
link = "/Users/jlakkis/Desktop/Temporary Files/CIS 520 Project/data/test"
testset = tgenerator(link)
testset.viewmode()

Evaluate on clean test set.

Kaggle F1 Score: 0.72040

In [8]:
for i in range(794):
    im = testset[i]
    im = im.view(1,3,224,224)
    
    ytest = torch.max(bagnet(im),1)[1][0].detach()
    
    if(i % 100 ==0):
        print(i)
    
    df_['file'][i] = testset.list_IDs[i]
    df_['species'][i] = dataset.classes[int(ytest)]
    
df_.to_csv(path_or_buf = "/Users/jlakkis/Desktop/predictions_bagnet.csv", index = False)

0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


100
200
300
400
500
600
700


Evaluate on uniform noise perturbed test set.

Kaggle F1 Score: 0.11335

In [9]:
testset.setmode_uniformnoise(theta=0.6)

for i in range(794):
    im = testset[i]
    im = im.view(1,3,224,224)
    
    ytest = torch.max(bagnet(im.float()),1)[1][0].detach()
    
    if(i % 100 ==0):
        print(i)
    
    df_['file'][i] = testset.list_IDs[i]
    df_['species'][i] = dataset.classes[int(ytest)]
    
df_.to_csv(path_or_buf = "/Users/jlakkis/Desktop/predictions_unformnoise_bagnet.csv", index = False)

0
100
200
300
400
500
600
700


Evaluate on noisy patch perturbed test set.

Kaggle F1 Score: 0.20025

In [10]:
testset.setmode_noiseypatch(theta=0.1,p=30,n=40)

for i in range(794):
    im = testset[i]
    im = im.view(1,3,224,224)
    
    ytest = torch.max(bagnet(im.float()),1)[1][0].detach()
    
    if(i % 100 ==0):
        print(i)
    
    df_['file'][i] = testset.list_IDs[i]
    df_['species'][i] = dataset.classes[int(ytest)]
    
df_.to_csv(path_or_buf = "/Users/jlakkis/Desktop/predictions_noiseypatch_bagnet.csv", index = False)

0
100
200
300
400
500
600
700


Evaluate on patchswap perturbed test set.

Kaggle F1 Score: 0.52267

In [11]:
testset.setmode_patchswap(p=20, n=50)

for i in range(794):
    im = testset[i]
    im = im.view(1,3,224,224)
    
    ytest = torch.max(bagnet(im.float()),1)[1][0].detach()
    
    if(i % 100 ==0):
        print(i)
    
    df_['file'][i] = testset.list_IDs[i]
    df_['species'][i] = dataset.classes[int(ytest)]
    
df_.to_csv(path_or_buf = "/Users/jlakkis/Desktop/predictions_patchswap_bagnet.csv", index = False)

0
100
200
300
400
500
600
700


Evaluate on texturized test set.

Kaggle F1 Score: 0.19269

In [12]:
link = "/Users/jlakkis/Desktop/Temporary Files/CIS 520 Project/data/texturized"
testset = tgenerator(link)
testset.viewmode()

for i in range(794):
    im = testset[i]
    im = im.view(1,3,224,224)
    
    ytest = torch.max(bagnet(im.float()),1)[1][0].detach()
    
    if(i % 100 ==0):
        print(i)
    
    df_['file'][i] = testset.list_IDs[i]
    df_['species'][i] = dataset.classes[int(ytest)]
    
df_.to_csv(path_or_buf = "/Users/jlakkis/Desktop/predictions_texturized_bagnet.csv", index = False)

0
100
200
300
400
500
600
700
