In [13]:
!pip3 install torchvision

Collecting torchvision
  Downloading torchvision-0.9.1-cp38-cp38-win_amd64.whl (852 kB)
Collecting torch==1.8.1
  Downloading torch-1.8.1-cp38-cp38-win_amd64.whl (190.5 MB)
Installing collected packages: torch, torchvision
Successfully installed torch-1.8.1 torchvision-0.9.1


In [86]:
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [87]:
#Checking for device
#Most devices will use cpu gpu much faster at machine learning processing data
device=torch.device('cuda' if torch.cuda.is_available()else 'cpu')
print(device)

cpu


In [88]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(), #0-255 to 0-1 numpy to tensor
    transforms.Normalize([0.5,0.5,0.5],
                        [0.5,0.5,0.5])
])

In [89]:
#Dataloader
#Path to test and train data sets that we will use for analysis
train_path= 'AnacondaImageClassification_TrainingSetV2/train/'
test_path= 'AnacondaImageClassification_TrainingSetV2/test/'


train_loader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=32, shuffle=True)

test_loader=DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=transformer),
    batch_size=32, shuffle=True)

In [99]:
#Classification Categories

root=pathlib.Path(train_path)
#classes=sorted(['Algi', 'Clay', 'Clean', 'Iron', 'Lime', 'Petrol_Octane_95', 'Petrol_Octane_98', 'Stream'])
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])

In [100]:
print(classes)

['.ipynb_checkpoints', 'Algi', 'Clay', 'Clean', 'Iron', 'Lime', 'Petrol_Octane_95', 'Petrol_Octane_98', 'Saved_Model', 'Stream']


In [108]:
#CNN Network

class ConvNet(nn.Module):
    def __init__(self,num_classes=10):
        super(ConvNet,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
        
        #Input shape= (256,3,150,150)
        
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #Shape= (256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        #Shape= (256,12,150,150)
        self.relu1=nn.ReLU()
        #Shape= (256,12,150,150)
        
        self.pool=nn.MaxPool2d(kernel_size=2)
        #Reduce the image size be factor 2
        #Shape= (256,12,75,75)
        
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #Shape= (256,20,75,75)
        self.relu2=nn.ReLU()
        #Shape= (256,20,75,75)
        
        
        
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #Shape= (256,32,75,75)
        self.bn3=nn.BatchNorm2d(num_features=32)
        #Shape= (256,32,75,75)
        self.relu3=nn.ReLU()
        #Shape= (256,32,75,75)
        
        
        self.fc=nn.Linear(in_features=75 * 75 * 32,out_features=num_classes)
        
        
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
            
            
            #Above output will be in matrix form, with shape (256,32,75,75)
            
        output=output.view(-1,32*75*75)
            
            
        output=self.fc(output)
            
        return output

In [109]:
model=ConvNet(num_classes=10).to(device)

In [110]:
#Optmizer and loss function
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [111]:
num_epochs=10


In [112]:

#calculating the size of training and testing images
train_count=len(glob.glob(train_path+'/**/*.jpg'))
test_count=len(glob.glob(test_path+'/**/*.jpg'))

In [113]:
print(train_count,test_count)

288 96


In [114]:

#Model training and saving best model

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy

Epoch: 0 Train Loss: tensor(10.1548) Train Accuracy: 0.59375 Test Accuracy: 0.3229166666666667
Epoch: 1 Train Loss: tensor(4.2578) Train Accuracy: 0.8472222222222222 Test Accuracy: 0.5833333333333334
Epoch: 2 Train Loss: tensor(3.0609) Train Accuracy: 0.8993055555555556 Test Accuracy: 0.6979166666666666
Epoch: 3 Train Loss: tensor(1.6728) Train Accuracy: 0.9409722222222222 Test Accuracy: 0.84375
Epoch: 4 Train Loss: tensor(0.0732) Train Accuracy: 0.9826388888888888 Test Accuracy: 0.9479166666666666
Epoch: 5 Train Loss: tensor(0.0004) Train Accuracy: 1.0 Test Accuracy: 1.0
Epoch: 6 Train Loss: tensor(0.0006) Train Accuracy: 1.0 Test Accuracy: 1.0
Epoch: 7 Train Loss: tensor(0.0356) Train Accuracy: 0.9965277777777778 Test Accuracy: 1.0
Epoch: 8 Train Loss: tensor(0.0761) Train Accuracy: 0.9895833333333334 Test Accuracy: 0.9895833333333334
Epoch: 9 Train Loss: tensor(0.0726) Train Accuracy: 0.9930555555555556 Test Accuracy: 0.9895833333333334


## Testing on the unlabled images

In [115]:
import torch
import torch.nn as nn
from torchvision.transforms import transforms
import numpy as np
from torch.autograd import Variable
from torchvision.models import squeezenet1_1
import torch.functional as F
from io import open
import os
from PIL import Image
import pathlib
import glob
import cv2

In [116]:
train_path = 'AnacondaImageClassification_TrainingSetV2/train/'
pred = 'AnacondaImageClassification_TrainingSetV2/prediction/'


In [117]:
#categories
root=pathlib.Path(train_path)
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])

In [118]:

#Model training and saving best model

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy

Epoch: 0 Train Loss: tensor(0.6497) Train Accuracy: 0.9826388888888888 Test Accuracy: 0.96875
Epoch: 1 Train Loss: tensor(1.0286) Train Accuracy: 0.9444444444444444 Test Accuracy: 0.875
Epoch: 2 Train Loss: tensor(1.3974) Train Accuracy: 0.9548611111111112 Test Accuracy: 1.0
Epoch: 3 Train Loss: tensor(0.3041) Train Accuracy: 0.9756944444444444 Test Accuracy: 1.0
Epoch: 4 Train Loss: tensor(0.0037) Train Accuracy: 0.9965277777777778 Test Accuracy: 0.9270833333333334
Epoch: 5 Train Loss: tensor(0.6727) Train Accuracy: 0.9756944444444444 Test Accuracy: 1.0
Epoch: 6 Train Loss: tensor(0.9022) Train Accuracy: 0.9618055555555556 Test Accuracy: 0.9895833333333334
Epoch: 7 Train Loss: tensor(2.6012) Train Accuracy: 0.9513888888888888 Test Accuracy: 0.8333333333333334
Epoch: 8 Train Loss: tensor(0.6082) Train Accuracy: 0.9548611111111112 Test Accuracy: 0.8958333333333334
Epoch: 9 Train Loss: tensor(1.1657) Train Accuracy: 0.9652777777777778 Test Accuracy: 0.9479166666666666


In [126]:

checkpoint=torch.load('best_checkpoint.model')
model=ConvNet(num_classes=10)
model.load_state_dict(checkpoint)
model.eval()

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=10, bias=True)
)

In [127]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [128]:

#prediction function
def prediction(img_path,transformer):
    
    image=Image.open(img_path)
    
    image_tensor=transformer(image).float()
    
    
    image_tensor=image_tensor.unsqueeze_(0)
    
    if torch.cuda.is_available():
        image_tensor.cuda()
        
    input=Variable(image_tensor)
    
    
    output=model(input)
    
    index=output.data.numpy().argmax()
    
    pred=classes[index]
    
    return pred

In [129]:
images_path=glob.glob(pred+'/*.jpg')

In [130]:
pred_dict={}

for i in images_path:
    pred_dict[i[i.rfind('/')+1:]]=prediction(i,transformer)

In [131]:
pred_dict

{'prediction\\algi_water_1.jpg': 'Algi',
 'prediction\\algi_water_2.jpg': 'Algi',
 'prediction\\algi_water_3.jpg': 'Algi',
 'prediction\\algi_water_4.jpg': 'Algi',
 'prediction\\algi_water_5.jpg': 'Algi',
 'prediction\\clay_water_1.jpg': 'Clay',
 'prediction\\clay_water_2.jpg': 'Clay',
 'prediction\\clay_water_3.jpg': 'Clay',
 'prediction\\clay_water_4.jpg': 'Clay',
 'prediction\\clay_water_5.jpg': 'Clay',
 'prediction\\clean_water_1.jpg': 'Clean',
 'prediction\\clean_water_2.jpg': 'Clean',
 'prediction\\clean_water_3.jpg': 'Clean',
 'prediction\\clean_water_4.jpg': 'Clean',
 'prediction\\clean_water_5.jpg': 'Clean',
 'prediction\\iron_oxide_water_1.jpg': 'Iron',
 'prediction\\iron_oxide_water_2.jpg': 'Iron',
 'prediction\\iron_oxide_water_3.jpg': 'Iron',
 'prediction\\iron_oxide_water_4.jpg': 'Iron',
 'prediction\\iron_oxide_water_5.jpg': 'Iron',
 'prediction\\lime_water_1.jpg': 'Lime',
 'prediction\\lime_water_2.jpg': 'Lime',
 'prediction\\lime_water_3.jpg': 'Lime',
 'prediction\\lim

## ROC Curve

In [133]:
from sklearn import svm, datasets
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt

In [145]:
from sklearn.model_selection import train_test_split

from sklearn.metrics import roc_curve, auc
from sklearn import datasets
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC
from sklearn.preprocessing import label_binarize
import matplotlib.pyplot as plt

iris = pred_dict
X, y = train_accuracy, test_accuracy

y = label_binarize(y, classes=[0,1,2])
n_classes = 3

# shuffle and split training and test sets
X_train, X_test, y_train, y_test =\
    train_test_split(X, y, test_size=0.33, random_state=0)

# classifier
clf = OneVsRestClassifier(LinearSVC(random_state=0))
y_score = clf.fit(X_train, y_train).decision_function(X_test)

# Compute ROC curve and ROC area for each class
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])

# Plot of a ROC curve for a specific class
for i in range(n_classes):
    plt.figure()
    plt.plot(fpr[i], tpr[i], label='ROC curve (area = %0.2f)' % roc_auc[i])
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic example')
    plt.legend(loc="lower right")
    plt.show()

TypeError: Singleton array array(0.94791667) cannot be considered a valid collection.

NameError: name 'X_values_full' is not defined

NameError: name 'X_train' is not defined