Getting the data from Google Drive and unzipping it

In [None]:
!apt install unzip
!mkdir birds_sml
from google.colab import drive
drive.mount('/content/drive')
!unzip /content/drive/MyDrive/225Birds.zip -d birds_sml/

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: birds_sml/train/SUPERB STARLING/068.jpg  
  inflating: birds_sml/train/SUPERB STARLING/069.jpg  
  inflating: birds_sml/train/SUPERB STARLING/070.jpg  
  inflating: birds_sml/train/SUPERB STARLING/071.jpg  
  inflating: birds_sml/train/SUPERB STARLING/072.jpg  
  inflating: birds_sml/train/SUPERB STARLING/073.jpg  
  inflating: birds_sml/train/SUPERB STARLING/074.jpg  
  inflating: birds_sml/train/SUPERB STARLING/075.jpg  
  inflating: birds_sml/train/SUPERB STARLING/076.jpg  
  inflating: birds_sml/train/SUPERB STARLING/077.jpg  
  inflating: birds_sml/train/SUPERB STARLING/078.jpg  
  inflating: birds_sml/train/SUPERB STARLING/079.jpg  
  inflating: birds_sml/train/SUPERB STARLING/080.jpg  
  inflating: birds_sml/train/SUPERB STARLING/081.jpg  
  inflating: birds_sml/train/SUPERB STARLING/082.jpg  
  inflating: birds_sml/train/SUPERB STARLING/083.jpg  
  inflating: birds_sml/train/SUPERB STARLING/084.jpg  


**Importing Packages**

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from keras.applications import VGG16

**Defining the paths to the training, validation and test data**

In [None]:
# Locations of the dataset splits, relative to the working directory.
train_dir, test_dir, valid_dir = (
    'birds_sml/train',
    'birds_sml/test',
    'birds_sml/valid',
)

# Default input size for VGG16.
img_width = 224
img_height = 224

**Instantiating the convolutional base**

In [None]:
# VGG16 with ImageNet weights and without its classifier head
# (include_top=False); used below purely as a fixed feature extractor.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(img_width, img_height, 3),
)

# Print the layer-by-layer architecture.
conv_base.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

**Extract Features**

In [None]:
import os, shutil
from keras.preprocessing.image import ImageDataGenerator

# Number of images loaded and pushed through the network per step.
batch_size = 32
# Image loader that multiplies every pixel value by 1/255.
datagen = ImageDataGenerator(rescale=1.0 / 255)

def extract_features(directory, sample_count):
    """Run images from ``directory`` through ``conv_base`` and collect the output.

    Images are loaded with ``datagen.flow_from_directory`` (resized to
    ``(img_width, img_height)``, one-hot ``categorical`` labels) in batches of
    ``batch_size`` and passed through ``conv_base.predict``.

    Args:
        directory: Root folder containing one sub-folder per class.
        sample_count: Number of samples to extract. Expected to be at most the
            number of images found; the loop simply keeps pulling batches
            until this many rows are filled.

    Returns:
        Tuple ``(features, labels)`` of NumPy arrays. ``features`` has shape
        ``(sample_count, *conv_base.output_shape[1:])`` and ``labels`` has
        shape ``(sample_count, number_of_classes)`` (one-hot rows).
    """
    # Preprocess data
    generator = datagen.flow_from_directory(directory,
                                            target_size=(img_width, img_height),
                                            batch_size=batch_size,
                                            class_mode='categorical')

    # Size the buffers from the model and the generator instead of hard-coding
    # (7, 7, 512) and 225, so a different base or dataset keeps working.
    features = np.zeros(shape=(sample_count,) + tuple(conv_base.output_shape[1:]))
    labels = np.zeros(shape=(sample_count, generator.num_classes))
    if sample_count <= 0:
        return features, labels

    # Pass data through convolutional base
    filled = 0
    for inputs_batch, labels_batch in generator:
        # Batches can be shorter than batch_size (end of an epoch) and the
        # final batch may overshoot sample_count, so track the real offset and
        # keep only as many rows as still fit.
        take = min(len(inputs_batch), sample_count - filled)
        features_batch = conv_base.predict(inputs_batch)
        features[filled:filled + take] = features_batch[:take]
        labels[filled:filled + take] = labels_batch[:take]
        filled += take
        if filled >= sample_count:
            break
    return features, labels
    
# Sample counts equal the "Found N images" totals reported for each split.
train_features, train_labels = extract_features(directory=train_dir, sample_count=31316)
# Validation split is currently not extracted:
# validation_features, validation_labels = extract_features(valid_dir, 1125)
test_features, test_labels = extract_features(directory=test_dir, sample_count=1125)

Found 31316 images belonging to 225 classes.
Found 1125 images belonging to 225 classes.


In [None]:
# Sanity check: each array should be (n_samples, 7, 7, 512).
# The first shape is printed, the second is shown as the cell's output.
print(train_features.shape)
test_features.shape

(31316, 7, 7, 512)


(1125, 7, 7, 512)

In [None]:
# Sanity check: label arrays should be (n_samples, 225), one column per class.
print(train_labels.shape)
test_labels.shape

(31316, 225)


(1125, 225)

In [None]:
# Convert the one-hot label rows into integer class indices.
# Fail loudly if any row has no entry equal to 1 (e.g. a row that was never filled).
if not ((train_labels == 1).any(axis=1).all() and (test_labels == 1).any(axis=1).all()):
    raise ValueError("label arrays contain rows without a 1; expected one-hot rows")

# argmax over the boolean mask returns the first column equal to 1 in each
# row, in a single vectorised call instead of a Python loop over every row.
Y_train = (train_labels == 1).argmax(axis=1)
Y_test = (test_labels == 1).argmax(axis=1)

In [None]:
# Sanity check: the class-index vectors should be 1-D, one entry per sample.
print(Y_train.shape)
Y_test.shape

(31316,)


(1125,)

In [None]:
# Re-display the feature array shapes before they are flattened in the next cell.
print(train_features.shape)
test_features.shape

(31316, 7, 7, 512)


(1125, 7, 7, 512)

In [None]:
# Collapse each feature map into a single row per sample for the linear SVM.
# reshape() alone is enough: the previous .flatten() first made a full extra
# copy of the (multi-GB) array. Row counts come from the arrays themselves
# rather than being hard-coded.
# Note: the results may be views that share memory with the *_features arrays.
X_train = train_features.reshape(len(train_features), -1)
X_test = test_features.reshape(len(test_features), -1)

In [None]:
# Sanity check: each row should now hold 7 * 7 * 512 = 25088 values.
print(X_train.shape)
X_test.shape

(31316, 25088)


(1125, 25088)

In [None]:
from sklearn import svm
# Linear SVM on the flattened VGG16 features.
# random_state is fixed so repeated runs produce the same model; without it
# the solver's internal shuffling makes results vary from run to run.
clf = svm.LinearSVC(verbose=1, random_state=0)

# Last expression: the fitted estimator is displayed as the cell output.
clf.fit(X_train, Y_train)

[LibLinear]



LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
          intercept_scaling=1, loss='squared_hinge', max_iter=1000,
          multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
          verbose=1)

In [None]:
import pickle
# Persist the fitted classifier to disk.
filename = 'svm_model.sav'
# Use a context manager so the file is flushed and closed even if pickling
# fails; the previous bare open() handle was never closed.
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

In [None]:
# Only the last expression of a cell is displayed, so a bare `Y_train.shape`
# on the first line was silently discarded. Print it explicitly.
print(Y_train.shape)
Y_test.shape

(1125,)

In [None]:
# Preview the first 1000 feature rows (NumPy abbreviates the display).
X_train[:1000]

array([[0.12374729, 0.        , 0.        , ..., 0.        , 0.64049435,
        0.        ],
       [0.01492564, 0.        , 0.        , ..., 0.29106966, 1.14703727,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.1836108 ,
        0.        ],
       ...,
       [0.72774935, 0.        , 0.        , ..., 0.        , 1.13006175,
        0.        ],
       [0.24558073, 0.        , 0.        , ..., 0.18684551, 0.33155817,
        0.        ],
       [0.24320889, 0.        , 0.        , ..., 0.        , 0.80068254,
        0.        ]])

In [None]:
from sklearn.metrics import accuracy_score

# Predicted class indices for both splits from the fitted classifier.
y_pred_train, y_pred_test = clf.predict(X_train), clf.predict(X_test)

In [None]:
# Training accuracy is printed; test accuracy is shown as the cell's output.
# A large gap between the two indicates overfitting to the training set.
print(accuracy_score(Y_train , y_pred_train))
accuracy_score(Y_test , y_pred_test)

0.9999680674415634


0.8924444444444445

In [None]:
# `y_pred` is never assigned in the visible cells (the prediction cell
# defines y_pred_train and y_pred_test), so this raised NameError on a fresh
# kernel. Show the test-set predictions instead.
# NOTE(review): confirm that the test predictions are the intended array.
y_pred_test

array([ 57,  40, 165,  47, 109,  80,  68,  65, 102,  63, 224, 191, 135,
       124,   4, 140, 130,  35, 138, 179,  79, 199, 135, 183,  90, 151,
        87, 144, 223,  67,  32,  86, 161, 136,  42,  87,  53, 109, 129,
       210,  42,  37, 134,  97,  52,  92,  25,  53,  38, 110, 206, 183,
       163, 108,  11, 213, 216,  77,  39, 101, 110,  67,   6, 194,  77,
        10,  28,  18,  60, 205, 152, 108,  75, 182, 207, 197, 104,  11,
       188,   3, 224,  34, 126,  97, 216, 168, 111,   7,  14,  51,  46,
         9, 180, 112, 189,   4, 134, 183, 141,  24,  40,  44, 166, 102,
       174, 123,  64,  98,  99, 171,  53, 138,  67, 182, 132, 160,  20,
       190, 215,  63, 118, 114,  75,  49,  51, 104,  24, 199,  89, 108,
       173,   2, 139, 151,   6,  26,  10, 194,  42,   6,  60,  68, 115,
         2,  82, 153,  42,  30, 133, 178, 158, 148,  39,  41, 124, 110,
        15, 127, 123,  50,  70,  93,   9, 151,  89,  84, 190,  15, 148,
       193, 141, 212,  31,  77,  85, 150,  90, 117, 107, 142, 18