In [1]:
import tensorflow as tf
import tensorflow.keras.layers as Layers
import tensorflow.keras.activations as Activations
import tensorflow.keras.models as Models
import tensorflow.keras.optimizers as Optimizer
import keras 
import os
import matplotlib.pyplot as plt
import cv2
import numpy as np
import time
import pandas as pd
from sklearn.utils import shuffle
from IPython.display import SVG
import seaborn as sns
from tqdm import tqdm
from sklearn.metrics import accuracy_score

In [2]:
# Scene categories; a category's position in this list is its integer label.
class_names = ['mountain', 'street', 'glacier', 'buildings', 'sea', 'forest']
class_names_label = dict(zip(class_names, range(len(class_names))))
nb_classes = len(class_names)

# Two input resolutions are used, depending on the algorithm:
#   - IMAGE_SIZE1 (150x150): NN, CNN, InceptionV3 and VGG
#   - IMAGE_SIZE  (32x32):   all other algorithms
IMAGE_SIZE = (32, 32)
IMAGE_SIZE1 = (150, 150)

In [3]:
def load_data(image_size=None):
    """Load the training and test image folders into NumPy arrays.

    Walks every class sub-folder of ``seg_train`` and ``seg_test`` (paths are
    relative to the current working directory), reads each image with OpenCV,
    converts it from BGR to RGB and resizes it.

    Parameters
    ----------
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize``. When omitted, the module-level
        ``IMAGE_SIZE`` is used, which is the behaviour of a plain
        ``load_data()`` call.

    Returns
    -------
    list of (images, labels)
        One tuple per dataset, in the order ``[seg_train, seg_test]``.
        ``images`` is a float32 array of the resized RGB images and ``labels``
        is an int32 array of the matching ``class_names_label`` indices.
    """
    if image_size is None:
        image_size = IMAGE_SIZE

    datasets = ['seg_train', 'seg_test']
    output = []

    # Iterate through training and test sets
    for dataset in datasets:

        images = []
        labels = []
        unreadable = 0

        print("Loading {}".format(dataset))

        # sorted() keeps the load order independent of the order in which the
        # filesystem happens to list entries.
        for folder in sorted(os.listdir(dataset)):
            folder_path = os.path.join(dataset, folder)

            # Stray files next to the class folders (e.g. .DS_Store) are ignored.
            if not os.path.isdir(folder_path):
                continue
            # A directory that is not a known class would otherwise raise KeyError.
            if folder not in class_names_label:
                print("Skipping unknown folder {}".format(folder_path))
                continue
            label = class_names_label[folder]

            # Iterate through each image in the class folder
            for file in tqdm(sorted(os.listdir(folder_path))):
                img_path = os.path.join(folder_path, file)

                # cv2.imread returns None instead of raising when a file
                # cannot be decoded; such files are counted and skipped.
                image = cv2.imread(img_path)
                if image is None:
                    unreadable += 1
                    continue

                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, image_size)

                # Keep the image and its label at the same position
                images.append(image)
                labels.append(label)

        if unreadable:
            print("Skipped {} unreadable file(s) in {}".format(unreadable, dataset))

        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')
        print(images.shape)
        output.append((images, labels))

    return output


In [4]:
def load_data1(image_size=None):
    """Load the training and test image folders at the larger resolution.

    Walks every class sub-folder of ``seg_train`` and ``seg_test`` (paths are
    relative to the current working directory), reads each image with OpenCV,
    converts it from BGR to RGB and resizes it.

    Parameters
    ----------
    image_size : tuple of int, optional
        Target size handed to ``cv2.resize``. When omitted, the module-level
        ``IMAGE_SIZE1`` is used, which is the behaviour of a plain
        ``load_data1()`` call.

    Returns
    -------
    list of (images, labels)
        One tuple per dataset, in the order ``[seg_train, seg_test]``.
        ``images`` is a float32 array of the resized RGB images and ``labels``
        is an int32 array of the matching ``class_names_label`` indices.
    """
    if image_size is None:
        image_size = IMAGE_SIZE1

    datasets = ['seg_train', 'seg_test']
    output = []

    # Iterate through training and test sets
    for dataset in datasets:

        images = []
        labels = []
        unreadable = 0

        print("Loading {}".format(dataset))

        # sorted() keeps the load order independent of the order in which the
        # filesystem happens to list entries.
        for folder in sorted(os.listdir(dataset)):
            folder_path = os.path.join(dataset, folder)

            # Stray files next to the class folders (e.g. .DS_Store) are ignored.
            if not os.path.isdir(folder_path):
                continue
            # A directory that is not a known class would otherwise raise KeyError.
            if folder not in class_names_label:
                print("Skipping unknown folder {}".format(folder_path))
                continue
            label = class_names_label[folder]

            # Iterate through each image in the class folder
            for file in tqdm(sorted(os.listdir(folder_path))):
                img_path = os.path.join(folder_path, file)

                # cv2.imread returns None instead of raising when a file
                # cannot be decoded; such files are counted and skipped.
                image = cv2.imread(img_path)
                if image is None:
                    unreadable += 1
                    continue

                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, image_size)

                # Keep the image and its label at the same position
                images.append(image)
                labels.append(label)

        if unreadable:
            print("Skipped {} unreadable file(s) in {}".format(unreadable, dataset))

        images = np.array(images, dtype='float32')
        labels = np.array(labels, dtype='int32')
        print(images.shape)
        output.append((images, labels))

    return output


In [5]:
# Load the train and test splits via load_data() (called with its defaults),
# then shuffle the training images and labels together so each image keeps its
# label. random_state=25 fixes the permutation so the shuffle is repeatable.
(train_images, train_labels), (X_test, y_test) = load_data()  
X_train, y_train = shuffle(train_images, train_labels, random_state=25)

  3%|██▌                                                                            | 71/2191 [00:00<00:02, 708.16it/s]

Loading seg_train


100%|█████████████████████████████████████████████████████████████████████████████| 2191/2191 [00:02<00:00, 769.84it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2271/2271 [00:03<00:00, 741.95it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2404/2404 [00:03<00:00, 793.41it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2512/2512 [00:03<00:00, 817.10it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2274/2274 [00:02<00:00, 822.44it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2382/2382 [00:03<00:00, 787.29it/s]
 19%|███████████████▌                                                                | 85/437 [00:00<00:00, 848.89it/s]

(14034, 32, 32, 3)
Loading seg_test


100%|███████████████████████████████████████████████████████████████████████████████| 437/437 [00:00<00:00, 805.20it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 474/474 [00:00<00:00, 770.22it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 553/553 [00:00<00:00, 836.69it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 525/525 [00:00<00:00, 806.43it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 510/510 [00:00<00:00, 823.66it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 501/501 [00:00<00:00, 781.43it/s]


(3000, 32, 32, 3)


In [6]:
# Load the train and test splits via load_data1() (called with its defaults),
# then shuffle the training images and labels together so each image keeps its
# label. random_state=25 fixes the permutation so the shuffle is repeatable.
(train_images1, train_labels1), (Xnn_test, ynn_test) = load_data1()  
Xnn_train, ynn_train = shuffle(train_images1, train_labels1, random_state=25)

  7%|█████                                                                        | 145/2191 [00:00<00:01, 1449.69it/s]

Loading seg_train


100%|████████████████████████████████████████████████████████████████████████████| 2191/2191 [00:01<00:00, 1448.29it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2271/2271 [00:01<00:00, 1391.36it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2404/2404 [00:01<00:00, 1500.18it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2512/2512 [00:01<00:00, 1540.69it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2274/2274 [00:01<00:00, 1542.36it/s]
100%|████████████████████████████████████████████████████████████████████████████| 2382/2382 [00:01<00:00, 1468.91it/s]
 12%|█████████▌                                                                      | 52/437 [00:00<00:00, 515.96it/s]

(14034, 150, 150, 3)
Loading seg_test


100%|███████████████████████████████████████████████████████████████████████████████| 437/437 [00:00<00:00, 723.47it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 474/474 [00:00<00:00, 733.04it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 553/553 [00:00<00:00, 805.74it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 525/525 [00:00<00:00, 790.79it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 510/510 [00:00<00:00, 806.78it/s]
100%|███████████████████████████████████████████████████████████████████████████████| 501/501 [00:00<00:00, 776.12it/s]


(3000, 150, 150, 3)


In [7]:
# Scale the pixel values by dividing by 255 so that every value lies in [0, 1].
# The NN, CNN, InceptionV3 and VGG models take the 4-D image arrays as input;
# the other models take flattened 2-D arrays.
# NOTE: each name is rebound to its scaled copy, so running this cell a second
# time divides by 255 again.

Xnn_train = Xnn_train/255
Xnn_test = Xnn_test/255
print(Xnn_train.shape)

(14034, 150, 150, 3)


In [8]:
# Flatten every image into one feature row for the scikit-learn / XGBoost
# models. -1 lets NumPy infer the row length from the array itself, so this
# keeps working if IMAGE_SIZE is changed (the length was hard-coded as 32*32*3).
X_train = X_train.reshape(X_train.shape[0], -1)
X_test = X_test.reshape(X_test.shape[0], -1)

# Scale pixel values to [0, 1].
# NOTE: the names are rebound to the scaled arrays, so running this cell a
# second time divides by 255 again.
X_train = X_train / 255
X_test = X_test / 255


In [12]:
# Multinomial logistic regression on the flattened, scaled images.
from sklearn.linear_model import LogisticRegression

tic = time.time()
log_reg = LogisticRegression(
    max_iter=10000,
    multi_class='multinomial',
    solver='lbfgs',
)
log_reg.fit(X_train, y_train)
predlr = log_reg.predict(X_test)
# Arguments follow the documented (y_true, y_pred) order; accuracy itself is
# symmetric, so the printed value is the same.
print(accuracy_score(y_test, predlr))
toc = time.time()
# Elapsed wall-clock seconds for fit + predict + the accuracy print.
LRtime = toc - tic
print(LRtime)

0.486
613.0998592376709


In [13]:
# Multinomial naive Bayes on the flattened, scaled images.
from sklearn.naive_bayes import MultinomialNB

tic = time.time()
NB = MultinomialNB()
NB.fit(X_train, y_train)
prednb = NB.predict(X_test)
# Arguments follow the documented (y_true, y_pred) order; accuracy itself is
# symmetric, so the printed value is the same.
print(accuracy_score(y_test, prednb))
toc = time.time()
# Elapsed wall-clock seconds for fit + predict + the accuracy print.
NBtime = toc - tic
print(NBtime)

0.43633333333333335
0.5185930728912354


In [14]:
# Gaussian naive Bayes on the flattened, scaled images.
from sklearn.naive_bayes import GaussianNB

tic = time.time()
GNB = GaussianNB()
GNB.fit(X_train, y_train)
predgnb = GNB.predict(X_test)
print(accuracy_score(y_test, predgnb))
toc = time.time()
# Stored under its own name so the MultinomialNB timing kept in NBtime is not
# overwritten by this cell.
GNBtime = toc - tic
print(GNBtime)

0.467
1.1257996559143066


In [15]:
# Decision tree on the flattened, scaled images.
from sklearn.tree import DecisionTreeClassifier

tic = time.time()
# random_state pins the tree's internal randomness so reruns give the same
# tree; 25 matches the seed already used for the training-set shuffle.
dt = DecisionTreeClassifier(random_state=25)
dt.fit(X_train, y_train)
preddt = dt.predict(X_test)
print(accuracy_score(y_test, preddt))
toc = time.time()
# Stored as DTtime: this is the decision-tree timing, and KNNtime is assigned
# by the k-nearest-neighbours cell.
DTtime = toc - tic
print(DTtime)

0.414
44.41193699836731


In [16]:
# k-nearest neighbours (k = 6) on the flattened, scaled images.
from sklearn.neighbors import KNeighborsClassifier

tic = time.time()
knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X_train, y_train)
predknn = knn.predict(X_test)
# Arguments follow the documented (y_true, y_pred) order; accuracy itself is
# symmetric, so the printed value is the same.
print(accuracy_score(y_test, predknn))
toc = time.time()
# Elapsed wall-clock seconds for fit + predict + the accuracy print.
KNNtime = toc - tic
print(KNNtime)

0.4036666666666667
6.677317142486572


In [21]:
from tensorflow.keras.models import Sequential

# Fully connected baseline: the 150x150x3 image is flattened and passed through
# three ReLU layers (192, 64, 32 units) to a 6-way softmax output.
tic = time.time()
model2 = Sequential([
    Layers.Flatten(input_shape=(150, 150, 3)),
    Layers.Dense(192, activation='relu'),
    Layers.Dense(64, activation='relu'),
    Layers.Dense(32, activation='relu'),
    Layers.Dense(6, activation='softmax'),
])

# Sparse categorical cross-entropy is used because the labels are integer
# class indices rather than one-hot vectors.
model2.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# The last 20% of the training samples is held out as the validation set.
history2 = model2.fit(
    Xnn_train,
    ynn_train,
    batch_size=128,
    epochs=16,
    validation_split=0.2,
)

toc = time.time()
# Elapsed wall-clock seconds for building, compiling and training the model.
NNtime = toc - tic
print(NNtime)


Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16
77.1002197265625


In [22]:
# Evaluate the dense network on the test split; the cell displays the value
# returned by evaluate().
model2.evaluate(x=Xnn_test, y=ynn_test)




[1.4892703294754028, 0.4633333384990692]

In [26]:
# Small CNN: two (3x3 conv, 64 filters -> 2x2 max-pool) stages, then a
# 128-unit dense layer and a 6-way softmax output.
tic = time.time()
model = tf.keras.Sequential([
    Layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu',
                  input_shape=(150, 150, 3)),
    # MaxPooling2D(2, 2) positionally means pool_size=2 and strides=2.
    Layers.MaxPooling2D(pool_size=2, strides=2),
    Layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    Layers.MaxPooling2D(pool_size=2, strides=2),
    Layers.Flatten(),
    Layers.Dense(128, activation=tf.nn.relu),
    Layers.Dense(6, activation=tf.nn.softmax),
])
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
# The last 20% of the training samples is held out as the validation set.
history = model.fit(
    Xnn_train,
    ynn_train,
    batch_size=64,
    epochs=10,
    validation_split=0.2,
)
toc = time.time()
# Elapsed wall-clock seconds for building, compiling and training the model.
CNNtime = toc - tic
print(CNNtime)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
166.17591190338135


In [27]:
# Evaluate the CNN on the test split; the cell displays the value returned by
# evaluate().
model.evaluate(x=Xnn_test, y=ynn_test)



[1.2042089700698853, 0.7749999761581421]

In [9]:
# Gradient-boosted trees (XGBoost) on the flattened, scaled images.
# The import happens before the timer starts so that library import time is
# not counted in XGBtime.
import xgboost as xgb

tic = time.time()
xg_cl = xgb.XGBClassifier(objective='multi:softprob',
                          n_estimators=10, seed=123)
xg_cl.fit(X_train, y_train)
preds = xg_cl.predict(X_test)
# Same metric helper as the other classifier cells; float() keeps `accuracy`
# a plain Python float.
accuracy = float(accuracy_score(y_test, preds))
print(accuracy)
toc = time.time()
# Elapsed wall-clock seconds for fit + predict + the accuracy print.
XGBtime = toc - tic
print(XGBtime)



0.5896666666666667
18.951029777526855


In [11]:
# Random forest (100 trees) on the flattened, scaled images.
# Both imports happen before the timer starts so that import time is not
# counted in RFtime. `metrics` stays imported here because later code calls
# metrics.accuracy_score.
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier

tic = time.time()
# random_state pins bootstrap sampling and feature selection so reruns give
# the same forest; 25 matches the seed already used for the training shuffle.
clf = RandomForestClassifier(n_estimators=100, random_state=25)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
toc = time.time()
# Elapsed wall-clock seconds for fit + predict + the accuracy print.
RFtime = toc - tic
print(RFtime)

Accuracy: 0.594
52.458067655563354


In [12]:
# Fit a 500-component PCA on the flattened training images and report how much
# of the variance those components retain.
from sklearn.decomposition import PCA

# The timer start is kept because code after this cell may read `tic`.
tic = time.time()
pca = PCA(n_components=500)
# Only the fitted model is needed for explained_variance_ratio_; the projected
# data was never stored, so fit() replaces fit_transform() and the projection
# is not computed.
pca.fit(X_train)
print(pca.explained_variance_ratio_.sum())

0.9283348


In [13]:
# Support vector classifier (default SVC settings) on the flattened, scaled
# images.
from sklearn.svm import SVC

# Start this cell's own timer. Without it, `toc - tic` would use a `tic` set by
# an earlier cell and would include that cell's work and any idle time.
tic = time.time()
svm = SVC()
svm.fit(X_train, y_train)
predsvm = svm.predict(X_test)
toc = time.time()
# Elapsed wall-clock seconds for SVC fit + predict only.
svmtime = toc - tic
print(svmtime)


497.737286567688


In [14]:
# accuracy_score is imported in the notebook's first import cell, so this line
# does not depend on the `metrics` module that is only imported inside the
# random-forest cell.
print("Accuracy:",accuracy_score(y_test, predsvm))


Accuracy: 0.6386666666666667
