In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import keras
import skimage
import tensorflow
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, AvgPool2D, BatchNormalization, Reshape
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
from keras.utils.vis_utils import plot_model
import matplotlib.pyplot as plt
import skimage.morphology as morp
from skimage.filters import rank

%matplotlib inline



  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [8]:
# Load all images. 

import os
import skimage.data
def load_data(data_dir):
    images = []
    
    file_names = sorted([os.path.join(data_dir, f)
                  for f in os.listdir(data_dir) if f.endswith(".jpg")])
    
    for f in file_names:
        images.append(skimage.data.imread(f, as_gray=True))
    
    return images

images = load_data('D:/mapmyindia-master/mapmyindia-master/data/train/cropped')
print(len(images))

467


In [9]:
#explore shapes
for image in images[:3]:
    print('image.shape[before]', image.shape)

image.shape[before] (24, 17)
image.shape[before] (60, 45)
image.shape[before] (148, 95)


In [10]:
#transform all images to 250x250

image_size = 64
import skimage.transform
images250 = [skimage.transform.resize(image, (image_size, image_size)) for image in images]

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


In [11]:
#confirm transformation
for image in images250[:3]:
    print('image.shape[after]', image.shape)
    

image.shape[after] (64, 64)
image.shape[after] (64, 64)
image.shape[after] (64, 64)


In [12]:
from PIL import Image

for image in images250[:3]:
    image = (image * 255).astype(np.uint8)
    im = Image.fromarray(image)
    plt.figure()
    plt.imshow(im)
    plt.show()


AttributeError: 'numpy.ndarray' object has no attribute 'mask'

<matplotlib.figure.Figure at 0x268bce15668>

AttributeError: 'numpy.ndarray' object has no attribute 'mask'

<matplotlib.figure.Figure at 0x268bce9bc88>

AttributeError: 'numpy.ndarray' object has no attribute 'mask'

<matplotlib.figure.Figure at 0x268be00c2b0>

In [13]:
#Load Labels 
data = pd.read_csv("D:/mapmyindia-master/mapmyindia-master/data/train.csv")
data = data.sort_values("Img_Name").reset_index(drop=True)
labels = data["Label"]

labels.head()

0    Speed Limit 60
1    Speed Limit 40
2    Speed Limit 20
3    Speed Limit 40
4    Speed Limit 50
Name: Label, dtype: object

In [14]:
#categorize labels..

import numpy as np
from keras.utils.np_utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# y_train_labels = np.array(labels)
# X_train_data = np.array(images250)
y = np.array(labels)
X = np.array(images250)
X = X[:, :, :, np.newaxis] # (250,250) --> (467,250,250,1)

num_categories = 6

#encode labels
label_encoder = LabelEncoder()
y_labels_encoded = label_encoder.fit_transform(y)
y_labels_categorized = to_categorical(y_labels_encoded, num_categories)

print('len(X)', len(X))
print('len(y)', len(y))
print('len(y_labels_categorized)', len(y_labels_categorized))

len(X) 467
len(y) 467
len(y_labels_categorized) 467


In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y_labels_categorized, test_size=0.2, random_state=42)

In [22]:
# OLD MODEL

# input_shape = (250, 250, 1)

# model = Sequential()
# model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape, data_format='channels_last'))
# # model.add(Conv2D(32, (3, 3), activation='relu'))
# # model.add(Conv2D(32, (3, 3), activation='relu'))
# model.add(MaxPool2D(pool_size=(2, 2)))

# # model.add(Conv2D(64, (3, 3), activation='relu'))
# # model.add(MaxPool2D(pool_size=(2, 2)))
# # model.add(Dropout(0.25))
# model.add(Flatten())

# model.add(Dense(256, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(num_categories, activation='softmax'))

# model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])

# model.summary()

In [16]:
# MODEL #1 -- Basic Non-convolutional

model = Sequential()
model.add(Flatten(input_shape=(image_size,image_size,1)))
model.add(Dense(units=image_size,activation="relu"))
model.add(Dense(units=image_size,activation="relu"))
model.add(Dense(units=6,activation="softmax"))
model.compile(optimizer="adam",loss="categorical_crossentropy",metrics=["accuracy"])

In [20]:
model.fit(X_train, y_train, epochs=5, batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x268bdff9d68>

In [21]:
accuracy = model.evaluate(x=X_test,y=y_test,batch_size=32)
accuracy



[1.6743259455295318, 0.36170212829366644]

In [22]:
# MODEL #2 -- Convolutional test 

total_epochs_trained = 0
model = Sequential()
model.add(Conv2D(32, (5, 5), input_shape=(image_size,image_size,1), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# model.add(Conv2D(64, (3, 3), activation='relu'))
# model.add(MaxPool2D(pool_size=(2, 2)))
# model.add(Dropout(0.2))

model.add(Flatten())
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(6, activation='softmax'))

# from keras.optimizers import SGD
# opt = SGD(lr=0.05)

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop', #opt
              metrics=['accuracy']) 

# NOTE: If loss=categorical_crossentropy, 
# then 'accuracy' metrics == categorical_accruacy

model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 60, 60, 32)        832       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 30, 30, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 30, 30, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 14, 14, 64)        0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 12544)             0         
__________

In [110]:
# data_gen = ImageDataGenerator(
#     featurewise_center=True,
#     featurewise_std_normalization=True,
#     rotation_range=20,
#     zoom_range=0.2,
#     width_shift_range=0.1,
#     height_shift_range=0.1,
#     horizontal_flip=False,
#     fill_mode="nearest",
#     channel_shift_range=0.2,
# )


# # compute quantities required for featurewise normalization
# # (std, mean, and principal components if ZCA whitening is applied)
# data_gen.fit(X_train)

# # fits the model on batches with real-time data augmentation:
# # model.fit_generator(train_gen.flow(X_train, y_train, batch_size=32),
# #                     steps_per_epoch=len(X_train) / 32, epochs=20)

# import os
# os.makedirs('../data/image_data_gen')

# i = 0
# for X_batch, y_batch in data_gen.flow(X_train, y_train, batch_size=5, save_to_dir='../data/image_data_gen', save_prefix='aug', save_format='jpg'):
#     i += 1
#     if i == 500:
#         break

# print("done.")

In [23]:
train_gen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    zoom_range=0.1,
    width_shift_range=0.05,
    height_shift_range=0.05,
    horizontal_flip=False,
    fill_mode="nearest",
    channel_shift_range=0.2,
)

validation_gen = ImageDataGenerator(
    featurewise_center=False
#     featurewise_std_normalization=True
)


# compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied)
train_gen.fit(X_train)
validation_gen.fit(X_train)

In [24]:
epochs_per_round = 5
epoch_acc = []
print("ACCURACY AFTER X EPOCHS:\n")
for i in range(10):
    # fits the model on batches with real-time data augmentation:
    model.fit_generator(
        train_gen.flow(X_train, y_train, batch_size=32),
#         validation_data=validation_gen.flow(X_test, y_test, batch_size=32),
#         validation_steps=800 // 32,
        steps_per_epoch=len(X_train) / 32, 
        epochs=epochs_per_round,
        verbose=0
    )
    
    total_epochs_trained += epochs_per_round

    acc = model.evaluate(x=X_test,y=y_test,batch_size=32, verbose=0)
    print("EPOCH #", total_epochs_trained, "(loss, acc):", (round(acc[0],3), round(acc[1],3)))
    epoch_acc.append((acc, total_epochs_trained))
    
    
print(epoch_acc)


ACCURACY AFTER X EPOCHS:

EPOCH # 5 (loss, acc): (1.784, 0.287)
EPOCH # 10 (loss, acc): (1.751, 0.33)
EPOCH # 15 (loss, acc): (1.708, 0.351)
EPOCH # 20 (loss, acc): (1.671, 0.298)
EPOCH # 25 (loss, acc): (1.605, 0.383)
EPOCH # 30 (loss, acc): (1.452, 0.489)
EPOCH # 35 (loss, acc): (1.37, 0.628)
EPOCH # 40 (loss, acc): (1.354, 0.606)
EPOCH # 45 (loss, acc): (1.263, 0.649)
EPOCH # 50 (loss, acc): (1.271, 0.681)
[([1.7840509363945494, 0.2872340438213754], 5), ([1.750868523374517, 0.32978723467664517], 10), ([1.708340862964062, 0.351063830421326], 15), ([1.6708382799270305, 0.2978723423278078], 20), ([1.6052105452152008, 0.38297872467243926], 25), ([1.4521786836867636, 0.48936170212765956], 30), ([1.3699695277721324, 0.6276595731999012], 35), ([1.354158858035473, 0.6063829761870364], 40), ([1.2633168798811891, 0.6489361664082142], 45), ([1.2711678794089785, 0.6808510701707069], 50)]


In [None]:
# model.fit(X_train, y_train, epochs=10, batch_size=32)

In [25]:
accuracy = model.evaluate(x=X_test,y=y_test,batch_size=32)
accuracy



[1.2711678794089785, 0.6808510701707069]

In [26]:
predictions = model.predict(X_test)
print('First prediction:', predictions[0])

predict_max = [max(class_probs) for class_probs in predictions]
print('Average predicted class probability:', np.mean(predict_max))

correct = [1 if np.argmax(predictions[i])==np.argmax(y_test[i]) else 0 for i in range(len(y_test))]
print("%-correct: ", np.mean(correct))

predictions[0:5]


First prediction: [0.13204116 0.18447365 0.21076325 0.12277453 0.16241872 0.18752868]
Average predicted class probability: 0.33284354
%-correct:  0.6808510638297872


array([[0.13204116, 0.18447365, 0.21076325, 0.12277453, 0.16241872,
        0.18752868],
       [0.19430637, 0.19450314, 0.1788002 , 0.13527177, 0.12290107,
        0.17421737],
       [0.23235771, 0.1961845 , 0.1796511 , 0.13892211, 0.09229092,
        0.16059363],
       [0.09742282, 0.17235568, 0.46232408, 0.04976016, 0.06843856,
        0.14969862],
       [0.5083174 , 0.22091702, 0.04359073, 0.12551446, 0.01459213,
        0.08706822]], dtype=float32)

In [None]:
predictions.savetxt("outputfinal.csv", a, delimiter=",")