In [None]:
# Colab-only setup cell: request the TensorFlow 2.x runtime, then pip-install the GPU package.
# NOTE(review): the install is unpinned, so the installed version can drift between runs — consider pinning.
%tensorflow_version 2.x
!pip install tensorflow-gpu



In [1]:
# Mount Google Drive under /content/drive; the dataset and all outputs live on Drive.
# force_remount=True is passed so that re-running this cell requests a fresh mount.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
import scipy.misc
import cv2
from imutils import paths
from tqdm import tqdm
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
%matplotlib inline
import os
import random
import tensorflow as tf
import datetime
from glob import glob
import IPython.display as display
from IPython.display import clear_output
import math
import time
from google.colab import auth
from oauth2client.client import GoogleCredentials
import csv
from tensorflow.keras.layers import *
import warnings
warnings.filterwarnings('ignore')
AUTOTUNE = tf.data.experimental.AUTOTUNE
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
print(f'Tensorflow ver. {tf.__version__}')

Tensorflow ver. 2.4.1


In [3]:
# Authenticate the Colab user and build a PyDrive client (`my_drive`).
# The client is used by delTrash() to empty the Drive trash.
auth.authenticate_user()
gauth = GoogleAuth()
# Hand the application-default credentials obtained above to PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
my_drive = GoogleDrive(gauth)

def delTrash() :
    """Delete every file that is currently in the Drive trash.

    Lists the files matching the query ``trashed = true`` through the
    module-level ``my_drive`` client and calls ``Delete()`` on each of them.
    Returns nothing.
    """
    trashed_files = my_drive.ListFile({'q': "trashed = true"}).GetList()
    for trashed in trashed_files:
        trashed.Delete()

In [4]:
# Seed used for file listing and shuffling; drawn at random from three values on every run.
# NOTE(review): runs are therefore not reproducible unless SEED is recorded — consider pinning it.
SEED = random.choice([32, 50, 100])
# Absolute path (Drive is mounted at /content/drive) so that re-running this cell keeps
# working; a relative chdir fails once the working directory has already been changed.
os.chdir("/content/drive/My Drive/segUnet3")
# Dataset layout: <dataset_path>/{leftImg8bit,gtFine}/{train,val}/<subdir>/*_image.jpg
dataset_path = 'idd20k_lite/'
img_val = dataset_path + 'leftImg8bit/val/'
seg_val = dataset_path + 'gtFine/val/'
img_train = dataset_path + 'leftImg8bit/train/'
seg_train = dataset_path + 'gtFine/train/'
# Network input resolution (height, width) and channel / class counts.
(IMG_HEIGHT,IMG_WIDTH) = (128,256)
N_CHANNELS = 3
N_CLASSES = 8
# Number of images per split, used to derive the number of batches per epoch.
TRAIN_SIZE= len(glob(img_train+'*/*_image.jpg'))
VAL_SIZE = len(glob(img_val+'*/*_image.jpg'))
BATCH_SIZE = 32
# Size of the shuffle buffer applied to the training pipeline.
BUFFER_SIZE = 1500

In [5]:
# Load Dataset from folder here
def parse_image(img_path):
    """Read one image and its label mask from disk.

    The mask path is derived from ``img_path`` by replacing ``leftImg8bit``
    with ``gtFine`` and ``_image.jpg`` with ``_label.png``.

    Args:
        img_path: string tensor holding the path of a ``*_image.jpg`` file.

    Returns:
        dict with ``'image'`` (JPEG decoded with 3 channels) and ``'mask'``
        (PNG decoded with 1 channel, value 255 rewritten to 7).
    """
    mask_path = tf.strings.regex_replace(
        tf.strings.regex_replace(img_path, "leftImg8bit", "gtFine"),
        "_image.jpg", "_label.png")

    image = tf.image.decode_jpeg(tf.io.read_file(img_path), channels=3)
    mask = tf.image.decode_png(tf.io.read_file(mask_path), channels=1)

    # Label 255 is folded into id 7 so every label is below 8.
    # NOTE(review): presumably 255 marks "ignore/void" pixels — confirm against the dataset docs.
    remapped_label = np.dtype('uint8').type(7)
    mask = tf.where(mask == 255, remapped_label, mask)
    return {'image': image, 'mask': mask}
def load_train(datapoint):
    """Resize, randomly flip and normalise one training sample.

    Args:
        datapoint: dict with ``'image'`` and ``'mask'`` tensors, as returned
            by ``parse_image``.

    Returns:
        ``(input_image, input_mask)``: the image as float32 scaled to [0, 1]
        and the mask as float32 class ids, both of spatial size
        ``(IMG_HEIGHT, IMG_WIDTH)``.
    """
    input_image = tf.image.resize(datapoint['image'], (IMG_HEIGHT, IMG_WIDTH))
    # Masks hold class ids, not intensities: the default bilinear resize would blend
    # neighbouring ids into values that are not valid labels, so use nearest-neighbour.
    input_mask = tf.image.resize(datapoint['mask'], (IMG_HEIGHT, IMG_WIDTH),
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Nearest-neighbour keeps the input dtype; cast so callers still receive float32 masks.
    input_mask = tf.cast(input_mask, tf.float32)
    # Horizontal flip with probability 0.5, applied identically to image and mask.
    if tf.random.uniform(()) > 0.5:
        input_image = tf.image.flip_left_right(input_image)
        input_mask = tf.image.flip_left_right(input_mask)
    input_image = tf.cast(input_image, tf.float32) / 255.0
    return input_image, input_mask
def load_test(datapoint):
    """Resize and normalise one validation sample (no augmentation).

    Args:
        datapoint: dict with ``'image'`` and ``'mask'`` tensors, as returned
            by ``parse_image``.

    Returns:
        ``(input_image, input_mask)``: the image as float32 scaled to [0, 1]
        and the mask as float32 class ids, both of spatial size
        ``(IMG_HEIGHT, IMG_WIDTH)``.
    """
    input_image = tf.image.resize(datapoint['image'], (IMG_HEIGHT, IMG_WIDTH))
    # Masks hold class ids, not intensities: the default bilinear resize would blend
    # neighbouring ids into values that are not valid labels, so use nearest-neighbour.
    input_mask = tf.image.resize(datapoint['mask'], (IMG_HEIGHT, IMG_WIDTH),
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    # Nearest-neighbour keeps the input dtype; cast so callers still receive float32 masks.
    input_mask = tf.cast(input_mask, tf.float32)
    input_image = tf.cast(input_image, tf.float32) / 255.0
    return input_image, input_mask

# List the image files of each split (SEED is passed to the file listing) and decode
# every path into an {'image', 'mask'} dict with parse_image.
train_dataset_list = tf.data.Dataset.list_files(img_train+'*/*_image.jpg', seed=SEED)
train_dataset = train_dataset_list.map(parse_image)
val_dataset_list = tf.data.Dataset.list_files(img_val+'*/*_image.jpg', seed=SEED)
val_dataset = val_dataset_list.map(parse_image)
# Both splits are kept in one dict so prepareDataset can rewrite the entries in place.
dataset = {"train": train_dataset, "val": val_dataset}
def prepareDataset(dataset) :
    """Turn the raw train/val datasets into batched input pipelines, in place.

    Args:
        dataset: dict with ``'train'`` and ``'val'`` ``tf.data.Dataset``
            entries yielding ``{'image', 'mask'}`` dicts. Both entries are
            replaced by their prepared pipeline; nothing is returned.

    The training pipeline maps ``load_train``, shuffles, repeats, batches and
    prefetches. The validation pipeline maps ``load_test``, repeats, batches
    and prefetches.
    """
    train_pipeline = dataset['train'].map(
        load_train, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset['train'] = (
        train_pipeline
        .shuffle(buffer_size=BUFFER_SIZE, seed=SEED)
        .repeat()
        .batch(BATCH_SIZE)
        .prefetch(buffer_size=AUTOTUNE)
    )

    val_pipeline = dataset['val'].map(load_test)
    dataset['val'] = (
        val_pipeline
        .repeat()
        .batch(BATCH_SIZE)
        .prefetch(buffer_size=AUTOTUNE)
    )
# Rewrites dataset['train'] / dataset['val'] in place. Running this cell a second time
# would map/batch the already prepared pipelines again, so re-run the listing above first.
prepareDataset(dataset)

In [63]:
class Downsampling(tf.keras.Model):
    """Encoder stage: two 3x3 ReLU convolutions, preceded by a 2x2 max-pool
    for every stage except the first (``layer == 1``)."""

    def __init__(self, filter_size, layer, initializer= None):
        """
        Args:
            filter_size: number of filters of both convolutions.
            layer: 1-based stage index; stage 1 skips the pooling step.
            initializer: kernel initializer forwarded to both convolutions.
        """
        super(Downsampling, self).__init__()
        self.layer = layer
        self.filter_size = filter_size
        self.initializer = initializer
        conv_kwargs = dict(activation = 'relu', padding='same', kernel_initializer=self.initializer)
        self.conv1 = Convolution2D(self.filter_size, 3, **conv_kwargs)
        self.conv2 = Convolution2D(self.filter_size, 3, **conv_kwargs)

    def model(self):
        """Wrap this stage in a functional model built on a fixed, per-stage
        input shape (useful for ``summary()``). Returns None for a stage
        index outside 1..5."""
        stage_input_shapes = {
            1: (128,128,3),
            2: (128,128,64),
            3: (64,64,128),
            4: (32,32, 256),
            5: (16, 16, 512),
        }
        input_shape = stage_input_shapes.get(self.layer)
        if input_shape is None:
            return None
        x = Input(shape=input_shape)
        return tf.keras.Model(inputs=[x], outputs=self.call(x))

    def call(self,inputs):
        """Apply the optional max-pool and the two convolutions to ``inputs``."""
        # Only the first stage works at full resolution; every other stage halves it first.
        x = inputs if self.layer == 1 else MaxPooling2D(pool_size=(2,2))(inputs)
        return self.conv2(self.conv1(x))

class Upsampling(tf.keras.Model):
    """Decoder stage: 2x upsample + 2x2 convolution, concatenation with a skip
    tensor, then two 3x3 convolutions (plus a 2-filter 3x3 convolution when
    ``layer == 4``)."""

    def __init__(self, filter_size, layer, initializer= None):
        """
        Args:
            filter_size: number of filters of conv1, conv2 and conv3.
            layer: 1-based decoder stage index; stage 4 additionally applies conv4.
            initializer: kernel initializer forwarded to every convolution.
        """
        super(Upsampling, self).__init__()
        self.filter_size = filter_size
        self.layer = layer
        self.initializer = initializer
        self.conv1 = Convolution2D(self.filter_size, 2, activation = 'relu', padding='same', kernel_initializer=self.initializer)
        self.conv2 = Convolution2D(self.filter_size, 3, activation = 'relu', padding='same', kernel_initializer=self.initializer)
        self.conv3 = Convolution2D(self.filter_size, 3, activation = 'relu', padding='same', kernel_initializer=self.initializer)
        # Created for every stage but only applied when layer == 4 (see call).
        self.conv4 = Convolution2D(2, 3, activation = 'relu', padding = 'same', kernel_initializer=self.initializer)

    def model(self):
        """Wrap this stage in a functional model built on fixed, per-stage
        input shapes (useful for ``summary()``).

        The original implementation called ``self.call(x)`` without the
        required skip tensor and therefore always raised ``TypeError``. A
        second ``Input`` is now created for the skip connection.

        Returns:
            A ``tf.keras.Model`` taking ``[x, skip]``, or None for a stage
            index outside 1..5.
        """
        stage_input_shapes = {
            1: (8, 8, 1024),
            2: (16, 16, 512),
            3: (32, 32, 256),
            4: (64, 64, 128),
            5: (128, 128, 2),
        }
        input_shape = stage_input_shapes.get(self.layer)
        if input_shape is None:
            return None
        height, width, _ = input_shape
        x = Input(shape=input_shape)
        # The skip tensor must match the upsampled resolution (2x). Its channel count is
        # assumed to equal filter_size, which is how segUnetModel pairs the stages.
        skip = Input(shape=(2 * height, 2 * width, self.filter_size))
        return tf.keras.Model(inputs=[x, skip], outputs=self.call(x, skip))

    def call(self,inputs,upsampling_layer):
        """
        Args:
            inputs: feature map coming from the previous (coarser) stage.
            upsampling_layer: skip tensor concatenated with the upsampled
                features along the channel axis (axis 3).

        Returns:
            The stage output; reduced to 2 channels by conv4 when ``layer == 4``.
        """
        x = self.conv1(UpSampling2D(size=(2,2))(inputs))
        merged = concatenate([upsampling_layer,x], axis=3)
        x = self.conv2(merged)
        x = self.conv3(x)
        if(self.layer==4):
            x = self.conv4(x)
        return x

class segUnetModel(tf.keras.Model):
    """U-Net style segmentation network: five Downsampling stages, four
    Upsampling stages with skip connections, and a 1x1 softmax convolution
    producing ``output_channels`` class scores per pixel."""

    def __init__(self, output_channels, initializer = None):
        """
        Args:
            output_channels: number of classes predicted per pixel.
            initializer: kernel initializer forwarded to every encoder and
                decoder stage. It was previously accepted but ignored; the
                default None keeps the earlier behaviour.
        """
        super(segUnetModel, self).__init__()
        self.output_channels = output_channels
        self.initializer = initializer
        # Encoder
        self.layer1 = Downsampling(64, 1, initializer=initializer)
        self.layer2 = Downsampling(128, 2, initializer=initializer)
        self.layer3 = Downsampling(256, 3, initializer=initializer)
        self.layer4 = Downsampling(512, 4, initializer=initializer)
        self.layer5 = Downsampling(1024, 5, initializer=initializer)
        # Decoder
        self.layer6 = Upsampling(512, 1, initializer=initializer)
        self.layer7 = Upsampling(256, 2, initializer=initializer)
        self.layer8 = Upsampling(128, 3, initializer=initializer)
        self.layer9 = Upsampling(64, 4, initializer=initializer)
        self.output_layer = Convolution2D(self.output_channels, 1, activation='softmax')

    def model(self, all = 0):
        """Build helper models for inspection.

        Args:
            all: when 0 (default), return a functional model of the whole
                network on a (128, 128, 3) input. Otherwise print the
                per-stage models of layer1..layer9 and return None.

        NOTE(review): the summary input is 128x128 while the data pipeline
        produces IMG_HEIGHT x IMG_WIDTH = 128x256 images. The network is fully
        convolutional, so only the shapes shown by ``summary()`` are affected.
        """
        if all == 0:
            x = Input(shape=(128,128,3))
            return tf.keras.Model(inputs=[x], outputs=self.call(x))
        stages = (self.layer1, self.layer2, self.layer3, self.layer4, self.layer5,
                  self.layer6, self.layer7, self.layer8, self.layer9)
        for stage in stages:
            print(stage.model())

    def call(self,inputs):
        """Run the encoder, then the decoder with skip connections, and return
        the per-pixel softmax output."""
        conv1 = self.layer1(inputs)
        conv2 = self.layer2(conv1)
        conv3 = self.layer3(conv2)
        conv4 = self.layer4(conv3)
        conv5 = self.layer5(conv4)
        # Each decoder stage receives the previous output and the matching encoder output.
        convtr1 = self.layer6(conv5,conv4)
        convtr2 = self.layer7(convtr1,conv3)
        convtr3 = self.layer8(convtr2,conv2)
        convtr4 = self.layer9(convtr3,conv1)
        output = self.output_layer(convtr4)
        return output

# Instantiate the network with one output channel per class and print the summary of
# its functional wrapper (built on the fixed input shape chosen inside model()).
model = segUnetModel(N_CLASSES)
model.model().summary()

Model: "model_16"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_20 (InputLayer)           [(None, 128, 128, 3) 0                                            
__________________________________________________________________________________________________
downsampling_230 (Downsampling) (None, 128, 128, 64) 38720       input_20[0][0]                   
__________________________________________________________________________________________________
downsampling_231 (Downsampling) (None, 64, 64, 128)  221440      downsampling_230[0][0]           
__________________________________________________________________________________________________
downsampling_232 (Downsampling) (None, 32, 32, 256)  885248      downsampling_231[0][0]           
___________________________________________________________________________________________

"\nclass Downsampling(tf.keras.Model):\n    def __init__(self, filter_size, layer, initializer= None):\n        super(Downsampling, self).__init__()\n        self.layer = layer\n        self.filter_size = filter_size\n        self.initializer = initializer\n\n    def call(self,inputs):\n        if(self.layer==1):\n            x = Convolution2D(self.filter_size, 3, activation = 'relu', padding='same', kernel_initializer=self.initializer)(inputs)\n            x = Convolution2D(self.filter_size, 3, activation = 'relu', padding='same', kernel_initializer=self.initializer)(x)\n            return x\n        x =  MaxPooling2D(pool_size=(2,2))(inputs)\n        x = Convolution2D(self.filter_size, 3, activation = 'relu', padding='same', kernel_initializer=self.initializer)(x)\n        x = Convolution2D(self.filter_size, 3, activation = 'relu', padding='same', kernel_initializer=self.initializer)(x)\n        return x\n    def model(self):\n        if self.layer == 1:\n            x = Input(shape=

In [64]:
# Loss, optimizer, checkpointing and running metrics shared by the training cells below.
loss_func = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()
# The checkpoint tracks a step counter, the optimizer state and the model; the manager
# writes to tf_ckpts/ and keeps at most the 3 most recent checkpoints.
ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, net=model)
manager = tf.train.CheckpointManager(ckpt, 'tf_ckpts/', max_to_keep=3)
# Metrics accumulate over batches; the training loop resets them at the end of each epoch.
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('train_accuracy')
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('test_accuracy')

# function to calculate loss every step and checkpoint
@tf.function
def train_step(model, optimizer, x_train, y_train):
    """Run one optimisation step on a single batch.

    Computes the loss with the module-level ``loss_func``, applies the
    gradients through ``optimizer`` and accumulates the batch into the
    module-level ``train_loss`` / ``train_accuracy`` metrics.

    Args:
        model: network to train.
        optimizer: optimizer applying the gradients.
        x_train: batch of input images.
        y_train: batch of label masks.
    """
    with tf.GradientTape() as grad_tape:
        y_pred = model(x_train, training=True)
        batch_loss = loss_func(y_train, y_pred)

    # Read the variables after the forward pass so they exist even on the first call.
    weights = model.trainable_variables
    gradients = grad_tape.gradient(batch_loss, weights)
    optimizer.apply_gradients(zip(gradients, weights))

    train_loss.update_state(batch_loss)
    train_accuracy.update_state(y_train, y_pred)

@tf.function
def test_step(model, x_test, y_test):
    """Evaluate the model on a single validation batch.

    Accumulates the batch into the module-level ``test_loss`` /
    ``test_accuracy`` metrics.

    Args:
        model: network to evaluate.
        x_test: batch of input images.
        y_test: batch of label masks.

    Returns:
        The model output for ``x_test``.
    """
    y_pred = model(x_test)
    batch_loss = loss_func(y_test, y_pred)
    test_loss.update_state(batch_loss)
    test_accuracy.update_state(y_test, y_pred)
    return y_pred

def train_and_checkpoint(model, manager, dataset, epoch):
    """Train for one epoch and save a checkpoint.

    Restores the latest checkpoint (if any) into the module-level ``ckpt``,
    runs ``train_step`` on ``ceil(TRAIN_SIZE / BATCH_SIZE)`` batches, then
    increments ``ckpt.step`` and saves through ``manager``.

    A leftover ``return`` inside the batch loop used to stop after the first
    batch, so the epoch was never completed and no checkpoint was written.

    Args:
        model: network to train.
        manager: ``tf.train.CheckpointManager`` used to restore and save.
        dataset: dict whose ``'train'`` entry yields ``(images, masks)`` batches.
        epoch: 1-based epoch number, used only in the log message.
    """
    latest_checkpoint = manager.latest_checkpoint
    ckpt.restore(latest_checkpoint)
    if latest_checkpoint:
        print("Restored from {}".format(latest_checkpoint))
    else:
        print("Initializing from scratch.")

    # The training dataset repeats forever, so one epoch is bounded explicitly.
    steps_per_epoch = math.ceil(TRAIN_SIZE/BATCH_SIZE)
    for (x_train, y_train) in dataset['train'].take(steps_per_epoch):
        train_step(model, optimizer, x_train, y_train)

    ckpt.step.assign_add(1)
    save_path = manager.save()
    print("Saved checkpoint for epoch {}: {}".format(epoch, save_path))
    

# Separate summary writers for the training and validation scalars logged each epoch.
train_log_dir = 'logs/gradient_tape/train'
test_log_dir = 'logs/gradient_tape/test'
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

In [None]:
# Training loop: one train + validation pass per epoch.
# Per-epoch metrics are appended as CSV rows to a user-named .txt file, and the model
# weights are saved whenever the validation accuracy improves.
highest_accuracy = 0
noEpochs = 100
startEpoch = 0
fields = ['Epoch', 'Loss', 'Acc', 'Val Loss', 'Val Acc']
filename = input("Enter File Name without type : ") + ".txt"
# Start the log file with the header row; `with` closes it even if a write fails.
with open(filename, "w+") as paramsFile:
    paramsFile.write(','.join(fields))
    paramsFile.write("\n")

template = 'Epoch {}, Loss: {:.3f}, Accuracy: {:.3f}, Val Loss: {:.3f}, Val Accuracy: {:.3f}'
otemp = '{}, {:.3f}, {:.3f}, {:.3f}, {:.3f}'

for epoch in range(startEpoch, startEpoch + noEpochs):

    print("Epoch ",epoch+1)
    # Empty the Drive trash every epoch.
    # NOTE(review): presumably to reclaim space taken by rotated checkpoints — confirm.
    delTrash()
    start = time.time()

    # A leftover `break` after this call used to end the loop here, which made the
    # validation, logging and weight-saving code below unreachable.
    train_and_checkpoint(model, manager, dataset, epoch+1)
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=ckpt.step.numpy())
        tf.summary.scalar('accuracy', train_accuracy.result(), step=ckpt.step.numpy())

    # The validation dataset repeats forever, so one pass is bounded explicitly.
    for (x_test, y_test) in dataset['val'].take(math.ceil(VAL_SIZE/BATCH_SIZE)):
        test_step(model, x_test, y_test)

    with test_summary_writer.as_default():
        tf.summary.scalar('loss', test_loss.result(), step=ckpt.step.numpy())
        tf.summary.scalar('accuracy', test_accuracy.result(), step=ckpt.step.numpy())

    #print("Time taken ",time.time()-start)

    # Read each metric once per epoch; accuracies are reported as percentages.
    epoch_loss = train_loss.result().numpy()
    epoch_acc = train_accuracy.result().numpy()*100
    val_loss = test_loss.result().numpy()
    val_acc = test_accuracy.result().numpy()*100

    print (template.format(epoch+1, epoch_loss, epoch_acc, val_loss, val_acc))
    text = otemp.format(epoch+1, epoch_loss, epoch_acc, val_loss, val_acc)
    with open(filename, "a") as paramsFile:
        paramsFile.write(text+"\n")

    if(val_acc>highest_accuracy):
        print("Validation accuracy increased from {:.3f} to {:.3f}. Saving model weights.".format(highest_accuracy,val_acc))
        highest_accuracy = val_acc
        model.save_weights('unet_weights-epoch-{}.hdf5'.format(epoch+1))

    print('_'*80)

    # Reset metrics after every epoch
    train_loss.reset_states()
    test_loss.reset_states()
    train_accuracy.reset_states()
    test_accuracy.reset_states()

In [66]:
# Rebuild a fresh network and load previously trained weights for inference.
# NOTE(review): the literal 8 duplicates N_CLASSES and the build shape is 128x128 while
# the pipeline uses IMG_HEIGHT x IMG_WIDTH = 128x256 — confirm both are intentional.
model = segUnetModel(8)
# build() creates the variables so that the HDF5 weights can be loaded into them.
model.build(input_shape=(None,128,128,3))
model.summary()
model.load_weights('Model3/unet_weights-epoch-87.hdf5')

Model: "seg_unet_model_48"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
downsampling_240 (Downsampli multiple                  38720     
_________________________________________________________________
downsampling_241 (Downsampli multiple                  221440    
_________________________________________________________________
downsampling_242 (Downsampli multiple                  885248    
_________________________________________________________________
downsampling_243 (Downsampli multiple                  3539968   
_________________________________________________________________
downsampling_244 (Downsampli multiple                  14157824  
_________________________________________________________________
upsampling_192 (Upsampling)  multiple                  9176576   
_________________________________________________________________
upsampling_193 (Upsampling)  multiple            

In [None]:
# Segment every frame of a video, overlay the drivable area in green and write two
# videos: the overlay alone, and the overlay plus an arrow pointing from the bottom
# centre of the frame to the centroid of the drivable area.
import cv2
import imutils

cap = cv2.VideoCapture(input("Enter file name"))
frameNo = 1

outFileName = input("enter name.avi")
fourcc = cv2.VideoWriter_fourcc('M','J','P','G')
out = cv2.VideoWriter(outFileName, fourcc, 25, (IMG_WIDTH, IMG_HEIGHT))
name = outFileName.split(".")[0]
outArrow = cv2.VideoWriter(name+"_arrow.avi", fourcc, 25, (IMG_WIDTH, IMG_HEIGHT))

# After rotation, the first `leftCrop` columns are dropped and the next `rightCrop`
# columns are kept.
leftCrop = 150
rightCrop = 350
rotateFrame = 270
print("Rotating frame by ", rotateFrame, " degrees")
print("Cropping frame from 0 to ", leftCrop, " and ", rightCrop, " to max y value")

ignoreHeightPixel = 10
ignoreWidthPixel = 30
print("Ignoring", ignoreHeightPixel ," pixels from top and bottom")
print("Ignoring", ignoreWidthPixel ," pixels from left and right")

# Region that contributes to the centroid. The bounds reproduce the original per-pixel
# test: ignoreHeightPixel < i < IMG_HEIGHT - ignoreHeightPixel and
#       ignoreWidthPixel  < j <= IMG_WIDTH - ignoreWidthPixel.
roiMask = np.zeros((IMG_HEIGHT, IMG_WIDTH), dtype=bool)
roiMask[ignoreHeightPixel + 1:IMG_HEIGHT - ignoreHeightPixel,
        ignoreWidthPixel + 1:IMG_WIDTH - ignoreWidthPixel + 1] = True

alpha = 0.3
startpoint = (int(IMG_WIDTH/2), IMG_HEIGHT)

try:
    # Every frame is read inside the loop. An extra read before the loop used to
    # discard the first frame of the video.
    while(1):
        ret, frame = cap.read()
        if ret == False :
            print("Done")
            break

        frame = imutils.rotate(frame, rotateFrame)
        frame = frame[:,leftCrop:]
        frame = frame[:, :rightCrop]

        origFrame = cv2.resize(frame, dsize=(IMG_WIDTH, IMG_HEIGHT), interpolation=cv2.INTER_CUBIC)
        origFrameCopy = origFrame.copy()

        # OpenCV delivers BGR frames while the network was trained on RGB images decoded
        # by tf.image.decode_jpeg, so convert before running the model.
        modelInput = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        modelInput = tf.image.resize(modelInput, (IMG_HEIGHT, IMG_WIDTH))
        modelInput = tf.expand_dims(modelInput, 0)
        modelInput = tf.cast(modelInput, tf.float32) / 255.0

        prediction = model(modelInput)
        prediction = tf.argmax(prediction, axis=-1)
        prediction = tf.squeeze(prediction, axis = 0)
        prediction = tf.expand_dims(prediction, axis=-1)
        prediction = np.array(prediction)
        prediction[prediction == 7] = 255

        # Classes 0, 1 and 2 are painted green and treated as the drivable area.
        drivable = np.isin(prediction[..., 0], (0, 1, 2))
        origFrameCopy[drivable] = [0, 150, 0]

        # Centroid accumulators, restricted to the region of interest.
        ys, xs = np.nonzero(drivable & roiMask)
        xTot = int(xs.sum())
        yTot = int(ys.sum())
        Tot = int(xs.size)

        cv2.addWeighted(origFrameCopy, alpha, origFrame, 1 - alpha, 0, origFrame)
        out.write(origFrame)

        # With no drivable pixel inside the region there is no centroid; write the frame
        # without an arrow instead of dividing by zero, so both videos keep the same length.
        if Tot > 0:
            endpoint = (int(xTot/Tot), int(yTot/Tot))
            cv2.arrowedLine(origFrame, startpoint, endpoint, (255, 0, 0), 9, tipLength = 0.5)
        outArrow.write(origFrame)
        frameNo += 1
finally:
    # Release the capture and both writers even if a frame fails mid-way, so the
    # output files are finalised.
    cap.release()
    out.release()
    outArrow.release()