In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import keras
import tensorflow as tf
import tensorflow_addons as tfa
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.models import Model, Sequential, load_model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from keras.optimizers import Adam, Adamax, SGD
from keras.layers import Dense, Conv2D, MaxPool2D , Flatten , Input
from keras.losses import categorical_crossentropy
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import metrics
from sklearn.utils import class_weight
from sklearn.model_selection import train_test_split
from collections import Counter
import cv2
import matplotlib.pyplot as plt
import mimetypes
import argparse
import imutils
import os
from os import listdir
from os.path import isfile, join
# TF_ENABLE_ONEDNN_OPTS=0

In [None]:
# Device configuration
import tensorflow as tf

# PyTorch device handle: CUDA when torch can see a GPU, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# tf.device('/gpu:0') only builds a device-scope context manager, so printing it
# shows an object repr and says nothing about the hardware. List the GPUs that
# TensorFlow can actually see instead (an empty list means no usable GPU).
print(tf.config.list_physical_devices('GPU'))


In [None]:
# Build one annotation row per label file:
#   [channel1 image name, channel2 image name, <every field after the first token>]
label_file_path="/home/neel/Acoustic/Acoustics/data/labels/"

# os.listdir returns entries in arbitrary order; sorting keeps the row order (and
# therefore the seeded train/test split made later) reproducible between runs.
# Only ".txt" entries are label files, so anything else is skipped rather than
# having its last four characters blindly sliced off.
labels=sorted(
    os.path.splitext(entry)[0]
    for entry in os.listdir(label_file_path)
    if entry.endswith(".txt")
)

dataset=[]
for label in labels:
    # The with-block closes each label file as soon as it has been read.
    with open(label_file_path+label+".txt","r") as label_file:
        # split() without an argument splits on any whitespace run, so trailing
        # newlines or repeated spaces do not produce empty fields.
        label_list=label_file.read().split()
    # The first token of the label line is dropped; the remaining fields are
    # kept as strings and become the coordinate columns of the row.
    dataset.append(
        ["channel1_"+label+".jpg", "channel2_"+label+".jpg", *label_list[1:]]
    )

In [None]:
# Tabulate the annotation rows (one row per label file, positional columns).
# NOTE(review): this rebinds `datasets`, hiding the torchvision module imported
# under the same name in the import cell.
datasets = pd.DataFrame(data=dataset)

In [None]:
# Persist the annotation table as a bare CSV: no header row, no index column.
# The target path is relative to the current working directory.
datasets.to_csv(
    'data/train.csv',
    index=False,
    header=False,
)

In [None]:
# Input locations: the spectrogram images and the annotation CSV.
base_path = "/home/neel/Acoustic/Acoustics/data"
spectrogram_path = os.path.join(base_path, "images")
annots_path = os.path.join(base_path, "train.csv")

# Output locations: serialized model, training-loss plot, test filename list.
base_output = "/home/neel/Acoustic/Acoustics/output"
model_path = os.path.join(base_output, "detector.h5")
plot_path = os.path.join(base_output, "plot.png")
test_file = os.path.join(base_output, "test.txt")

In [None]:
import pickle


def _read_image(image_path, size=(216, 216)):
    """Decode one image file into a float32 RGB tensor resized to `size`.

    decode_image is asked for dtype=tf.float32, which converts the decoded
    pixels to floats scaled into [0, 1]; callers must not rescale again.
    """
    raw = tf.io.read_file(image_path)
    decoded = tf.image.decode_image(raw, channels=3, dtype=tf.float32)
    return tf.image.resize(decoded, list(size))


print("[INFO] loading dataset...")
# Read the annotation CSV inside a context manager so the handle gets closed.
with open(annots_path) as annots_file:
    rows = annots_file.read().strip().split("\n")

# One (216, 216, 6) sample per CSV row: the two RGB images of a pair are
# stacked along the channel axis.
spectrogram = np.empty((len(rows), 216, 216, 6), dtype="float32")
bounding_box_cords = []
filenames = []
for index, row in enumerate(rows):
    # Each row is: image name 1, image name 2, then four box coordinates.
    (filename1, filename2, startX, startY, endX, endY) = row.split(",")
    image1 = _read_image(os.path.sep.join([spectrogram_path, filename1]))
    image2 = _read_image(os.path.sep.join([spectrogram_path, filename2]))
    # The pixels are already in [0, 1] (see _read_image). Dividing by 255 here
    # would normalise a second time and squash the inputs into [0, 1/255],
    # which also disagrees with the prediction loop that feeds the decoded
    # images to the model without any further division.
    spectrogram[index] = np.concatenate((image1, image2), axis=-1)
    # Coordinates stay as strings here; they are cast to float32 afterwards.
    bounding_box_cords.append((startX, startY, endX, endY))
    filenames.append([filename1, filename2])
print("Done.")

In [None]:
# `spectrogram` layout: (examples, height, width, 2 images x 3 RGB channels),
# i.e. (len(rows), 216, 216, 6) as allocated when the dataset was loaded.

# Cast the collected coordinate tuples into a single float32 array.
targets = np.asarray(bounding_box_cords, dtype=np.float32)
print(spectrogram.shape)
# The Python list is no longer needed once `targets` exists.
del bounding_box_cords

In [None]:
# Hold out 10% of the samples; the fixed random_state keeps the shuffle repeatable.
split = train_test_split(
    spectrogram,
    targets,
    filenames,
    test_size=0.10,
    random_state=42,
)
# Drop the unsplit references; the pieces remain reachable through `split`.
del spectrogram, targets, filenames

# train_test_split returns a (train, test) pair per input, in argument order.
(trainImages, testImages,
 trainTargets, testTargets,
 trainFilenames, testFilenames) = split

# Disabled: write the held-out filename pairs to `test_file`, one "name1,name2"
# line per sample.
# print("[INFO] saving testing filenames...")
# f = open(test_file, "w")
# for i in testFilenames:
#     f.write(i[0]+","+i[1])
#     f.write("\n")
# f.close()

In [None]:
# VGG16 convolutional base (no classifier top) on the 6-channel stacked input.
# weights=None means the base starts from random initialisation.
vgg = VGG16(weights=None, include_top=False, input_tensor=Input(shape=(216, 216, 6)))
# Because no pretrained weights are loaded, the base has to be trained together
# with the head. Freezing it would leave the regressor fitting on features
# produced by fixed random filters.
vgg.trainable = True

# Flatten the max-pooling output of VGG.
flatten = Flatten()(vgg.output)

# Fully-connected head that outputs the predicted bounding box coordinates.
# The final sigmoid keeps all four outputs inside [0, 1].
bboxHead = Dense(128, activation="relu")(flatten)
bboxHead = Dense(64, activation="relu")(bboxHead)
bboxHead = Dense(32, activation="relu")(bboxHead)
bboxHead = Dense(4, activation="sigmoid")(bboxHead)

model = Model(inputs=vgg.input, outputs=bboxHead)

In [None]:
# Print whatever devices device_lib.list_local_devices() reports.
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

In [None]:
# Training hyper-parameters, in order:
#   initial learning rate, number of epochs, mini-batch size.
init_lr, epoch, batch_size = 1e-4, 50, 16

In [None]:
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and is rejected by newer Keras optimizers.
opt = Adamax(learning_rate=init_lr)
# NOTE(review): tfa's GIoULoss documents its boxes as [y_min, x_min, y_max, x_max].
# The targets are read from the CSV as (startX, startY, endX, endY) -- confirm
# that the label files really store corner coordinates in a compatible order.
model.compile(loss=tfa.losses.GIoULoss(), optimizer=opt)
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would emit a stray "None" line).
model.summary()

# Train the network for bounding box regression.
print("[INFO] training bounding box regressor...")
with tf.device("/gpu:0"):
    # NOTE(review): the held-out split doubles as the validation set here.
    H = model.fit(
        trainImages, trainTargets,
        validation_data=(testImages, testTargets),
        batch_size=batch_size,
        epochs=epoch,
        verbose=1)

In [None]:
# Serialize the trained detector in HDF5 format.
print("[INFO] saving object detector model...")
model.save(model_path, save_format="h5")

# Plot the per-epoch training and validation loss and save the figure.
N = epoch
epoch_axis = np.arange(0, N)
plt.style.use("ggplot")
fig, ax = plt.subplots()
for history_key, curve_label in (("loss", "train_loss"), ("val_loss", "val_loss")):
    ax.plot(epoch_axis, H.history[history_key], label=curve_label)
ax.set_title("Bounding Box Regression Loss on Training Set")
ax.set_xlabel("Epoch #")
ax.set_ylabel("Loss")
ax.legend(loc="lower left")
fig.savefig(plot_path)

# The image arrays are not used after training; drop the names to free memory.
del testImages,trainImages

In [None]:
# Prediction only needs the architecture and weights, so the training
# configuration (optimizer and the custom GIoU loss) is not restored on load.
model=load_model(model_path, compile=False)

for img in testFilenames:
    # `img` is the [channel1 name, channel2 name] pair recorded at load time.
    imagePath1=os.path.sep.join([spectrogram_path, img[0]])
    imagePath2=os.path.sep.join([spectrogram_path, img[1]])

    # decode_image with dtype=tf.float32 already returns pixels scaled to [0, 1].
    # NOTE(review): keep this in step with whatever scaling the training loader
    # applies to its inputs.
    image1 = tf.io.read_file(imagePath1)
    image1=tf.image.decode_image(image1,channels=3,dtype=tf.float32)
    image1=tf.image.resize(image1,[216,216])
    image2 = tf.io.read_file(imagePath2)
    image2=tf.image.decode_image(image2,channels=3,dtype=tf.float32)
    image2=tf.image.resize(image2,[216,216])

    # Add the batch axis, then stack both images along the channel axis.
    image1 = np.expand_dims(image1, axis=0)
    image2 = np.expand_dims(image2, axis=0)
    image=np.concatenate((image1,image2),axis=-1)

    print(image.shape)

    # predict() returns a batch; take the single box for this sample.
    preds = model.predict(image)[0]
    (startX, startY, endX, endY) = preds

    # Image names look like "channel1_<label>.jpg": drop the 9-character prefix
    # and the 4-character extension to recover the label file's base name.
    label_name = img[0][9:-4]
    # Print the ground-truth label next to the prediction. The with-block makes
    # sure the label file is closed on every iteration.
    with open("/home/neel/Acoustic/Acoustics/data/labels/"+label_name+".txt") as f:
        print(f.read())
    print(startX, startY, endX, endY)

    # Disabled visualisation: scale the box to the frame and draw it.
    # image2 = cv2.imread(os.path.sep.join(["/home/neel/Acoustic/yolov5_training/img_data/frames", img[0][9:]]))
    # image2 = imutils.resize(image2, width=600)
    # (h, w) = image1.shape[:2]
    # startX = startX*w
    # startY = startY*h
    # endX = endX*w
    # endY = endY*h
    # cv2.rectangle(image2, (startX, startY), (endX, endY),(0, 255, 0), 2)
    # cv2.imwrite("/home/neel/Acoustic/Acoustics/output/test_img/"+img[0], image2)
    # cv2.imshow("Output", image2)
    # cv2.waitKey(0)

In [None]:
import tensorflow as tf

# Print the value returned by tf.test.gpu_device_name().
gpu_name = tf.test.gpu_device_name()
print(gpu_name)