# Configs

In [8]:
#imports 
import os
import json
import csv
import argparse
import xml.etree.ElementTree
import pprint as pp
import math
from random import shuffle
import statistics
import shutil

import cv2
import matplotlib.pyplot as plt

from skimage.io import imread, imshow, imread_collection, concatenate_images

import numpy as np
import cv2

%matplotlib inline



# <font color='blue'/> Pose ResNet data preparation

## Copy over all the XMLs

### Methods

In [5]:
class ReadCSVFile:
    """Reads an issue-tracker CSV export and lists the issues that are labeled.

    The label type decides which family of custom-field columns is consulted:
    "teethline", "teeth" and "keypoints" use the "Teeth" columns, every other
    label type uses the "Scene" columns.
    """

    def __init__(self, csv_file, label_type):
        self.csv_file = csv_file
        self.column_name = self.get_column_name(label_type)

    @staticmethod
    def get_column_name(label_type):
        """Return the custom-field prefix ("Teeth" or "Scene") for a label type."""
        if label_type not in ["teethline", "teeth", "keypoints"]:
            return "Scene"
        return "Teeth"

    @property
    def read_csv(self):
        """Parse the CSV and return (label_folders, video_paths, equipment_types).

        Only rows whose "<column> Images Labeled" cell equals "Yes" are kept.
        The NAS URL prefix is swapped for the local mount point in both the
        label directory and the video path; label directories are additionally
        normalized with os.path.normpath.
        """
        # Network share prefix used in the export and its local replacement
        # (a Windows mapping to 'N:/' was used previously).
        nas_prefix = 'file:////motionmetrics.net/nas/'
        local_prefix = '/home/hooman/'

        with open(self.csv_file, newline='') as f:
            rows = csv.reader(f, delimiter=',')
            header = next(rows)
            col_directory = header.index(
                "Custom field ({0} - Image Directory)".format(self.column_name))
            col_labeled = header.index(
                "Custom field ({0} Images Labeled)".format(self.column_name))
            col_video = header.index("Custom field (Full Path)")
            col_equipment = header.index("Custom field (Equipment Type/Model)")

            labeled = [
                (os.path.normpath(row[col_directory].replace(nas_prefix, local_prefix)),
                 row[col_video].replace(nas_prefix, local_prefix),
                 row[col_equipment])
                for row in rows
                if row[col_labeled] == "Yes"
            ]

        label_folders = [entry[0] for entry in labeled]
        video_paths = [entry[1] for entry in labeled]
        equipment_types = [entry[2] for entry in labeled]
        return label_folders, video_paths, equipment_types

In [None]:
def process_teeth_and_wear(root, mpii_json, path2RawImages, save_path, eq_type="keypoints"):
    """Export the labeled thumbnails of one issue and collect their keypoints.

    Args:
        root: parsed XML root; every 'XMLSaveThumbnail' child is one image.
        mpii_json: annotation list that is threaded through extract_keypoints.
        path2RawImages: directory the source images are read from.
        save_path: directory accepted images (and debug renderings) are written to.
        eq_type: forwarded to extract_keypoints.

    Returns:
        (mpii_json, any_wrong): the updated annotation list and a flag that is
        truthy when at least one image of the issue was rejected. The caller
        unpacks exactly these two values; previously nothing was returned.
    """
    any_wrong = False
    for xml_image in root.findall('XMLSaveThumbnail'):
        original_image_name = xml_image.get("Path")
        exported_image_name = original_image_name.replace(".png", ".jpg")
        # Prefer the exported .jpg; fall back to the original file name.
        image = cv2.imread(os.path.join(path2RawImages, exported_image_name))
        if image is None:
            image = cv2.imread(os.path.join(path2RawImages, original_image_name))

        # NOTE(review): this function has no `self` parameter, so the `self.*`
        # references below only work if a global named `self` exists; the code
        # looks lifted from a class method -- confirm how it should be bound.
        image, self.x_shift, self.x_aspect_ratio, self.y_aspect_ratio = \
            apply_crop_setting(image, self.camera_type)
        mpii_json, is_wrong, image_keypoints = \
            self.extract_keypoints(mpii_json, xml_image, exported_image_name,
                                   self.is_filter_state, eq_type)

        # NOTE(review): `.text` is a string, so `not has_container` is only True
        # for empty/missing text (a literal "false" is truthy) -- confirm intent.
        has_container = xml_image.find("HasContainer").text
        is_wrong = is_wrong or not has_container
        any_wrong = any_wrong or is_wrong

        if not is_wrong:
            cv2.imwrite(os.path.join(save_path, exported_image_name), image)

            image_debug = image.copy()
            self.visualize_img_keypoints(image_debug, image_keypoints,
                                         exported_image_name, save_path)

    return mpii_json, any_wrong

### Pipeline

In [None]:
# Input/output locations for the Pose ResNet data preparation.
# All three live under the same project folder, so they are derived from it.
_hydraulicRoot = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/poseNet/hydraulic/'

# issue export used to find the labeled folders
csv_file = _hydraulicRoot + 'try0_allAnuarStuff/MMI_JIRA_2019-03-18T15_42_45-0700.csv'

# dataset folder that receives the processed labels
path2SaveProcessedLabels = _hydraulicRoot + 'data/try2_sameDataAsTry1_butCleanedUpAnnot/'

# raw images sit in the 'images' sub-folder of that dataset folder
path2RawImages = path2SaveProcessedLabels + 'images/'

In [18]:
# Gather the Imageinfo.xml of every labeled issue into a single folder.
# Each copy is named after the third-from-last component of its label folder path.
path2SaveXmls = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/poseNet/hydraulic/data/allXMLs/'

csvReader = ReadCSVFile(csv_file, label_type="keypoints")
label_folders, video_paths, equipment_types = csvReader.read_csv

for imFolder in label_folders:
    issueName = imFolder.split('/')[-3]
    newName = "{0}{1}.xml".format(path2SaveXmls, issueName)
    shutil.copy(os.path.join(imFolder, "Imageinfo.xml"), newName)

In [12]:
# Builds the annotation list from the collected XMLs and saves the data in a json file.

mpii_json = []

for xml_file in os.listdir(path2SaveXmls):
    try:
        path2SaveAnnots = os.path.join(path2SaveProcessedLabels, "annot")
        create_folder(path2SaveAnnots)
        create_folder(os.path.join(path2SaveProcessedLabels, "debug"))
        is_wrong = False

        # os.listdir yields bare file names, so the folder has to be joined back
        # on; parsing the bare name would resolve against the working directory.
        tree = xml.etree.ElementTree.parse(os.path.join(path2SaveXmls, xml_file))
        root = tree.getroot()

        # NOTE(review): `save_path` is not defined in any visible cell -- confirm
        # which folder the exported images should go to.
        mpii_json, is_wrong = process_teeth_and_wear(root, mpii_json, path2RawImages, save_path)

        # The XML file name identifies the issue here (there is no image_folder
        # variable in this loop).
        print("Exporting the images of %s folder is done!" % xml_file)
        if is_wrong:
            print("Issue %s wrong teeth coords!" % (xml_file))
    except Exception as e:
        print("======= Issue %s failed!\n %s =======" % (xml_file, e))
# NOTE(review): `self` and `folders` are not defined in any visible cell; these
# two statements look copied from the class method version -- confirm.
self.save_dataset(mpii_json, val_ratio=0.08)
print("\nFinished putting images and json files of %d issues together!\n" %
      len(folders))

In [None]:
    def save_output(self):
        """Process every labeled issue and save the collected annotations.

        For each (image folder, video path, equipment type) triple from
        read_csv the output folders are created, the issue's Imageinfo.xml is
        parsed and handed to process_teeth_and_wear. A failure in one issue is
        printed and does not stop the remaining issues. Finally the collected
        list is passed to save_dataset with a validation ratio of 0.08.
        """
        mpii_json = []
        folders, videos, eq_types = self.read_csv
        for image_folder, video_path, eq_type in zip(folders, videos, eq_types):
            try:
                images_dir, annot_dir = (os.path.join(self.output_dir, sub)
                                         for sub in ("images", "annot"))
                for out_dir in (images_dir, annot_dir):
                    create_folder(out_dir)
                if self.is_debug:
                    create_folder(os.path.join(images_dir, "debug"))

                # Only keypoint-style label types are handled.  # NOTE!
                if "keypoints" not in self.label_type:
                    raise Exception("Wrong args.label_type", self.label_type)

                xml_root = self.parse_xml(os.path.join(image_folder, "Imageinfo.xml"))
                mpii_json, is_wrong = self.process_teeth_and_wear(
                    xml_root, mpii_json, image_folder, images_dir, eq_type)

                print("Exporting the images of %s folder is done!" % image_folder)
                if is_wrong:
                    print("Issue %s wrong teeth coords!" % (image_folder))
            except Exception as e:
                print("======= Issue %s failed!\n %s =======" % (image_folder, e))
        self.save_dataset(mpii_json, val_ratio=0.08)
        print("\nFinished putting images and json files of %d issues together!\n" %
              len(folders))

# <font color='blue'/> Data Exploration Toolkit

## Images: Copying, Moving, Deleting from different directories

In [None]:
# Remove from one folder every file that has no counterpart in another folder.
keepListDir = '/home/hooman/Downloads/MOTIONMETRICS (2)/FM/preds/'
dirToPrune = '/home/hooman/Downloads/MOTIONMETRICS (2)/FM/1947EFC0-16D8-1588-A042-3534DFB3FA0F/'

# Names to keep: each prediction file name with its last three characters
# replaced by 'gmp'.
# NOTE(review): 'gmp' is an unusual extension -- confirm it is not a typo.
fileToKeepDict = dict.fromkeys(
    (predName[:-3] + 'gmp' for predName in os.listdir(keepListDir)), 1)

for candidate in os.listdir(dirToPrune):
    if candidate in fileToKeepDict:
        continue
    print(candidate)
    os.remove(dirToPrune + candidate)

In [None]:
# Deleting images that cannot be opened (usually after augmentation),
# together with the mask of the same name.
dirToDeleteFrom = '/home/hooman/dataPreparation/hsTrainingSetBucyrusAndPnH/unet/randomCroppedImages/'
masksDirToDeleteFrom = '/home/hooman/dataPreparation/hsTrainingSetBucyrusAndPnH/unet/randomCroppedMasks/'

for fileName in os.listdir(dirToDeleteFrom):
    try:
        img = imread(dirToDeleteFrom + fileName)
    except Exception:
        # `except Exception` (not a bare except) so that interrupting the cell
        # with Ctrl-C does not delete a perfectly good image.
        os.remove(dirToDeleteFrom + fileName)
        # A missing mask must not abort the clean-up of the remaining files.
        maskPath = masksDirToDeleteFrom + fileName
        if os.path.isfile(maskPath):
            os.remove(maskPath)

In [3]:
# Copy, from the full image folder, every image whose name is listed in the
# 'goodLabels' folder into the images folder of the cleaned-up dataset.

import shutil

goodLabelsDir = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/poseNet/hydraulic/data/dataCleanup_round1/goodLabels'
allImagesDir = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/poseNet/hydraulic/data/images/'
goodImagesDir = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/poseNet/hydraulic/data/try1_only_dataCleanup_round1_goodImages/images'

for imgId in os.listdir(goodLabelsDir):
    shutil.copy(allImagesDir + imgId, goodImagesDir)

In [2]:
# Delete from the dataset's images folder every file that is listed in the
# 'takeOut' folder.

takeOutDir = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/poseNet/hydraulic/data/try1_only_dataCleanup_round1_goodImages/takeOut'
imagesDirToClean = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/poseNet/hydraulic/data/try2_sameDataAsTry1_HeavierCleanup/images/'

for imgId in os.listdir(takeOutDir):
    os.remove(imagesDirToClean + imgId)

In [None]:
# Excluding test-set images from the training set for BucketTracking.
# In its current state the cell only COPIES the matching training images to a
# temp folder and prints the path of the .xml label that goes with each one;
# moving the labels / deleting the images was switched off.

import shutil

dirWithListOfimages = "/media/hooman/hsSsdPartUbuntu/FM_PROJECT/FMDL_3.1/cable/boxDetector_V2_multiclass_Cable/try1/dataFor__boxDetector_V2_multiclass_cable__try1/goodMatInsides_forBBLabelingOfYolo/"

dir2RemoveFrom = "/media/hooman/hsSsdPartUbuntu/FM_PROJECT/dataPreparation/FMDL_3.1/cable/fmdl-cable-trainingData/images/"

tempCopyDir = '/media/hooman/hsSsdPartUbuntu/FM_PROJECT/FMDL_3.1/cable/boxDetector_V2_multiclass_Cable/try1/dataFor__boxDetector_V2_multiclass_cable__try1/temp'

# Every listed id is counted, whether or not it exists in the training folder.
movedLabels = list(os.listdir(dirWithListOfimages))

for imgId in movedLabels:
    trainingImagePath = dir2RemoveFrom + imgId
    if not os.path.isfile(trainingImagePath):
        continue

    shutil.copy(trainingImagePath, tempCopyDir)
    imgName = imgId.replace('.jpg', '.xml')
    print(dir2RemoveFrom + imgName)

print("")
print("Moved labels and deleted images for  " + str(len(movedLabels))  + "  examples\n")

## Images: resizing, converting formats and channels, correcting ids

In [None]:
# Resize and downsample all images to (128, 160, 3):
# resize to 640x480, shrink by 4 to 160x120, then pad 4 rows top and bottom.
resizedImagesPath = '/home/hooman/dataPreparation/hsTestSet/images0PaddedForUNet/'

# NOTE(review): `imagesPath` is not defined in this cell; it has to come from
# an earlier cell -- confirm it is set before running.
for fileName in os.listdir(imagesPath):

    img = imread(imagesPath + fileName)

    imgResized = cv2.resize(img, (640, 480))

    # Downsampling could also be done with numpy (img[::4, ::4, :]); OpenCV is
    # used to stay consistent with the rest of the pipeline.
    imgDs = cv2.resize(imgResized, (0, 0), fx=0.25, fy=0.25)

    imgPadded = cv2.copyMakeBorder(imgDs, 4, 4, 0, 0, cv2.BORDER_CONSTANT, value=(0, 0, 0))

    # skimage's imread returns colour images in RGB(A) order while cv2.imwrite
    # expects BGR(A); without the conversion red and blue are swapped on disk.
    if imgPadded.ndim == 3 and imgPadded.shape[2] == 3:
        imgToWrite = cv2.cvtColor(imgPadded, cv2.COLOR_RGB2BGR)
    elif imgPadded.ndim == 3 and imgPadded.shape[2] == 4:
        imgToWrite = cv2.cvtColor(imgPadded, cv2.COLOR_RGBA2BGRA)
    else:
        imgToWrite = imgPadded

    cv2.imwrite(resizedImagesPath + fileName, imgToWrite)

In [None]:
# Expand single-channel images to three channels and save them to another
# folder; images that already have a channel axis only get their shape printed.
singleChanDir = '/media/hooman/1tb-ssd-hs3-linu/FM_PROJECT/dataPreparation/FMDL_3.1/cable/temp/'
threeChanDir = '/media/hooman/1tb-ssd-hs3-linu/FM_PROJECT/dataPreparation/FMDL_3.1/cable/hsTestSetOfHardImages/'

for imId in os.listdir(singleChanDir):
    img = imread(singleChanDir + imId)

    if len(img.shape) >= 3:
        print(img.shape)
        continue

    img3Chan = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    cv2.imwrite(threeChanDir + imId, img3Chan)


In [None]:
# Expand single-channel images to three channels IN PLACE, for every
# sub-folder of img_dest_dir; images with a channel axis only get their shape
# printed.

img_dest_dir = '/home/hooman/Desktop/i2lData_cropped/'

for mainDir in os.listdir(img_dest_dir):
    for imId in os.listdir(img_dest_dir + mainDir):
        imgPath = img_dest_dir + mainDir + '/' + imId
        img = imread(imgPath)

        if len(img.shape) >= 3:
            print(img.shape)
            continue

        img3Chan = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        cv2.imwrite(imgPath, img3Chan)


In [None]:
# Converting jpg images to png, and removing the jpegs
img_dest_dir = '/home/hooman/dataPreparation/hsTrainingSetBucyrusAndPnH/allImages/'

from PIL import Image

# Collect the stems of the .jpg files only. Other entries (e.g. .png files
# left by an earlier, partial run) are skipped instead of being mistaken for
# jpegs, and splitext only strips the real extension, not every ".jpg"
# occurrence inside the name.
imageFileNameDic = {}

for file in os.listdir(img_dest_dir):
    stem, ext = os.path.splitext(file)
    if ext == ".jpg":
        imageFileNameDic[stem] = 0

for name in imageFileNameDic.keys():
    print(name)
    jpgPath = img_dest_dir + '/' + name + '.jpg'
    # Close the jpeg before deleting it.
    with Image.open(jpgPath) as im:
        im.save(img_dest_dir + '/' + name + '.png')
    os.remove(jpgPath)

In [None]:
# Converting jpg images to png, and removing the jpegs (looping over multiple folders)

img_dest_dir = '/home/hooman/Desktop/i2lData_cropped/'

from PIL import Image

for mainDir in os.listdir(img_dest_dir):
    # Plain files next to the sub-folders cannot be listed; skip them.
    if not os.path.isdir(img_dest_dir + mainDir):
        continue

    # Stems of the .jpg files of this sub-folder only; other entries (e.g.
    # .png files from an earlier, partial run) are skipped, and splitext only
    # strips the real extension.
    imageFileNameDic = {}

    for file in os.listdir(img_dest_dir + mainDir):
        stem, ext = os.path.splitext(file)
        if ext == ".jpg":
            imageFileNameDic[stem] = 0

    for name in imageFileNameDic.keys():
        print(name)
        jpgPath = img_dest_dir + mainDir + '/' + name + '.jpg'
        # Close the jpeg before deleting it.
        with Image.open(jpgPath) as im:
            im.save(img_dest_dir + mainDir + '/' + name + '.png')
        os.remove(jpgPath)

In [None]:
# Re-encode every frame as a JPEG (quality 80, optimized) in a separate folder.

from PIL import Image

framesDir = '/home/hooman/FM_PROJECT/dataPreparation/fmdlTestData-optical-hydraulic/Frame/'
saveDir = '/home/hooman/FM_PROJECT/dataPreparation/fmdlTestData-optical-hydraulic/compressedJpeg80/'

for imId in os.listdir(framesDir):
    img = Image.open(framesDir + imId)

    # output name: the input name with ".png" removed, plus ".jpg"
    fileName = imId.replace(".png", "")
    jpegPath = "{0}/{1}.jpg".format(saveDir, fileName)

    img.save(jpegPath, quality=80, optimize=True)

In [None]:
# Correcting image ids by content matching (single image):
# each wrongly named image is matched to the correctly named image with the
# smallest sum of squared differences, then it and its prediction are renamed.
# HSNOTE: this does not work with abs error, must be squared.

pathToCorrectNames = '/home/hooman/dataPreparation/testingMahdisNetworkOnMyLatestTestSetForComparison/input-orig/'
pathToWrongNames   = '/home/hooman/dataPreparation/testingMahdisNetworkOnMyLatestTestSetForComparison/outputOfSaveH5/'
pathToWrongPreds = '/home/hooman/dataPreparation/testingMahdisNetworkOnMyLatestTestSetForComparison/ouputOfNetworkJustOutput/'

# The candidate folder is not modified by this cell, so list it once.
targIds = os.listdir(pathToCorrectNames)

for srcId in os.listdir(pathToWrongNames):

    # Read the source once per source (not once per candidate) and widen it:
    # subtracting/squaring uint8 arrays wraps around modulo 256 and makes the
    # score meaningless.
    srcIm = imread(pathToWrongNames + srcId).astype(np.int64)

    # inf instead of a fixed large number: a real sum of squared differences
    # can easily exceed any hand-picked constant.
    minScore = float('inf')
    minId = ''

    for targId in targIds:
        temp = imread(pathToCorrectNames + targId)
        targIm = cv2.resize(temp, (srcIm.shape[1], srcIm.shape[0])).astype(np.int64)

        score = np.sum(np.square(srcIm - targIm))

        if score < minScore:
            minScore = score
            minId = targId

    if not minId:
        # No candidate at all: renaming to '' would target the folder itself.
        print("src: " + srcId + "  has no candidate to match against")
        continue

    print("src: " + srcId + "  matched with: " + minId)
    # NOTE(review): os.rename overwrites an existing file of the same name on
    # POSIX; if a matched name equals a not-yet-processed source name that
    # file is lost -- confirm the two name sets are disjoint.
    os.rename(pathToWrongNames + srcId, pathToWrongNames + minId)
    os.rename(pathToWrongPreds + srcId, pathToWrongPreds + minId)

In [None]:
# Correcting image ids by content matching (all channels):
# step 1 matches every wrongly named image to the correctly named image with
# the smallest sum of squared differences; step 2 renames the per-channel
# network outputs accordingly.

import glob

pathToCorrectNames = '/home/hooman/dataPreparation/testingMahdisNetworkOnMyLatestTestSetForComparison/input-orig/'
pathToWrongNames   = '/home/hooman/dataPreparation/testingMahdisNetworkOnMyLatestTestSetForComparison/outputOfSaveH5/'

pathToCorrectNamesIn = '/home/hooman/dataPreparation/testingMahdisNetworkOnMyLatestTestSetForComparison/ouputOfNetworkAllChannels/'

# --- step 1: build the wrong-name -> correct-name mapping -------------------
# The candidate folder is not modified by this cell, so list it once.
targIds = os.listdir(pathToCorrectNames)

namesDic = {}
for srcId in os.listdir(pathToWrongNames):

    # Read once per source and widen: uint8 subtraction/squaring wraps around
    # modulo 256 and makes the score meaningless.
    srcIm = imread(pathToWrongNames + srcId).astype(np.int64)

    # inf instead of a fixed large number, which real scores can exceed.
    minScore = float('inf')
    minId = ''

    for targId in targIds:
        temp = imread(pathToCorrectNames + targId)
        targIm = cv2.resize(temp, (srcIm.shape[1], srcIm.shape[0])).astype(np.int64)

        score = np.sum(np.square(srcIm - targIm))

        if score < minScore:
            minScore = score
            minId = targId

    print(srcId + "___" + minId)
    if minId:
        # Without a candidate there is nothing sensible to rename to.
        namesDic[srcId] = minId

# --- step 2: rename the channel files ---------------------------------------
for srcId, correctId in namesDic.items():

    # Prefix shared by all channel files of this image: the first two
    # '_'-separated parts of the wrong name, each followed by '_'.
    shortName = ''.join(part + '_' for part in srcId.split('_')[0:2])

    # NOTE(review): glob.glob1 is an undocumented helper of the glob module --
    # consider glob.glob if this cell has to run on newer Python versions.
    chanFiles = glob.glob1(pathToCorrectNamesIn, shortName + '*')

    for fil in chanFiles:
        chan = fil.split('_')[2]

        if chan[0:2] == "ch":
            # channel files keep their channel tag as a prefix
            newname = chan + "_" + correctId
            print("renamed CH: " + fil + " to: " + newname + "\n")
        else:
            newname = correctId
            print("renamed: " + fil + " to: " + newname + "\n")
        os.rename(pathToCorrectNamesIn + fil, pathToCorrectNamesIn + newname)

## Images: Displaying side by side

In [10]:
# Showing images side-by-side: old PoseNet prediction on the left, new PoseNet
# prediction on the right, on a 1400 px wide canvas (each panel starts on a
# 700 px boundary).

predsDir1 = '/media/hooman/961293e3-04a5-40c5-afc0-2b205d0a7067/WM_PROJECT/algorithmDev/wmAlgo_usingWearLandmarsk_optical_hydraulics/try1/wmdlLogs_aitik_Komatsu_SH1142_PC5500_2019-02-26_to_2019-03-10/yolo_preds/'

predsDir2 = '/media/hooman/961293e3-04a5-40c5-afc0-2b205d0a7067/WM_PROJECT/algorithmDev/wmAlgo_usingWearLandmarsk_optical_hydraulics/try1/wmdlLogs_aitik_Komatsu_SH1142_PC5500_2019-02-26_to_2019-03-10/usingNewPoseNet/yolo_preds/'

dirToSaveResults = '/media/hooman/961293e3-04a5-40c5-afc0-2b205d0a7067/WM_PROJECT/algorithmDev/wmAlgo_usingWearLandmarsk_optical_hydraulics/try1/wmdlLogs_aitik_Komatsu_SH1142_PC5500_2019-02-26_to_2019-03-10/usingNewPoseNet/comp_newAndOldResnet/'

for imgId in os.listdir(predsDir1):
    if '.png' not in imgId:
        continue

    pred1 = imread(predsDir1 + imgId)
    pred2 = imread(predsDir2 + imgId)

    try:
        # 3-channel canvas; single-channel predictions would need a 2-D canvas
        # (np.zeros((h, 1400), np.uint8)) and 2-D slices instead.
        combImg = np.zeros((pred1.shape[0], 1400, 3), np.uint8)

        combImg[:, 0:pred1.shape[1], :] = pred1
        combImg[:, 700:pred2.shape[1] + 700, :] = pred2

        font = cv2.FONT_HERSHEY_SIMPLEX
        cv2.putText(combImg, 'oldPoseNet', (30, 70), font, 2, (255, 255, 255), 2, 0)
        cv2.putText(combImg, 'newPoseNet', (730, 70), font, 2, (255, 255, 255), 2, 0)

        # The panels come from skimage (RGB) but cv2.imwrite expects BGR;
        # convert so the saved comparison keeps the original colours.
        cv2.imwrite(dirToSaveResults + imgId, cv2.cvtColor(combImg, cv2.COLOR_RGB2BGR))
    except Exception as e:
        # Skip images that do not fit the canvas, but say why; a bare except
        # would also swallow Ctrl-C.
        print("%s: %s" % (imgId, e))



In [None]:
# Showing images side-by-side for 1280*1280 image sizes: try5 prediction on
# the left, try11 prediction on the right, on a 2560 px wide canvas.

predsDir1 = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/hydraulic/try5-NewTrainingProcedure--higherBatchSize/preds_onBackgroundImages/'

predsDir2 = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/hydraulic/try11_sameAs5_afterDataCorrections/preds_onBackground/'

dirToSaveResults = '/media/hooman/1tb-ssd-hs3-linu/BucketTracking-Project/hydraulic/try11_sameAs5_afterDataCorrections/combined_try5Vs11_backgroundImages/'

# x position where the right-hand panel starts
rightPanelX = 1280

for imgId in os.listdir(predsDir1):

    pred1 = imread(predsDir1 + imgId)
    pred2 = imread(predsDir2 + imgId)

    combImg = np.zeros((pred1.shape[0], 2 * rightPanelX, 3), np.uint8)

    combImg[:, 0:pred1.shape[1], :] = pred1
    combImg[:, rightPanelX:pred2.shape[1] + rightPanelX, :] = pred2

    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(combImg, 'try5', (30, 70), font, 2, (255, 255, 255), 2, 0)
    # The right label belongs on the right panel; x=730 (from the 700 px wide
    # variant of this cell) would draw it on top of the left image.
    cv2.putText(combImg, 'try11', (rightPanelX + 30, 70), font, 2, (255, 255, 255), 2, 0)

    # The panels come from skimage (RGB) but cv2.imwrite expects BGR; convert
    # so the saved comparison keeps the original colours.
    cv2.imwrite(dirToSaveResults + imgId, cv2.cvtColor(combImg, cv2.COLOR_RGB2BGR))



In [None]:
# Putting the frame, the optical flow magnitude, the U-Net output and the
# final VES output of one example side by side (four panels, 700 px apart).

testDataDir = '/home/hooman/dataPreparation/fmdlTestData-optical-hydraulic/'
exampleName = 'FMDL_2018.04.30_11.38.09.png'


def _showImage(image):
    """Display one image inline."""
    plt.imshow(image)
    plt.show()


# panel 1: the camera frame (single channel on disk, expanded to 3 channels)
temp = imread(testDataDir + 'Frame/' + exampleName)
frame = cv2.cvtColor(temp, cv2.COLOR_GRAY2BGR)
_showImage(frame)

# panel 2: optical flow magnitude
of = imread(testDataDir + 'OpticalFlowMagnitude/' + exampleName)
_showImage(of)

# panel 3: network output
no = imread(testDataDir + 'NetOut/' + exampleName)
_showImage(no)

# panel 4: final VES output (single channel on disk, expanded to 3 channels)
temp2 = imread(testDataDir + 'VES_finalOutput/fragmentation_results/all/' + exampleName)
fo = cv2.cvtColor(temp2, cv2.COLOR_GRAY2BGR)
_showImage(fo)

print(np.amax(of))

frameHeight, frameWidth = frame.shape[0], frame.shape[1]
combImg = np.zeros((frameHeight, 2800, 3), np.uint8)

combImg[:, 0:frameWidth, :] = frame
# the two middle panels are expanded to 3 channels and resized to the frame size
for panelX, grayPanel in ((700, of), (1400, no)):
    combImg[:, panelX:frameWidth + panelX, :] = cv2.resize(
        cv2.cvtColor(grayPanel, cv2.COLOR_GRAY2BGR), (frameWidth, frameHeight))
# the last panel is pasted at its own width, without resizing
combImg[:, 2100:fo.shape[1] + 2100, :] = fo

_showImage(combImg)
cv2.imwrite('/home/hooman/' + 'combined_' + exampleName, combImg)

In [None]:
# Shuffling a csv.
# Known limitation (kept): every row returned by readCsvRows is shuffled, so
# the output has an empty line somewhere and the header is moved.

csvRows = readCsvRows('/media/hooman/hsSsdPartUbuntu/FM_PROJECT/FMDL_3.1/backhoe/boxDetector_V2_multiclass/try2-withCaseObject-newData/trainingSet.csv')

# shuffle the rows
from random import shuffle
shuffle(csvRows)

# Write the shuffled rows to csv. The `with` block closes the file even if a
# write fails, and a dedicated handle name keeps the notebook-level `csv_file`
# path (used by the CSV-reading cell) from being overwritten by a file object.
shuffledCsvPath = '/media/hooman/hsSsdPartUbuntu/FM_PROJECT/FMDL_3.1/backhoe/boxDetector_V2_multiclass/try2-withCaseObject-newData/trainingSet_shuffled.csv'
with open(shuffledCsvPath, "w") as shuffledCsv:
    for row in csvRows:
        shuffledCsv.write(row + '\n')

print("wrote " + str(len(csvRows)) + " rows to csv file\n")

In [None]:
# Collect the 'matInside' rows that are missing from one csv, taking them from
# another csv, for every image listed in a folder; then write them out.

# rows already present in the file being extended
existingRowsDic = getCertainClassRowsDictFromCsv('/home/hooman/ssdMobileNet_multiClass_bucket_rockInside_FineInside_NoTeeth_NoCase/try5/trainSet_multiClass_bucket_fineRock_try5_manuallyCleaned.csv', ['matInside'])

# rows available in the file that new rows are taken from
rowsDicToAddFrom = getCertainClassRowsDictFromCsv('/home/hooman/ssdMobileNet_multiClass_bucket_rockInside_FineInside_NoTeeth_NoCase/unusedCsvFiles/trainSet_multiClass_bucket_fineRockInapp_try3_uncleaned.csv', ['matInside'])

# gather the missing rows
rowsDicToAddTo = {}
imIdsToCheck = os.listdir('/home/hooman/dataPreparation/hsTrainingSet/imsToAddMatInsideFor/')
for imId in imIdsToCheck:
    print(imId)
    if imId in existingRowsDic:
        print("already there\n")
    elif imId in rowsDicToAddFrom:
        rowsDicToAddTo[imId] = rowsDicToAddFrom[imId]
    else:
        print("error didn't find:  " + imId + "\n")

n = len(imIdsToCheck)
print("processed " + str(n) + " rows")

# NOTE(review): the rows are written to the try4 csv although the existing
# rows were read from the try5 csv -- confirm the target file.
writeRowDicToCsv(rowsDicToAddTo, '/home/hooman/ssdMobileNet_multiClass_bucket_rockInside_FineInside_NoTeeth_NoCase/try4/trainSet_multiClass_bucket_fineRock_try4_manuallyCleaned.csv')

In [None]:
# Filter the rows of a csv by the image ids found in a folder and rewrite it.
imIdsToDelete = os.listdir('/media/hooman/1tb-ssd-hs3-linu/FM_PROJECT/FMDL_3.1/cable/boxDetector_V2_multiclass_Cable/try1/dataFor__boxDetector_V2_multiclass_cable__try1/examplesToRemove/')

csvToWorkWith = '/media/hooman/1tb-ssd-hs3-linu/FM_PROJECT/FMDL_3.1/cable/boxDetector_V2_multiclass_Cable/try1/dataFor__boxDetector_V2_multiclass_cable__try1/firstTry_final.csv'

existingRows = readCsvRows(csvToWorkWith)

# Rows are dropped when their first field is NOT one of the listed ids, which
# is the condition the original code used.
# NOTE(review): given the names ("imIdsToDelete", "examplesToRemove") the
# opposite test may have been intended -- confirm before relying on the output.
#
# The filter is built as a new list: removing items from a list while looping
# over it skips the element after each removal, which is why two passes used
# to be needed (and could still leave rows behind).
idsToDeleteSet = set(imIdsToDelete)
keptRows = [row for row in existingRows if row.split(',')[0] in idsToDeleteSet]

nDeleted = len(existingRows) - len(keptRows)
existingRows = keptRows

print("deleted " + str(nDeleted) + " rows in total")

# define column names
columnTitles = "filename,pathname,xmins,xmax,ymins,ymax,class\n"

# Rewrite the csv in place. The `with` block closes the file even if a write
# fails, and a dedicated handle name keeps the notebook-level `csv_file` path
# from being overwritten by a file object.
with open(csvToWorkWith, "w") as filteredCsv:
    filteredCsv.write(columnTitles)
    for r in existingRows:
        filteredCsv.write(r + '\n')

print("wrote " + str(len(existingRows)) + " rows to csv file\n")



In [None]:
# Add "no bucket" rows to an existing csv and shuffle its rows

csvRows = readCsvRows('/home/hooman/ssdMobileNet_multiClass_bucket_rockInside_FineInside_NoTeeth_NoCase/try5/trainSet_multiClass_bucket_fineRock_try5_manuallyCleaned.csv')

# get rid of the empty row at the end
csvRows = csvRows[:-1]

# Add one row per image in the folder: file name, full path, then five empty
# fields (no box, no class).
# NOTE(review): `imagesPath` is not defined in this cell; it has to come from
# an earlier cell -- confirm it is set before running.
for imId in os.listdir('/home/hooman/ssdMobileNet_multiClass_bucket_rockInside_FineInside_NoTeeth_NoCase/try6/noShovelImagesToAdd/'):
    newRow = ','.join([str(imId), str(imagesPath) + str(imId), '', '', '', '', ''])
    print(newRow)

    csvRows.append(newRow)

# shuffle the rows
from random import shuffle
shuffle(csvRows)

# Write the shuffled rows to csv. The `with` block closes the file even if a
# write fails, and a dedicated handle name keeps the notebook-level `csv_file`
# path from being overwritten by a file object.
newCsvPath = '/home/hooman/ssdMobileNet_multiClass_bucket_rockInside_FineInside_NoTeeth_NoCase/try5/trainSet_multiClass_bucket_fineRock_try5_manuallyCleaned_new.csv'
with open(newCsvPath, "w") as newCsv:
    for row in csvRows:
        newCsv.write(row + '\n')

print("wrote " + str(len(csvRows)) + " rows to csv file\n")

In [None]:
# Drop the matInsideBoundary rows: read only the 'bucket' rows of the combined
# csv and write them to a bucket-only csv.

unetDataDir = '/home/hooman/dataPreparation/hsTrainingSetBucyrusAndPnH/unet/'
combinedCsvPath = unetDataDir + 'trainSet_bucketAndMatInsideBoundaries_allImages_BucAndPnH_cleaned.csv'
bucketOnlyCsvPath = unetDataDir + 'trainSet_justBucketBoundaries_allImages_BucAndPnH_cleaned.csv'

rowsDic = getCertainClassRowsDictFromCsv(combinedCsvPath, ['bucket'])

writeRowDicToCsv(rowsDic, bucketOnlyCsvPath)