1. Import modules and define functions

In [None]:
from PIL import Image
from scipy.io import loadmat
import glob
import pandas as pd

def count_mats_images(path, name=None):
  %cd $path
  images = glob.glob('*.JPG') + glob.glob('*.jpg')
  mats = glob.glob('*.mat')
  mat_names = [f.split('.')[0] for f in mats]
  im_names = [f.split('.')[0] for f in images]
  mat_no_im = [f for f in mat_names if f not in im_names]
  im_no_mat = [f for f in im_names if f not in mat_names]
  print(f'{name}: images = {len(images)}, mats = {len(mats)}, mats no image = {len(mat_no_im)}, image no mat = {len(im_no_mat)}')

def convert_mat(name, size, out_path):
  im_width, im_height = size
  file = open(out_path+name+'.txt', 'w')
  data = loadmat(name+'.mat')['DATA']
  if len(data.dtype.names) == 20:
    fish = data[0,0]['XX1'][0].size
    for i in range(fish):
      box_width, box_height = data[0,0]['rect_WIDTH'][0][i]/im_width, data[0,0]['rect_HEIGHT'][0][i]/im_height
      x_topleft, y_topleft  = data[0,0]['XX1'][0][i]/im_width, data[0,0]['YY1'][0][i]/im_height
      x_center, y_center    = x_topleft + box_width/2, y_topleft + box_height/2
      file.write(f'0 {x_center} {y_center} {box_width} {box_height}\n')
  else:
    fish = 0
  file.close()
  return fish

def create_blank(name, out_path):
  file = open(out_path+name+'.txt', 'w')
  file.close()

def convert_folder(in_path, label_folder, batch_id):
  %cd $label_folder
  !mkdir $batch_id
  out_path = label_folder + batch_id + '/'
  %cd $in_path
  cum_labels = 0
  cum_fish = 0
  i = 0
  metadata = pd.DataFrame(columns=['Name', 'Matlabel', 'Fish', 'Date', 'Time'])
  images = glob.glob('*.JPG') + glob.glob('*.jpg')
  mats = glob.glob('*.mat')
  for image in images:
    name = image.split('.')[0]
    if name+'.mat' in mats:
      size = Image.open(image, 'r').size
      fish, label = convert_mat(name, size, out_path), 1
    else:
      create_blank(name, out_path)
      fish, label = 0, 0
    
    date, time = Image.open(image).getexif().get(36867).split(' ')
    metadata.loc[i] = [name, label, fish, date, time]
    cum_fish += fish
    cum_labels += label
    i += 1
  
  %cd $out_path
  metadata.to_csv(f'{batch_id}.csv')
  print(f'Successfully created {len(images)} labels ({cum_labels} converted from mat) with {cum_fish} fish.')


2. Mount Google drive

In [1]:
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [2]:
# Create link to drive
!ln -s /content/gdrive/Shareddrives/FishOASIS_ML-Detector/ /drive

In [3]:
%cd /drive/

/content/gdrive/Shareddrives/FishOASIS_ML-Detector


In [8]:
!ls -lh DATA/IMAGES

total 8.0K
drwx------ 2 root root 4.0K Apr 30 22:23 2017
drwx------ 2 root root 4.0K Apr 22 19:50 2018


3. Check for data completedness, i.e. each mat file has an image

In [None]:
label_folder = '/drive/INTERNS/CLARE/Pre-processing/Yolo_labels/'
image_folders = ['/drive/DATA/IMAGES/2017/Site_C/September-October/Label/',
                  '/drive/DATA/IMAGES/2018/Site_C/May-June/Label/', 
                  '/drive/DATA/IMAGES/2018/Site_C.2/July/Label/1/', 
                  '/drive/DATA/IMAGES/2018/Site_C.2/July/Label/2/',
                  '/drive/DATA/IMAGES/2018/Site_C.2/July/Label/3/',
                  '/drive/DATA/IMAGES/2018/Site_C.2/July/Label/4/' ]

In [None]:
folders = glob.glob(f'{image_folders[1]}*')
for folder in folders:
  count_mats_images(folder)

/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C/May-June/Label/180525
None: images = 940, mats = 937, mats no image = 0, image no mat = 3
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C/May-June/Label/180526
None: images = 1618, mats = 805, mats no image = 0, image no mat = 813


In [None]:
# folders = glob.glob(f'{image_folders[5]}*')
for folder in ['/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180710 ']:
  %cd $folder
  images = glob.glob('*.JPG') + glob.glob('*.jpg')
  mats = glob.glob('*.mat')
  mat_names = [f.split('.')[0] for f in mats]
  im_names = [f.split('.')[0] for f in images]
  mat_no_im = [f for f in mat_names if f not in im_names]
  if len(mat_no_im) > 0:
    df = pd.DataFrame(mat_no_im, columns=['Missing_images'])
    date=folder.split('/')[-1]
    df.to_csv(f'/drive/INTERNS/CLARE/Pre-processing/missing_images/4_{date}.csv', index=False)

/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180710


In [None]:
%cd '/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180710'
!mv '180710_113637.mat' '/drive/INTERNS/CLARE/Pre-processing/missing_image_cam_4_180710_113637.mat'

/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180710


2018 May-June Site C

In [None]:
folders = glob.glob(f'{image_folders[1]}*')
for folder in ['/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C/May-June/Label/180526']:
  in_path = folder
  batch_id = folder.split('/')[-1]+'_0'
  convert_folder(in_path, label_folder, batch_id)

/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C/May-June/Label/180526
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180526
Successfully created 1618 labels (805 converted from mat) with 5390 fish.


2018 July Site C.2 Camera 1

In [None]:
 # folders = glob.glob(f'{image_folders[2]}*')
folders = ['/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180716', '/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180718', '/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180719', '/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180720',
           '/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180721', '/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180722', '/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180723']
for folder in folders:
  in_path = folder
  batch_id = folder.split('/')[-1]+'_1'
  convert_folder(in_path, label_folder, batch_id)

/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180716
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180716_1
Successfully created 1534 labels (236 converted from mat) with 761 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
mkdir: cannot create directory ‘180718_1’: File exists
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180718
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180718_1
Successfully created 1483 labels (321 converted from mat) with 1475 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
mkdir: cannot create directory ‘180719_1’: File exists
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/1/180719
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yo

2018 July Site C.2 Camera 2

In [None]:
# folders = glob.glob(f'{image_folders[3]}*')
folders = ['/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/2/180718', '/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/2/180719',
           '/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/2/180720']
for folder in folders:
  in_path = folder
  batch_id = folder.split('/')[-1]+'_2'
  convert_folder(in_path, label_folder, batch_id)

/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/2/180718
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180718_2
Successfully created 1684 labels (464 converted from mat) with 2545 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
mkdir: cannot create directory ‘180719_2’: File exists
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/2/180719
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180719_2
Successfully created 1650 labels (481 converted from mat) with 3272 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/2/180720
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180720_2
Successfully created 1647 labels (3

2018 July Site C.2 Camera 3

In [None]:
folders = glob.glob(f'{image_folders[4]}*')
for folder in folders:
  in_path = folder
  batch_id = folder.split('/')[-1]+'_3'
  convert_folder(in_path, label_folder, batch_id)

/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/3/180717
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180717_3
Successfully created 1681 labels (315 converted from mat) with 1058 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/3/180718
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180718_3
Successfully created 1620 labels (505 converted from mat) with 2236 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/3/180719
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180719_3
Successfully created 1651 labels (647 converted from mat) with 2299 fish.
/content/gdrive/

2018 July Site C.2 Camera 4

In [None]:
folders = ['/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180720']
for folder in folders:
  in_path = folder
  batch_id = folder.split('/')[-1]+'_4'
  convert_folder(in_path, label_folder, batch_id)

/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
mkdir: cannot create directory ‘180720_4’: File exists
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180720
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180720_4
Successfully created 1665 labels (975 converted from mat) with 3771 fish.


In [None]:
folders = ['/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180720']
for folder in folders:
  in_path = folder
  batch_id = folder.split('/')[-1]+'_4'
  convert_folder(in_path, label_folder, batch_id)

/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180710
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180710_4
Successfully created 883 labels (538 converted from mat) with 2949 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180711
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180711_4
Successfully created 1385 labels (753 converted from mat) with 4354 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180712
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180712_4
Successfully created 1403 labels (545 converted from mat) with 1996 fish.
/content/gdrive/S

KeyboardInterrupt: ignored

In [None]:
folders = glob.glob(f'{image_folders[5]}*')
for folder in folders:
  in_path = folder
  batch_id = folder.split('/')[-1]+'_4'
  convert_folder(in_path, label_folder, batch_id)

/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180717
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180717_4
Successfully created 1672 labels (805 converted from mat) with 2531 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180718


Consider mio5.varmats_from_mat to split file into single variable files
  matfile_dict = MR.get_variables(variable_names)


/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180718_4
Successfully created 1673 labels (1150 converted from mat) with 6231 fish.
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/content/gdrive/Shareddrives/FishOASIS/DATA/IMAGES/2018/Site_C.2/July/Label/4/180720


Test

In [None]:
label_folder = '/mydrive/INTERNS/CLARE/Pre-processing/Yolo_labels/'
in_path = '/mydrive/INTERNS/CLARE/Pre-processing/Test/'
batch_id = '180526_2'

In [None]:
convert_folder(in_path, label_folder, batch_id)

/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels
/mydrive/INTERNS/CLARE/Pre-processing/Yolo_labels/180526_2/
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Test
/content/gdrive/Shareddrives/FishOASIS/INTERNS/CLARE/Pre-processing/Yolo_labels/180526_2
Successfully copied 4 images and 4 labels with 20 fish.
