In [0]:
# Mount Google Drive at /content/drive. The Drive is expected to contain the
# person_detection repository from https://github.com/Yannick947/person_detection
from google.colab import drive
drive.mount('/content/drive')

# Change into the Colab sample_data folder (the leading ../ hops are presumably
# there to reach the filesystem root first) and clone the WiderPerson repo there.
%cd ../../../../../content/sample_data/
!git clone https://github.com/Yannick947/WiderPerson.git
# Leave the folder again; '/../..' resolves to the filesystem root.
%cd /../..

In [0]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import os
import csv
import time
import shutil




# Constants

In [0]:
# Column names passed to pd.read_csv(names=...) when loading the annotation CSVs.
HEADER = ['image_name', 'x_min', 'y_min', 'x_max', 'y_max', 'label']


# Move up six directory levels so the relative './content/...' path defined
# below resolves — presumably this ends at the filesystem root; confirm the
# starting directory.
%cd ../../../../../../
# WiderPerson dataset on the mounted Drive. These two paths are relative
# ('./content/...') and therefore depend on the current working directory.
annot_path = './content/drive/My Drive/person_detection/WiderPerson/Annotations'
# Fixed: this path used to start with ',/' (a comma typo for './'), which
# pointed at a non-existent directory literally named ','.
images_path = './content/drive/My Drive/person_detection/WiderPerson/Images'
# Images of the WiderPerson repository cloned into the Colab sample_data folder.
images_path_sample = '/content/sample_data/WiderPerson/Images'

# Annotation CSV and working directory inside the keras-retinanet checkout on Drive.
annot_csv_path = '/content/drive/My Drive/person_detection/keras-retinanet/annotations_sampledata.csv'
keras_path = '/content/drive/My Drive/person_detection/keras-retinanet'
# Class ids that are kept, mapped to the label written to the CSV.
# Annotations with any other class id are left out of the CSV.
classes_ids = {1:'pedestrian',
               3:'partially-visible'}


# Preprocessing


In [0]:
# Move three directory levels up before running the size filter below.
%cd ../../../
def filter_images_by_size(df, max_size=4000, min_size=440):
  '''Drop the annotations of images whose size is outside the allowed range.

  Every file found in ``images_path_sample`` is opened with ``read_image_bgr``.
  An image is rejected when the smaller of its first two shape entries is
  below ``min_size`` or the larger one is above ``max_size``.

  NOTE(review): ``read_image_bgr`` is not imported in the visible cells; it
  has to be in scope before this function is called.

  Args:
    df: annotation frame with an ``image_name`` column holding image paths.
    max_size: largest allowed value for the longer image side.
    min_size: smallest allowed value for the shorter image side.

  Returns:
    The rows of ``df`` that do not belong to a rejected image. ``df`` itself
    is not modified.
  '''
  rejected = set()
  for image_name in os.listdir(images_path_sample):
    try:
      image = read_image_bgr(images_path_sample + '/' + image_name)
      short_side = min(image.shape[0:2])
      long_side = max(image.shape[0:2])
    except Exception as err:
      # Best effort: a file that cannot be read (not an image, corrupt data)
      # is reported and skipped; its annotations stay in the frame.
      print('Could not read image, keeping its annotations. Name of file: ',
            image_name, '| error: ', err)
      continue
    if (short_side < min_size) or (long_side > max_size):
      rejected.add(image_name)

  images_before = df.image_name.nunique()
  # Compare exact file names instead of a regex substring search, so that a
  # name such as '1.jpg' cannot accidentally match '11.jpg' or '1xjpg', and
  # filter the frame once instead of once per rejected image.
  file_names = df.image_name.astype(str).map(os.path.basename)
  df = df[~file_names.isin(rejected)]
  print('final df shape: ', df.shape)
  # Count only images that were actually present in the annotations.
  print('Removed ', images_before - df.image_name.nunique(), 'images')
  return df

# Load the annotation CSV; column names are supplied here because the file is
# read without a header row.
df = pd.read_csv(
    annot_csv_path,
    names=['image_name', 'x1', 'y1', 'x2', 'y2', 'label'],
    header=None,
)

# Drop annotations of images outside the 600..4000 size range and store the
# result next to the other keras-retinanet files (no header, no index column).
df_filtered = filter_images_by_size(df, max_size=4000, min_size=600)
df_filtered.to_csv(f'{keras_path}/annot_filtered_600_4000.csv', index=None, header=None)


In [0]:
# Create the train/test split on image names rather than on annotation rows,
# so that all boxes of one image end up in the same subset.
# Move five directory levels up before reading the files below.
%cd ../../../../../
# Size-filtered annotations; the file is read without a header row.
annotations = pd.read_csv(keras_path + '/annot_filtered_600_4000.csv',
              header=None,
              names=HEADER)
# Fixed: list the same folder whose path is used as prefix (the local sample
# copy) instead of listing the Drive folder and prefixing a different,
# hard-coded path. Sorting makes the split independent of os.listdir order.
image_names = pd.Series(sorted(os.listdir(images_path_sample)), name='image_names')
image_names = images_path_sample + '/' + image_names
# Fixed seed so that re-running the notebook reproduces the same split.
train_names, test_names = train_test_split(image_names, test_size=0.15, random_state=42)
# Select annotation rows by image membership, not by row.
train_df = annotations[annotations.image_name.isin(train_names)]
test_df = annotations[annotations.image_name.isin(test_names)]
print(train_df.shape, train_df.image_name.nunique(), test_df.image_name.nunique(), test_df.shape)


train_df.to_csv(keras_path + '/annot_train_filtered_600_4000.csv', header=None, index=None)
test_df.to_csv(keras_path + '/annot_test_filtered_600_4000.csv', header=None, index=None)


In [0]:
#remove other classes than pedestrian and make partially visible to pedestrian
def filter_classes(df):
  '''Keep only person annotations and merge them into one class.

  Rows whose ``label`` is neither 'pedestrian' nor 'partially-visible' are
  dropped; the remaining 'partially-visible' labels are renamed to
  'pedestrian'.

  Args:
    df: annotation frame with a ``label`` column.

  Returns:
    A new frame with the filtered and relabelled rows. ``df`` is not modified.
  '''
  is_person = df.label.isin(['pedestrian', 'partially-visible'])
  # Work on an explicit copy: relabelling a slice in place triggers pandas'
  # SettingWithCopyWarning and is not guaranteed to behave consistently.
  df_filtered = df.loc[is_person].copy()
  # Relabel only the label column; the previous frame-wide replace would also
  # have rewritten any other cell that happened to equal 'partially-visible'.
  df_filtered['label'] = df_filtered['label'].replace('partially-visible', 'pedestrian')
  print('reduced from ', df.shape, 'to', df_filtered.shape)
  return df_filtered

# Reduce the test annotations to a single 'pedestrian' class and save them.
# NOTE(review): this reads a '600_2500' file, whereas the split cell of this
# notebook writes 'annot_test_filtered_600_4000.csv' — confirm the input file
# exists and is the intended one.
df = pd.read_csv(
    f'{keras_path}/annot_test_filtered_600_2500.csv',
    names=['image_name', 'x1', 'y1', 'x2', 'y2', 'label'],
    header=None,
)
df_filtered = filter_classes(df)
df_filtered.to_csv(
    f'{keras_path}/annot_test_600_2500_classes_filtered.csv',
    index=None,
    header=None,
)

In [0]:
# Collect the image dimensions for (at most) the first 200 annotation rows and
# print their mean and standard deviation.
# NOTE(review): annot_test and read_image_bgr are not defined in the visible
# cells — they must be in scope before this cell runs.
shapes_x = list()
shapes_y = list()
# Guard against annotation frames with fewer than 200 rows.
for i in range(min(200, len(annot_test))):
  path = annot_test.iloc[i].image_name
  image = read_image_bgr(path)
  # Fixed: axis 0 of an image array is rows (height, y) and axis 1 is columns
  # (width, x); the two were swapped before.
  shapes_x.append(image.shape[1])
  shapes_y.append(image.shape[0])
print(shapes_x[0:10])
print(shapes_y[0:10])
if shapes_x:
  print('average in x', sum(shapes_x) / len(shapes_x))
  print('average in y', sum(shapes_y) / len(shapes_y))
  print('std in x', np.std(shapes_x), '\nstd in y', np.std(shapes_y))
else:
  # Avoid a division by zero when there is nothing to measure.
  print('No images available for shape statistics')




In [0]:
def replace_annoation_folder(new_path, csv_annot_path, img_path):
  '''Rewrite the image folder prefix stored in the sample annotation CSV.

  Reads '/content/annotations_sample.csv' with the HEADER column names,
  discards its first row, replaces ``img_path`` by ``new_path`` in the
  ``image_name`` column, drops rows with missing values and writes the result
  to ``csv_annot_path + '/annotations_sampledata.csv'`` without header and
  index.

  The original author notes: start this function from the folder root,
  otherwise it won't work properly.

  Args:
    new_path: folder prefix to write into the image paths.
    csv_annot_path: existing directory that receives the output CSV.
    img_path: folder prefix currently stored in the image paths.
  '''

  df_annot = pd.read_csv('/content/annotations_sample.csv',
                         names=HEADER)
  # Discard the first row of the file — presumably a header line stored in
  # the CSV itself; confirm against the input file.
  df_annot = df_annot.reset_index().drop(0).drop(columns='index')
  # Fixed: the frame is read with HEADER, so the path column is 'image_name';
  # indexing 'name' raised a KeyError. regex=False treats the folder as plain
  # text, so characters such as '.' in a path are not interpreted as a pattern.
  df_annot['image_name'] = df_annot['image_name'].str.replace(img_path, new_path, regex=False)
  df_annot = df_annot.dropna()
  df_annot.to_csv(csv_annot_path + '/annotations_sampledata.csv',
                  index=None, header=None)
  return

# Folder prefix that the image paths should point to afterwards (local sample_data copy).
new_path = '/content/sample_data/WiderPerson/Images'
# Directory that receives 'annotations_sampledata.csv'.
path_to_csv_annot = '/content/drive/My Drive/person_detection/keras-retinanet/annotations'
# Folder prefix to be replaced.
# NOTE(review): this prefix uses 'PersonDetection' while the other Drive paths
# in this notebook use 'person_detection' — confirm it matches the paths
# actually stored in the CSV.
old_path = '/content/drive/My Drive/PersonDetection/WiderPerson/Images'
replace_annoation_folder(new_path, path_to_csv_annot, old_path)

In [0]:

def generate_annotations(max_images=10):
  '''Write the annotation CSV for the sample images.

  For each of the first ``max_images`` files in ``images_path_sample`` that
  has a matching '<file name>.txt' in ``annot_path``, every annotation line is
  converted into one CSV row: image path, the remaining fields of the line,
  and the class label looked up in ``classes_ids``. Lines whose class id is
  not in ``classes_ids`` are skipped.

  The output file ``keras_path + '/annotations.csv'`` is opened with mode 'x',
  so the call fails with FileExistsError if the file already exists.

  Args:
    max_images: number of image files to process (default 10, as before);
      pass None to process every file.
  '''
  # A set makes the membership test below O(1) per image.
  annot = set(os.listdir(annot_path))
  with open(keras_path + '/annotations.csv', newline='', mode='x') as csvfile:
    csv_writer = csv.writer(csvfile, delimiter=',')
    for filename in os.listdir(images_path_sample)[0:max_images]:
      annot_file = filename + '.txt'
      if annot_file not in annot:
        continue

      # 'with' guarantees the annotation file is closed even if a row fails.
      with open(annot_path + '/' + annot_file, 'r') as f:
        for index, line in enumerate(f):
          if index == 0:
            # The first line is treated as the object count, not as a box.
            if line.strip() == '0':
              print('Not any  object in the image!')
            continue

          # Keep the text before the first '/' (if any) and split on
          # whitespace. The previous slice `line[:line.find('/')]` cut off the
          # last character whenever the line contained no '/', which corrupts
          # a final line that has no trailing newline.
          fields = line.split('/')[0].split()
          if not fields:
            continue  # blank line
          class_id, box = fields[0], fields[1:]

          # Convert from class index to class label; skip unknown classes.
          try:
            label = classes_ids[int(class_id)]
          except (ValueError, KeyError):
            continue

          csv_writer.writerow([images_path + '/' + filename] + box + [label])
  return

# Build annotations.csv (raises if the file already exists, because it is
# opened with mode='x').
generate_annotations()
# Check for wrongly annotated bounding boxes
path = '/content/drive/My Drive/person_detection/keras-retinanet/annotations.csv'

def check_bb(path):
  '''Load an annotation CSV and keep only boxes with positive width and height.

  Args:
    path: CSV file without header row; columns are read as
      filename, x1, y1, x2, y2, class_label.

  Returns:
    The rows for which x1 < x2 and y1 < y2.
  '''
  boxes = pd.read_csv(
      path,
      names=['filename', 'x1', 'y1', 'x2', 'y2', 'class_label'],
  )
  has_width = boxes['x1'] < boxes['x2']
  has_height = boxes['y1'] < boxes['y2']
  valid_boxes = boxes[has_width & has_height]
  print ('Reduces shape from ', boxes.shape, 'to ', valid_boxes.shape)
  return valid_boxes

In [0]:
# Write the class index file: one "label,index" row per entry of classes_ids,
# numbered in dictionary order. mode='x' refuses to overwrite an existing file.
with open('classes.csv', mode='x', newline='') as csvfile:
  csv_writer = csv.writer(csvfile, delimiter=',')
  csv_writer.writerows(
      [str(label), position]
      for position, label in enumerate(classes_ids.values())
  )
    
# Check for wrongly annotated bounding boxes
def check_bb(path):
  '''Load an annotation CSV and keep only boxes with positive width and height.

  Args:
    path: CSV file without header row; columns are read as
      filename, x1, y1, x2, y2, class_label.

  Returns:
    The rows for which x1 < x2 and y1 < y2.
  '''
  boxes = pd.read_csv(
      path,
      names=['filename', 'x1', 'y1', 'x2', 'y2', 'class_label'],
  )
  has_width = boxes['x1'] < boxes['x2']
  has_height = boxes['y1'] < boxes['y2']
  valid_boxes = boxes[has_width & has_height]
  print ('Reduces shape from ', boxes.shape, 'to ', valid_boxes.shape)
  return valid_boxes

# Keep only the rows of the CSV at `path` whose box satisfies x1 < x2 and y1 < y2.
df = check_bb(path)


In [0]:
def show_image_objects(image_rows):
  '''Display an image with the bounding boxes of all given rows drawn on it.

  Only the ``image_name`` of the first row is loaded, so all rows are
  presumably expected to belong to the same image.

  NOTE(review): read_image_bgr, draw_box, cv2 and plt are not imported in the
  visible cells; they have to be in scope before this function is called.

  Args:
    image_rows: annotation rows with the columns image_name, x_min, y_min,
      x_max and y_max.
  '''
  # Draw on a copy so the loaded image array stays untouched.
  canvas = read_image_bgr(image_rows.iloc[0].image_name).copy()

  for row in image_rows.itertuples():
    draw_box(canvas, [row.x_min, row.y_min, row.x_max, row.y_max], color=(255, 255, 0))

  # Convert the channel order from BGR to RGB before handing it to matplotlib.
  rgb_canvas = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
  plt.axis('off')
  plt.imshow(rgb_canvas)
  plt.show()

# Take the image referenced by annotation row 3000, select every annotation
# row of that image and display them.
# NOTE(review): annot_test is not defined in the visible cells — confirm where
# it comes from (and that it has more than 3000 rows).
i = 3000
image_rows = annot_test[annot_test.image_name == annot_test.image_name.iloc[i]]
show_image_objects(image_rows)