<a href="https://colab.research.google.com/github/bhargav23/OpenCV/blob/main/BirdData/Bird_Final_Data_Creation_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Mount Google Drive at /content/drive; the dataset paths used later in this
# notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data Generation

In [2]:
# `random` is used to pick where the foreground image is placed on the background
from random import seed
import random
# NOTE: the generator is not actually seeded here; call seed(<int>) if reproducible output is needed

In [3]:
import numpy as np
import pandas as pd
from PIL import Image

## Helper methods

### random crop and flip


In [4]:
import math
import os
from tqdm.notebook import tqdm
 
# flip image horizontally with a probability of 0.5
def randomflip(im):
  """Mirror ``im`` left-to-right on roughly half of the calls.

  One value is drawn from ``random.random()``; the image is flipped only when
  that draw is strictly greater than 0.5.

  Returns:
    A ``(image, flipped)`` tuple. ``flipped`` is ``True`` when the mirrored
    copy is returned and ``False`` when ``im`` is handed back unchanged.
  """
  flipped = random.random() > 0.5
  if not flipped:
    return im, False
  return im.transpose(Image.FLIP_LEFT_RIGHT), True
 
# crops image and its segment to given size and possibly flips is horizontally
def random_crop_n_flip(img, newsize=(250, 250)):
  """Randomly mirror ``img``, rescale it and cut out a square region.

  The image first goes through ``randomflip``. It is then resized so that its
  shorter side equals the matching entry of ``newsize`` (aspect ratio kept up
  to integer truncation), and a square whose side is that shorter dimension
  is cropped:

  * taller than wide: the bottom-most square is taken;
  * otherwise: a full-height square at a random horizontal offset is taken.

  Args:
    img: image exposing ``size``, ``resize`` and ``crop`` (a PIL image here).
    newsize: ``(width, height)`` target; index 0 drives the portrait branch
      and index 1 the landscape/square branch.

  Returns:
    The cropped image.
  """
  img, _ = randomflip(img)
  width, height = img.size

  if height > width:
    # Portrait: pin the width, scale the height, keep the bottom square.
    width, height = newsize[0], int(newsize[0] * height / width)
    img = img.resize((width, height))
    box = (0, height - width, width, height)
  else:
    # Landscape or square: pin the height, scale the width, slide horizontally.
    width, height = int(newsize[1] * width / height), newsize[1]
    img = img.resize((width, height))
    offset = int(random.random() * (width - height))
    box = (offset, 0, offset + height, height)

  # box is (left, top, right, bottom)
  return img.crop(box)

### Location selection to place foreground object
We want to place the foreground object on the ground plane at a random position. The functions below are meant to first extract the potential ground pixels and then pick one at random as the center of the foreground object.

In [5]:
def show_binary_image(img):
  """Display ``img`` inline as an 8-bit single-channel ('L' mode) image.

  The array is multiplied by 255 and cast to ``uint8`` before being shown, so
  it is presumably expected to hold values in the 0..1 range (e.g. a binary
  mask) - confirm against callers.
  """
  pixels = (img*255).astype(np.uint8)
  picture = Image.fromarray(pixels, mode='L')
  display(picture)
 
def pick_random_center(w,h):
  """Pick a random integer pixel position inside a ``w`` x ``h`` area.

  Returns:
    ``(px, py)`` with ``0 <= px < w`` and ``0 <= py < h``.
  """
  # The y coordinate is drawn before x; keep that order so a seeded run
  # yields the same positions.
  row = int(h * random.random())
  col = int(w * random.random())
  return col, row

### Scale selection
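Next we want to determine how big the foreground should be. We are picking a random value between 0.3 and 0.6. A scale indicates what proportion of the background image the foreground image should span. Note: the code below currently applies a constant scale of 0.2 by default, so all foregrounds come out at the same height.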

In [6]:
def scaled_foreground(fg, newsize, py, scale_range=(0.2, 0.2)):
  """Resize the foreground relative to the background size.

  A scale ``r`` is drawn uniformly from ``scale_range`` and applied to both
  entries of ``newsize``. The foreground is resized so that its height is the
  square root of the scaled area, with the width following from the
  foreground's own aspect ratio.

  The previous expression ``min(random.random()+0.3, 0.2)`` always evaluated
  to 0.2 (the random term is never below 0.3), so the draw was dead code.
  The default ``scale_range=(0.2, 0.2)`` reproduces that constant scale and
  still consumes exactly one random draw; pass e.g. ``(0.3, 0.6)`` to get
  genuinely random sizes.

  Args:
    fg: foreground image exposing ``width``, ``height`` and ``resize``.
    newsize: ``(width, height)`` of the background the foreground goes onto.
    py: unused; kept so existing callers keep working.
    scale_range: ``(low, high)`` bounds for the scale factor.

  Returns:
    The resized foreground image.
  """
  fh, fw = fg.height, fg.width
  low, high = scale_range
  r = random.uniform(low, high)

  # Area of the scaled background box; its square root is the target height.
  ar = (newsize[0]*r) * (newsize[1]*r)

  # Resize based on area and aspect. Clamp to 1 pixel so that extreme aspect
  # ratios never ask for a zero-sized image.
  dw = max(1, int(math.sqrt(ar)*fw/fh))
  dh = max(1, int(dw*(fh/fw)))

  return fg.resize((dw, dh))
 
def place(bg, fg, px, py):
  """Paste ``fg`` onto ``bg`` so that it is centered on ``(px, py)``.

  ``bg`` is modified in place and also returned. When the foreground has an
  alpha channel (mode ``RGBA`` or ``LA``) that channel is used as the paste
  mask. Without a mask, ``paste`` ignores the alpha band and would stamp the
  foreground's full rectangle - transparent pixels included - onto the
  background.

  Args:
    bg: background image (mutated).
    fg: foreground image to paste.
    px, py: desired center of the foreground, in background pixels.

  Returns:
    ``bg``, after pasting.
  """
  left = px - fg.width//2
  top = py - fg.height//2

  has_alpha = getattr(fg, 'mode', None) in ('RGBA', 'LA')
  mask = fg.getchannel('A') if has_alpha else None
  bg.paste(fg, (left, top), mask)

  return bg

In [7]:
  # Module-level accumulator for annotation rows; run_one_batch appends one
  # list per generated image and the DataFrame is later built from it.
  # (`global` at the top level of a cell has no effect; the name is already global.)
  global a
  a = []

In [8]:
def ann(bg, fg, px, py):
  """Compute the half-resolution bounding box of ``fg`` centered at ``(px, py)``.

  Every quantity is floor-divided by two, i.e. the box is expressed in a
  coordinate system half the size of ``bg`` (presumably matching the
  downscaled image that gets saved - confirm against the caller). The box is
  clipped so that it stays inside the halved background.

  Args:
    bg: background image (only ``width`` / ``height`` are read).
    fg: foreground image (only ``width`` / ``height`` are read).
    px, py: center of the foreground in full-size background pixels.

  Returns:
    ``[x, y, w, h, bgw, bgh]``: top-left corner, box size and the halved
    background width and height.
  """
  half_bg_w = bg.width//2
  half_bg_h = bg.height//2

  # Top-left corner in full-size pixels, then halved.
  x = (px - fg.width//2)//2
  y = (py - fg.height//2)//2
  w = fg.width//2
  h = fg.height//2

  # Clip against the left / top edges: shrink the box by the overhang.
  if x < 0:
    w, x = w + x, 0
  if y < 0:
    h, y = h + y, 0

  # Clip against the bottom / right edges.
  h = min(h, half_bg_h - y)
  w = min(w, half_bg_w - x)

  return [x, y, w, h, half_bg_w, half_bg_h]

## Generation of flipped foreground images
Caution : Run only once

In [9]:
def flipforeground(fgpath, imgno):
  """Write a horizontally flipped copy of every PNG found in ``fgpath``.

  Flipped copies are saved into the same folder as ``image<NNN>.png``, with
  ``NNN`` starting at ``imgno`` and increasing by one per source image. The
  folder is listed once, before any copy is written, and the listing is
  sorted so the numbering is the same on every run. Running this a second
  time would also flip the copies produced by the first run.

  Args:
    fgpath: folder holding the foreground PNG files.
    imgno: number used for the first flipped file.
  """
  fglist = sorted(name for name in os.listdir(fgpath) if name.endswith('png'))
  print('Creating flipped foregrounds')
  for name in fglist:
    # The context manager releases the source file handle as soon as the
    # flipped copy has been built.
    with Image.open(os.path.join(fgpath, name)) as fgim:
      fgout = fgim.transpose(Image.FLIP_LEFT_RIGHT)
    fgout.save(os.path.join(fgpath, f"image{imgno:0>3}.png"))
    imgno +=1

#We have 100 foreground images. the flipped images numbers will start from 101
#flipforeground('/content/drive/MyDrive/Data/BirdData/foreground', imgno=101)

# Pipeline
1. We need to select BG image
2. We need to select a FG image
3. 448x448 image -> Depth -> 224x224.
4. 4000 as a batch size, and we create 200, 448x448 images so a 200x448x448x3 size tensor
5. we pass this to depth network and it return 200x224x224x1
6. In a loop we resize our 200 448x448 images to 224x224 and save them in a zip folder
7. at the same time we save the mask 224x224 and the depth.
we will have 2000 batches, 400000 images.
8. While we save the depth images we also want to save a .npy numpy array for each respective image, because that is what will be our training data.

if 1 batch takes 10 sec 20000 sec = 6 hours


In [10]:
# Drive folders holding the background and foreground images
bgpath = os.path.join('/content/drive/MyDrive/Data/BirdData/', "bgimages")
fgpath = os.path.join('/content/drive/MyDrive/Data/BirdData/', "foreground")
# Filenames (not full paths) of the backgrounds (*jpg) and foregrounds (*png),
# in whatever order os.listdir reports them
bgimagelist = [name for name in os.listdir(bgpath) if name.endswith('jpg')]
fgimagelist = [name for name in os.listdir(fgpath) if name.endswith('png')]

In [11]:
# pipeline
# for a batch size of 2000 
# len(bg) = 1
# len(fg) = 100
# multiplier = 20
# this method saves fgonbg, mask, depth images on to their respective folders of the zipfolder (batch - 2000)
# This method needs to be called twice for every bg (decided after testing several batch sizes)
# original background images are 448X448, we are resizing fgonbg and mask to 224X224
# passing 448X448 to depth prediction and getting 224X224.
# All 1200 K images are of size 224 X 224
 
def run_one_batch(inpath, bg, fglist, zipfolder, multiplier=20, imgno=0):
  """Composite every foreground onto one background and archive the results.

  For each foreground in ``fglist`` and each of ``multiplier`` repetitions:
  a random 448x448 crop of the background is taken, the foreground is scaled
  and pasted at a random center, the bounding box is computed, and the
  composite is downsized to 224x224, saved as ``bgtemp.jpg`` and added to
  ``zipfolder`` as ``output/images/fgbg<imgno>.jpg``.

  Side effects: one annotation row (``ann`` result plus the image filename)
  is appended to the module-level list ``a`` per image, and ``bgtemp.jpg`` is
  overwritten in the working directory.

  Changes from the previous version:
  * each repetition scales the foreground as loaded from disk, not the
    already-scaled copy from the previous repetition, which was being
    resampled again and again;
  * the per-image float array and the metadata list are gone - nothing read
    them, and the arrays kept a full 448x448x3 float copy of every image
    alive for the whole batch.

  Args:
    inpath: dataset root containing ``bgimages`` and ``foreground`` folders.
    bg: filename of the background image inside ``bgimages``.
    fglist: filenames of the foreground images inside ``foreground``.
    zipfolder: open ``ZipFile`` the composites are written into.
    multiplier: number of composites per foreground.
    imgno: number used for the first generated image.

  Returns:
    The next unused image number.
  """
  bgimg = os.path.join(inpath, "bgimages", bg)
  fgdir = os.path.join(inpath, "foreground")
  original_im = Image.open(bgimg)

  for fgname in tqdm(fglist):
    fg = Image.open(os.path.join(fgdir, fgname))

    for _ in range(multiplier):
      cropped = random_crop_n_flip(original_im, (448,448))
      px, py = pick_random_center(448, 448)
      # Keep `fg` untouched so every repetition starts from the source image.
      scaled = scaled_foreground(fg, (448,448), py)
      finalimg = place(cropped, scaled, px, py)

      ta = ann(cropped, scaled, px, py)
      ta.append(f'fgbg{imgno:0>6d}.jpg')
      a.append(ta)

      finalimg.resize((224, 224)).save("bgtemp.jpg", optimize = True, quality=60)
      zipfolder.write('bgtemp.jpg', f'output/images/fgbg{imgno:0>6d}.jpg')

      imgno += 1

  return imgno
 
 
    
   # multiplex
   # create a numpy array
   # stack all 448x448 bg-fg images in that array, along size create 224x224 masks
   # pass that to depth network to get 224x224 depth numpy values
   # now resize 448x448 to 224x224 images and save in zipfolder/images
   # save masks in the masks zipfolder/folder
   # save depth images as well as .npy values in depths zipfolder/folder
 
# time for one batch, and see it should be ~5 sec but < 10sec

## Data Generation
1. created a zipfile with the name fgbganddepth2.zip and opened it in append mode
2. generated and saved fgbg, mask and depth for the first 25 background images (a total of 100 K of each type). File numbers run from 000001 to 100000
3. saved the intermediate zip to drive
4. Repeated the steps 2 and 3 for the rest 75 background images. which generated a total of 400 K images of each type.

In [12]:
from zipfile import ZipFile
# Archive that receives the generated images. Mode 'a' appends to an existing
# fgbg.zip, so re-running the generation adds entries on top of earlier ones.
out = ZipFile('fgbg.zip', mode='a')

In [None]:
# Overlay foregrounds on backgrounds and archive the composites.
# NOTE: range(1) processes only the first background image; widen the range
# to cover more backgrounds.
imgno = 0
for x in range(1):
  print("background image - ", (x+1))
  # Two passes per background: the first 100 foregrounds, then the remainder.
  imgno = run_one_batch('/content/drive/MyDrive/Data/BirdData/', bgimagelist[x], fgimagelist[:100], zipfolder=out, multiplier=2, imgno=imgno)
  imgno = run_one_batch('/content/drive/MyDrive/Data/BirdData/', bgimagelist[x], fgimagelist[100:], zipfolder=out, multiplier=2, imgno=imgno)

print("images created so far: ", imgno)

In [20]:
# Count the files directly inside the Temp folder: next() takes only the first
# os.walk entry, so sub-folders are not descended into.
path, dirs, files = next(os.walk("/content/drive/MyDrive/Data/BirdData/Temp"))
file_count = len(files)

In [21]:
file_count

101

In [None]:
# One row per generated image. Each row in `a` is laid out as
# [x, y, w, h, bgw, bgh, filename], so the background width column must come
# before the height column (the two names used to be swapped, which only went
# unnoticed because the backgrounds are square).
df = pd.DataFrame(a,columns=['x','y','width','height','bgw','bgh','imgno'])

In [None]:
# Close the image archive so that its final records are written to disk
# before the file is copied.
out.close()

In [None]:
cp /content/fgbg.zip /content/drive/MyDrive/Data/BirdData/dataset

In [None]:
def convert_annotation(annot):
  """Turn one annotation record into a normalized, center-based box.

  Reads ``x``, ``y``, ``width``, ``height``, ``bgw`` and ``bgh`` from
  ``annot``. The horizontal values are divided by ``bgw`` and the vertical
  ones by ``bgh``; the top-left corner is then replaced by the box center.

  Returns:
    A list ``[center_x, center_y, width, height]`` of normalized values.
  """
  box = np.array(
      [annot['x'], annot['y'], annot['width'], annot['height']],
      dtype=np.float32,
  ).ravel()

  horizontal, vertical = [0, 2], [1, 3]
  box[horizontal] /= annot['bgw']  # x and width as a fraction of bgw
  box[vertical] /= annot['bgh']    # y and height as a fraction of bgh

  # Shift from the top-left corner to the center of the box.
  center_x = box[0] + box[2] / 2
  center_y = box[1] + box[3] / 2
  return [center_x, center_y, box[2], box[3]]

In [None]:
# Add a 'bbox' column holding the convert_annotation result for every row.
df['bbox']=df.apply(convert_annotation,axis=1)

In [None]:
# Save the annotations; the DataFrame index is written as the first CSV column.
df.to_csv('ann.csv')

In [None]:
df.head()

Unnamed: 0,x,y,width,height,bgh,bgw,imgno,bbox
0,18,26,84,44,224,224,fgbg000000.jpg,"[0.2678571417927742, 0.214285708963871, 0.375,..."
1,46,106,85,44,224,224,fgbg000001.jpg,"[0.3950892984867096, 0.571428582072258, 0.3794..."
2,122,152,79,44,224,224,fgbg000002.jpg,"[0.7209821492433548, 0.7767856866121292, 0.352..."
3,37,0,80,31,224,224,fgbg000003.jpg,"[0.34375, 0.0691964253783226, 0.35714287, 0.13..."
4,13,74,64,44,224,224,fgbg000004.jpg,"[0.2008928619325161, 0.428571417927742, 0.2857..."


In [None]:
mkdir lables

In [None]:
cd lables/

/content/lables


In [None]:
# Archive for the per-image label files. Mode 'a' appends to an existing
# lables.zip, so re-running adds entries on top of earlier ones.
l = ZipFile('lables.zip', mode='a')

In [None]:
# Write one label file per image ("<class> <cx> <cy> <w> <h>") and add it to
# the label archive.
category_id = 0
for index, row in df.iterrows():
  label_name = row['imgno'].split('.')[0] + '.txt'
  with open(label_name, 'w') as file:
    file.write('%g %.6f %.6f %.6f %.6f' % (category_id, row['bbox'][0],row['bbox'][1],row['bbox'][2],row['bbox'][3]))
  # Archive only after the with-block has closed the file: while it is still
  # open the text sits in the write buffer and the archive would receive an
  # empty file.
  l.write(label_name)

In [None]:
# Close the label archive so that its final records are written to disk
# before the file is copied.
l.close()

In [None]:
cp /content/lables/lables.zip /content/drive/MyDrive/Data/BirdData/dataset