In [1]:
import numpy as np
import pandas as pd
import math
import random
from PIL import Image
import cv2
import os
import matplotlib.pyplot as plt
from lxml.etree import Element, SubElement, tostring
from xml.dom.minidom import parseString
import shutil
import json
from shapely.geometry import Polygon, MultiPolygon
import tensorflow as tf


class Point_Source_Generator:
  #This is a class which generates a source image based on some (latitude, longitude) coordinate describing the source
  #and number of photons detected, everything else is fixed, but can be made parameters later
  #How/the order to call the class:
  #Make class instance, call fill_counts(), call convert_to_jpg(), call get_num_pixels(), finally call gen_xml_annot()

  #How parameters should be varied:
  #counts - [50, 100, 1000, 10000]    
  #b_source - [-20, 0, 20]        
  #l_source - [60, 80, 100]       
  #psf_sigma - [0.5, 1.0, 1.5, 2.0]  
  #consts - [0.1, 0.8, 3.0, 5.0, 7.0, 10.0]
  #pixel size need to be smaller than 1
  #number of segments must be 20 or less to avoid errors!
  
  seed = 1234 #this is the seed used to seed the poisson generator
  random.seed() #this uses system time, but should try to use a seed eventually in case we need to reproduce images

  #UNCHANGING QUANTITIES:
  RAND_MAX = 2147483647 #this value is RAND_MAX in C++ (but is different per compiler I think)

  def __init__(self, folder_path, b_source, l_source, source_counts, namef, psf_sigma,
               constant_ = 20.0, num_sources=1, segments = 20, spread = 10.0, pixel_size = 0.2, l_min = 10.0, l_max = 110.0, 
               b_min = -50.0, b_max =50.0):
    #constructor
    if num_sources > 1:
      assert len(b_source) == len(l_source), "Lengths of B/L arrays are NOT equal."
      assert len(b_source) == len(source_counts), "Length of source counts array is NOT equal."

    self.b_source= b_source #if multiple sources, will be list
    self.l_source=l_source #if multiple sources, will be list
    self.source_counts = source_counts #if multiple, will be list
    self.psf_sigma = psf_sigma #this can also be  a list!

    self.l_min = l_min
    self.l_max = l_max
    self.b_min = b_min
    self.b_max = b_max
    
    self.pixel_size = pixel_size

    self.namef = namef
    self.spread = spread
    self.segments = segments
    self.folder_path = folder_path
    self.constant_ = constant_
    self.num_sources = num_sources

    self.nl = (l_max - l_min)/pixel_size
    self.nb = (b_max - b_min) / pixel_size #pixel_size +1 ?
    self.norm = 0.0
  
  def debug_type(self, anything):
    #debug function to return data type and if np array dtype
    n = "nothing"
    if anything is None:
      print("NOTHING HERE")
      return n
    else:
      print("type:" , type(anything))
      if type(anything) is np.ndarray:
        print("np array dtype: ", anything.dtype)
        print("np array dim: ", np.shape(anything))
        self.debug_np_print(anything)
    return n

  def debug_np_print(self, np_ar):
    for x in np_ar:
      for y in x:
        print(y, end =" ")
      print()

  def call_once(self):
    print("Integer Poisson counts: ")
    self.debug_np_print(self.int_counts)

  def gaussian_fill(self, i, j):
    #2D gaussian fill generator, takes coordinates and generates 2D gaussian
    i = float (i)
    j = float (j)
    l = self.l_min + i*self.pixel_size 
    b = self.b_min + j*self.pixel_size
    angle = math.sqrt(pow(l - self.l_source, 2) + pow(b - self.b_source, 2))
    temp_ = math.exp( -pow(angle,2)/(2*pow(self.psf_sigma,2))) #Gaussian calculation 
    return temp_

  def gaussian_fill_m(self, i, j, ind):
    #2D gaussian fill generator for multiple sources
    i = float (i)
    j = float (j)
    l = self.l_min + i*self.pixel_size 
    b = self.b_min + j*self.pixel_size
    angle = math.sqrt(pow(l - self.l_source[ind], 2) + pow(b - self.b_source[ind], 2))
    temp_ = math.exp( -pow(angle,2)/(2*pow(self.psf_sigma[ind],2))) #Gaussian calculation 
    return temp_

  def poisson_fill(self, x, init=1, p = 0.05 ): #p = 0.05
    #poisson random number generator, takes some input number x and generates an integer 
    m = int (x/p) #double input divided by probability of an event (0.05 in this case)
    if m<1: 
      m =1  #need at least one trial, m will give the number of trials
    p = x / (float (m)) #updating the probability from the decided number of trials
    if init == 1:
      init=0
    P = int (p * self.RAND_MAX) #P = probability times highest randomly generated int
    iter = 0
    N = 0
    for i in range(m):
      iter += 1
      temp = random.randint(0, self.RAND_MAX)
      if (temp < P):
        N +=1
    return N

  def make_source(self, index):
    norm = 0.0
    counts = np.zeros((round(self.nb), round(self.nl)), dtype = float)
    for (x,y), value in np.ndenumerate(counts):
      tmp_ = self.gaussian_fill_m(x, y, index)
      counts[x,y] = tmp_
      norm += tmp_
    counts = np.multiply(counts, (self.source_counts[index]/norm))
    return counts

  def fill_counts(self):
    #fills the counts np array with floats and int_counts with ints, sets norm
    list_counts = []
    self.counts = np.zeros((round(self.nb), round(self.nl)), dtype = float)
    
    if self.num_sources > 1:
      for i in range(self.num_sources):
        new_counts = self.make_source(i)
        list_counts.append(new_counts)
      for i in list_counts:
        for (x, y), value in np.ndenumerate(self.counts):
          self.counts[x, y] += i[x, y]
      for (x,y), value in np.ndenumerate(self.counts):
        self.counts[x, y] += self.constant_     
    
    else:
      for (x,y), value in np.ndenumerate(self.counts):
        tmp_ = self.gaussian_fill(x, y)
        self.counts[x,y] = tmp_
        self.norm += tmp_ 
      for (x,y), value in np.ndenumerate(self.counts):
        self.counts[x, y] += self.constant_
      self.counts = np.multiply(self.counts, (self.source_counts/self.norm))

    self.int_counts = np.zeros((round(self.nb), round(self.nl)), dtype=int)

    for (x,y), value in np.ndenumerate(self.int_counts):
      tmp = self.poisson_fill(self.counts[x, y])
      self.int_counts[x , y] = tmp


  def convert_to_jpg(self):
    #takes np array and then converts to jpg image and returns name of file saved
    #For some reason, saving the images in RGB format produces very different looking images
    #Why does this happen?
    #we may have to normalize pixel values from 0-1 instead of 0-255
    nn = self.int_counts.astype(float) * 255.0 / float (self.int_counts.max())
    nn = np.rint(nn)
    nn = nn.astype(np.uint8)
    im = Image.fromarray(nn)
    jpeg_nme = self.namef + ".jpeg"
    jpeg_path = self.folder_path + jpeg_nme
    im.save(jpeg_path,"JPEG")
    print("File with name {}.jpeg generated.".format(self.namef))


  def get_num_pixels(self):
    #gets the total number of pixels + the pixels along the sides of the image
    #alternatively we could use nl, nb variables for width/height but if these ever get mixed up in the class
    #this could be dangerous, with Image.open() we are more likely to catch mistakes here

    filepath = self.folder_path+ self.namef + ".jpeg"

    width, height = Image.open(filepath).size
    self.num_pixels = width*height
    self.num_pix_w = width
    self.num_pix_h = height

    if width < 300 or height < 300:
      print("Image size has too few pixels - adjust model for {} x {} image".format(width, height))
    if width > 2000 or height > 2000:
      print("Image size has too many pixels - adjust model for {} x {} image".format(width, height))
    print("Total pixels: {}, height pixels: {}, width pixels: {}".format(self.num_pixels, self.num_pix_h, self.num_pix_w))
    return height, width


  def get_xyminmax(self):
    #gets the xmin, xmax, ymin, and ymax pixels of the source in the image
    self.xmin = []
    self.ymin = []
    self.xmax = []
    self.ymax = []
    self.wid_ = []
    self.heig_ = []
    self.xcsource = []
    self.ycsource = []
    self.temp_spr = []

    for i in range(self.num_sources):
      self.temp_spr.append(self.spread*self.psf_sigma[i])
      x = abs(self.b_min - self.b_source[i])
      y = abs(self.l_min - self.l_source[i])
      xcs = float (x) / self.pixel_size
      ycs = float (y) / self.pixel_size
      ymax_ = ycs + self.temp_spr[i]
      xmax_ = xcs + self.temp_spr[i]
      xmin_ = xcs - self.temp_spr[i]
      ymin_ = ycs - self.temp_spr[i]
      self.xcsource.append(xcs)
      self.ycsource.append(ycs)
      self.xmin.append(xmin_)
      self.ymin.append(ymin_)
      self.xmax.append(xmax_)
      self.ymax.append(ymax_)
      self.wid_.append(abs(xmax_ - xmin_))
      self.heig_.append(abs(ymax_ - ymin_))
      print("x coord of source #{} is {} and y coord is {}".format(i+1, self.xcsource[i], self.ycsource[i]))
    self.bbox = [self.xmin, self.ymin, self.wid_, self.heig_]

  def gen_xml_annot(self):
    #generates an .xml file with annotations giving location of the point source
    node_root = Element('annotation') #so that the root element of xml file says it is an annotation
    node_folder = SubElement(node_root, 'folder') #sub-element of root givng folder image is located in
    node_folder.text = 'train' #name of folder image is located in
 
    node_filename = SubElement(node_root, 'filename') #sub-element of root giving file name
    node_filename.text = self.namef + ".jpeg"
 
    node_size = SubElement(node_root, 'size') #sub-element of root giving size measurements
    node_width = SubElement(node_size, 'width') #sub-element of size giving width in pixels
    node_height = SubElement(node_size, 'height') #sub-element of size giving height in pixels
    node_depth = SubElement(node_size, 'depth') #sub-element of size, depth is either 3 (RGB) or 0 (black and white)
    node_depth.text = '1' #if we decide to use color images, depth has to = 3
    node_width.text = str (self.num_pix_w)
    node_height.text = str (self.num_pix_h)

    #may need to add <segmented> attribute before <object>
    for i in range(self.num_sources):
      node_object = SubElement(node_root, 'object')
      node_name = SubElement(node_object, 'name')
      #may need to add <pose> attribute here
      node_truncated = SubElement(node_object, 'truncated')
      node_difficult = SubElement(node_object, 'difficult')
      node_bndbox = SubElement(node_object, 'bndbox')

      node_name.text = 'point source'
      node_difficult.text = '0'
      node_truncated.text = '0'
      node_xmin = SubElement(node_bndbox, 'xmin')
      node_ymin = SubElement(node_bndbox, 'ymin')
      node_xmax = SubElement(node_bndbox, 'xmax')
      node_ymax = SubElement(node_bndbox, 'ymax')
      node_xmin.text = str (self.xmin[i])
      node_xmax.text = str (self.xmax[i])
      node_ymin.text = str (self.ymin[i])
      node_ymax.text = str (self.ymax[i])
 
    xml = tostring(node_root, pretty_print=True) #Formatted display, the newline of the newline
    dom = parseString(xml)
    #if resized:
     # xml_path = self.second_f_path + "{}_rzd.xml".format(self.namef)
    #else:
    xml_path = self.folder_path + self.namef + ".xml"

    f = open(xml_path, "wb")
    f.write(xml)
    f.close()


In [2]:
#Function to visualize images in a folder
def visualize_imgs(path_to_folder):
  items = os.listdir(path_to_folder)
  for each_image in items:
    if each_image.endswith(".jpeg"):
      full_path = path_to_folder + each_image
      image = cv2.imread(full_path)
      image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
      plt.figure()
      plt.imshow(image)
      plt.title(each_image)

def visualize_img(path_to_img):
  image = cv2.imread(path_to_img)
  image = cv2.cvtColor(image,cv2.COLOR_BGR2RGB)
  plt.figure()
  plt.imshow(image)

In [None]:
#!rm -r /content/train/
!mkdir train

tmp_ = Point_Source_Generator("/content/train/", -20, 35, 5000, "imgtest", 0.5, 2, 1)
tmp_.fill_counts()
tmp_.convert_to_jpg()

mkdir: cannot create directory ‘train’: File exists
File with name imgtest.jpeg generated.


In [None]:
counts = [100, 300, 400, 500, 1000, 3000, 5000]
B_ = [-40, -35, -30, -25, -20, -15, -10, -5, 0, 5, 10, 15, 20, 25, 30, 35, 40] #B goes from -50 to 50
L_ = [20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, 100] #L goes from 10 to 110
psf = [0.2, 0.5, 0.7, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 5.0]
const = [5.0, 7.0, 8.0, 9.0, 10.0] #note: noise to signal ratio for faintest source is 5, LAT should detect
num_src = [3, 5, 7]

In [None]:
from google.colab import drive
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
name_ = "img"
mit = -1
ids = 1

for i in range(15):  #images
  for ns in num_src:
    mit += 1
    name_new = name_ + str (mit)
    BB = []
    LL = []
    CC = []
    PSF = []
    bind = random.sample(range(0, len(B_)), ns)
    lind = random.sample(range(0, len(L_)), ns)
    cind = [random.randint(0, len(counts)-1)  for i in range(ns)]
    psfind = [random.randint(0, len(psf)-1)  for i in range(ns)]
    for i in bind:
      BB.append(B_[i])
    for j in lind:
      LL.append(L_[j])
    for k in psfind:
      PSF.append(psf[k])
    for m in cind:
      CC.append(counts[m])
    cnst = const[random.randint(0, len(const)-1)]
    print("cnst is ", cnst)

    tmp_ = Point_Source_Generator("/content/train/", BB, LL, CC, name_new, PSF, cnst, ns)
    tmp_.fill_counts()
    tmp_.convert_to_jpg()
    tmp_.get_num_pixels()
    tmp_.get_xyminmax()
    tmp_.gen_xml_annot()

cnst is  10.0
File with name img0.jpeg generated.
Total pixels: 250000, height pixels: 500, width pixels: 500
x coord of source #1 is 325.0 and y coord is 175.0
x coord of source #2 is 450.0 and y coord is 200.0
x coord of source #3 is 125.0 and y coord is 100.0
cnst is  5.0
File with name img1.jpeg generated.
Total pixels: 250000, height pixels: 500, width pixels: 500
x coord of source #1 is 450.0 and y coord is 100.0
x coord of source #2 is 325.0 and y coord is 450.0
x coord of source #3 is 275.0 and y coord is 275.0
x coord of source #4 is 150.0 and y coord is 300.0
x coord of source #5 is 300.0 and y coord is 50.0
cnst is  5.0
File with name img2.jpeg generated.
Total pixels: 250000, height pixels: 500, width pixels: 500
x coord of source #1 is 175.0 and y coord is 325.0
x coord of source #2 is 300.0 and y coord is 450.0
x coord of source #3 is 200.0 and y coord is 300.0
x coord of source #4 is 325.0 and y coord is 125.0
x coord of source #5 is 225.0 and y coord is 225.0
x coord of

In [None]:
!zip -r /content/TRAIN_XML.zip /content/train
!cp "/content/TRAIN_XML.zip" "/content/drive/MyDrive/DOESULI"

updating: content/train/ (stored 0%)
updating: content/train/img34.jpeg (deflated 1%)
updating: content/train/img9.xml (deflated 73%)
updating: content/train/img35.xml (deflated 84%)
updating: content/train/img23.jpeg (deflated 1%)
updating: content/train/img35.jpeg (deflated 68%)
updating: content/train/img2.jpeg (deflated 3%)
updating: content/train/img39.jpeg (deflated 2%)
updating: content/train/img23.xml (deflated 84%)
updating: content/train/img40.xml (deflated 80%)
updating: content/train/img17.xml (deflated 84%)
updating: content/train/img40.jpeg (deflated 1%)
updating: content/train/img15.xml (deflated 73%)
updating: content/train/img0.xml (deflated 73%)
updating: content/train/img32.jpeg (deflated 26%)
updating: content/train/img19.xml (deflated 80%)
updating: content/train/img38.jpeg (deflated 1%)
updating: content/train/img28.jpeg (deflated 2%)
updating: content/train/img34.xml (deflated 80%)
updating: content/train/img14.xml (deflated 83%)
updating: content/train/img19.jpe