# Finding time-lapse photos

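Use OCR to read the metadata printed on each photo, then compare the photo's sequence flag with the standard flag images ("M" and "T", where "T" marks a time-lapse photo) to find the time-lapse photos and move them to a separate folder.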

In [0]:
# Mount Google Drive at /content/gdrive so that the photo folder and the
# flag template images stored under "My Drive" are reachable from this runtime.
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
# Install the Tesseract OCR engine and its Python wrapper.
# apt-get with -y never waits for a confirmation prompt (apt itself warns that
# its CLI is not meant for scripted use); -qq keeps the cell output short.
!apt-get install -y -qq tesseract-ocr
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install -q pytesseract

In [0]:
# Work inside the photo folder on the mounted Drive: the batch function below
# only looks at JPG files in the current working directory.
%cd /content/gdrive/My\ Drive/CNBE01

Essential functions. For now, this part only works for Reconyx cameras.

In [0]:
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
import pytesseract as tes # this is the key package used in this OCR
import os
from pathlib import Path
import numpy as np

def extract_data_Reconyx(Img_dir,
                         entries_range={"date_time": (0, 0, 750, 50),
                                        "sequence_OCR": (750, 0, 1000, 50),
                                        "temp": (1720, 0, 1920, 50),
                                        "site": (0, 1030, 400, 1080)},
                         timelapstag="T",
                         timelaps_dest="./timelaps",
                         flag_box=(800, 0, 845, 30),
                         flag_M_path="/content/gdrive/My Drive/sequence_flag_M.JPG",
                         flag_T_path="/content/gdrive/My Drive/sequence_flag_T.JPG"):
    """Read the printed metadata of one Reconyx photo and move it if it is a time-lapse.

    Each region listed in ``entries_range`` is cropped out of the photo and run
    through Tesseract OCR. The sequence-flag region is then compared, pixel for
    pixel after conversion to 1-bit, against the two reference flag images to
    decide whether the photo carries the "M" or the "T" (time-lapse) flag.

    Parameters
    ----------
    Img_dir : str
        Path of the photo to process.
    entries_range : dict
        Maps a metadata name to the crop box ``(left, upper, right, lower)``
        holding that piece of text. The default is only read, never modified.
        NOTE(review): the default boxes look like they assume 1920x1080
        frames -- confirm for other resolutions.
    timelapstag : str
        Flag value ("unknown", "M" or "T") whose photos are moved.
    timelaps_dest : str
        Folder that receives the moved photos; created if missing.
    flag_box : tuple
        Crop box of the sequence flag inside the photo.
    flag_M_path, flag_T_path : str
        Paths of the reference images of the "M" and "T" flags.

    Returns
    -------
    dict
        One OCR string per key of ``entries_range`` plus ``original_path``,
        ``current_path`` (differs from ``original_path`` only when the photo
        was moved), ``series_flag`` and ``date_time_exif`` (``None`` when the
        photo has no EXIF date/time).
    """
    exif_datetime_tag = 306  # EXIF "DateTime" tag id

    res = {}  # result dictionary
    # The context manager closes the file handle before the photo is renamed.
    with Image.open(Img_dir) as Img:
        # _getexif() returns None for photos without EXIF data.
        exif_data = Img._getexif() or {}
        for entry, box in entries_range.items():
            # crop out the part of interest and OCR it
            res[entry] = tes.image_to_string(Img.crop(box))
        flag_curr_array = np.array(Img.crop(flag_box).convert("1"))

    res["original_path"] = Img_dir
    res["current_path"] = Img_dir

    with Image.open(flag_M_path) as flag_M:  # standard M flag
        flag_M_array = np.array(flag_M.convert("1"))
    with Image.open(flag_T_path) as flag_T:  # standard T flag (time-lapse)
        flag_T_array = np.array(flag_T.convert("1"))

    # array_equal is False (instead of failing) when the shapes differ.
    matches_M = np.array_equal(flag_M_array, flag_curr_array)
    matches_T = np.array_equal(flag_T_array, flag_curr_array)
    if matches_M == matches_T:
        # Neither template matches, or both do (identical templates): the
        # flag cannot be decided, so the photo is left where it is.
        res["series_flag"] = "unknown"
    elif matches_T:
        res["series_flag"] = "T"
    else:
        res["series_flag"] = "M"

    if res["series_flag"] == timelapstag:
        os.makedirs(timelaps_dest, exist_ok=True)
        # Build the target from the file name only, so the move also works for
        # paths that do not start with "./".
        new_path = os.path.join(timelaps_dest,
                                "timelaps_" + os.path.basename(Img_dir))
        os.rename(Img_dir, new_path)  # move time-lapse photo to its folder
        res["current_path"] = new_path  # make sure we still know where it is

    res["date_time_exif"] = exif_data.get(exif_datetime_tag)
    return res



In [0]:
import glob
import json
def extract_data_Reconyx_batch(entries_range={"date_time": (0, 0, 750, 50),
                                              "sequence": (750, 0, 1000, 50),
                                              "temp": (1720, 0, 1920, 50),
                                              "site": (0, 1030, 400, 1080)},
                               timelapstag="T",
                               timelaps_dest="./timelaps"):
    """Run ``extract_data_Reconyx`` on every JPG in the current working directory.

    Results are written to ``Metadata-OCR.json`` and failures to ``err.json``,
    both in the working directory and both keyed by photo file name. A photo
    that fails is recorded and skipped so that one bad file does not stop the
    whole batch.

    Parameters
    ----------
    entries_range : dict
        Metadata name -> crop box, forwarded to ``extract_data_Reconyx``.
        The default is only read, never modified.
    timelapstag : str
        Flag value whose photos are moved, forwarded unchanged.
    timelaps_dest : str
        Folder that receives the moved photos; created if it does not exist.
    """
    # Sorted so that processing order and JSON key order are reproducible.
    photo_dirs = sorted(glob.glob("./*.[jJ][pP][gG]"))  # all photos in the working directory
    res = {}
    err = {}
    os.makedirs(timelaps_dest, exist_ok=True)
    for photo_dir in photo_dirs:  # loop over photos
        photo_name = os.path.basename(photo_dir)
        try:
            res[photo_name] = extract_data_Reconyx(photo_dir, entries_range,
                                                   timelapstag, timelaps_dest)
        except Exception as e:
            # Exception objects are not JSON serializable; keep a readable
            # "Type: message" string so that err.json can actually be written.
            err[photo_name] = "{}: {}".format(type(e).__name__, e)
    with open('Metadata-OCR.json', 'w') as fp:  # write down metadata
        json.dump(res, fp)
    with open('err.json', 'w') as fperr:
        json.dump(err, fperr)



In [0]:
# Process every photo in the current working directory.
# (The function defined above is extract_data_Reconyx_batch; the previous
# name extract_data_batch does not exist and raised a NameError.)
extract_data_Reconyx_batch()

In [0]:
def timelaps_filter_Reconyx(path_list):
    """Run ``extract_data_Reconyx_batch`` inside each folder of ``path_list``.

    The batch function works on the current working directory, so each folder
    is entered in turn. The working directory in effect at call time is
    restored afterwards, even when a folder fails.

    Parameters
    ----------
    path_list : iterable of str
        Folders to process. Absolute paths are recommended; relative paths are
        resolved against the working directory in effect at call time.
    """
    start_dir = os.getcwd()
    # Resolve everything up front: once the first chdir has happened, a
    # relative entry would otherwise be looked up inside the previous folder.
    folders = [os.path.abspath(path) for path in path_list]
    try:
        for folder in folders:
            os.chdir(folder)
            extract_data_Reconyx_batch()
    finally:
        os.chdir(start_dir)