# In [None]:  (Jupyter notebook cell marker left over from the export)
## Changes from v1.1 to v1.2
# - adding naming system: <beach_name>_<original image name>_tile<i>_<j>.jpg
# - create zooniverse manifest file automatically
#   - IDs are labelled consecutively, counting up from <survey id> * 100000 (i.e. from 100000 for the first beach)

import cv2
import numpy as np
#import matplotlib.pyplot as plt
import os
import math
import csv
#import operator

# debugging
from IPython.core.debugger import Pdb
pdb = Pdb()

#============================================================================================================
# EDIT: set these parameters according to your dataset
beach_name = "30birubi"                 # prefix of every tile file name and name of the output folder
beach_name_manifest = "Birubi"          # written to the 'beach' column of the manifest
geo_area_manifest = "New South Wales"   # written to the 'region' column of the manifest
date_survey_manifest = "09/11/2017"     # written to the 'date' column of the manifest
tile_id = 30*100000  # assuming less than 100'000 tiles per beach (the counter is incremented once per saved
                     # tile); the leading number is the survey id
source_img_path = '../survey_data/30. Birubi/'  # set to '.' to read all images in current directory
save_path = os.path.join('zooniverse_tiles/', beach_name)
#============================================================================================================

# create output folder if it doesn't already exist (exist_ok avoids the check-then-create race)
os.makedirs(save_path, exist_ok=True)
print('Saving images to', save_path, '.')

# prepare manifest file: the first row is the CSV header, one row per tile is appended later
manifest_line = [['subject_id', 'image_name1', 'attribution', 'beach', 'region', 'date']]  # QUESTION: should the image name be an absolute or relative path?

# desired output image parameters
desired_output_width = 900
desired_output_height = 675
overlap = 0.1  # desired overlap of neighbouring tiles (as a fraction of the tile size) - this is adjusted per
               # image so that full-size tiles fit the image exactly, since width and height have precedence

# Collect all regular files in the source directory whose name ends in '.jpg' (matched case-insensitively,
# so '.JPG' is included; other file types are ignored).
# os.listdir returns names in arbitrary order; sorting makes the order in which tile IDs are handed out
# reproducible between runs and machines.
images = [
    os.path.join(source_img_path, f)
    for f in sorted(os.listdir(source_img_path))
    if os.path.isfile(os.path.join(source_img_path, f)) and f.lower().endswith('.jpg')
]

def _tile_origins(input_size, tile_size, min_overlap):
    """Return the tile start offsets along one image axis and the resulting overlap.

    Tiles of length ``tile_size`` are spread evenly over ``input_size`` pixels so
    that the first tile starts at 0 and the last tile ends exactly at the image
    border. The number of tiles is rounded UP, so neighbouring tiles overlap by
    at least ``min_overlap`` (a fraction of ``tile_size``) and no part of the
    image is left uncovered.

    Returns a tuple ``(origins, overlap)`` where ``origins`` is the list of
    integer start offsets and ``overlap`` is the adjusted overlap fraction.
    If the image is not larger than one tile, a single tile starting at 0 is
    returned with an overlap of 0.0.
    """
    spare = input_size - tile_size  # distance between the first and the last tile origin
    if spare <= 0:
        return [0], 0.0
    steps = math.ceil(spare / (tile_size * (1 - min_overlap)))
    # integer arithmetic: exact floor(k * spare / steps), the last origin is exactly `spare`
    origins = [(k * spare) // steps for k in range(steps + 1)]
    adjusted_overlap = (tile_size - spare / steps) / tile_size
    return origins, adjusted_overlap


# loop through all images in directory
for input_file_name in images:
    # load image; cv2.imread reports an unreadable file by returning None instead of raising
    loaded_image = cv2.imread(input_file_name)
    if loaded_image is None:
        print('Could not read image', input_file_name, '- skipping.')
        continue
    img = loaded_image

    # numpy image arrays are indexed (row, column), i.e. (height, width)
    input_height, input_width = img.shape[0:2]
    print('Input image dimensions: w =', input_width, ', h =', input_height)

    # the tile size has precedence; only the overlap is adjusted to make the tiles fit the image
    output_width = desired_output_width
    output_height = desired_output_height

    # calculate where the tiles start and by how much the overlap had to be increased to fit them
    # without going beyond the image boundaries
    x_origins, overlap_adjusted_horizontal = _tile_origins(input_width, output_width, overlap)
    y_origins, overlap_adjusted_vertical = _tile_origins(input_height, output_height, overlap)

    # number of tile steps per axis; the number of tiles is one more (the tile starting at 0)
    horizontal_tiles = len(x_origins) - 1
    vertical_tiles = len(y_origins) - 1

    print('Creating', horizontal_tiles + 1, '*', vertical_tiles + 1, '=', (horizontal_tiles + 1)*(vertical_tiles + 1), 'output image tiles.')
    print('Tile width:', output_width)
    print('Tile hight:', output_height)
    print('Horizontal Overlap:', '{0:.2f}'.format(overlap_adjusted_horizontal * 100), '%.')
    print('Vertical Overlap:', '{0:.2f}'.format(overlap_adjusted_vertical * 100), '%.')

    # original image name without directory and without the final extension
    image_stem = os.path.splitext(os.path.basename(input_file_name))[0]

    # loop through original image and save each tile
    for i, tile_begin_x in enumerate(x_origins):
        tile_end_x = tile_begin_x + output_width
        for j, tile_begin_y in enumerate(y_origins):
            tile_end_y = tile_begin_y + output_height
            print('Processing file', input_file_name, ', tile (', i, ',', j, ').')
            print('(', i, ',', j, ')', 'x:', tile_begin_x, tile_end_x, 'y:', tile_begin_y, tile_end_y)

            # rows are y, columns are x; slicing clips at the image border, so an image smaller
            # than the desired tile size yields one (smaller) tile
            tile = img[tile_begin_y:tile_end_y, tile_begin_x:tile_end_x]

            # all tiles of all images are saved in the same folder:
            # <beach_name>_<original image name>_tile<i>_<j>.jpg
            tile_file_name = beach_name + '_' + image_stem + '_tile' + str(i) + '_' + str(j) + '.jpg'
            output_file_name = os.path.join(save_path, tile_file_name)

            print('Saving tile (', i, ',', j, ') to ', output_file_name)
            # cv2.imwrite returns False when the file could not be written; do not list such a
            # tile in the manifest
            if not cv2.imwrite(output_file_name, tile):
                print('Could not write tile', output_file_name, '- not added to manifest.')
                continue

            # add one manifest row per saved tile; IDs count up from the configured start value
            tile_id = tile_id + 1
            manifest_line.append([str(tile_id), tile_file_name, 'Pete',
                                  beach_name_manifest, geo_area_manifest, date_survey_manifest])

print(manifest_line)
print(save_path)

# write the zooniverse manifest next to the tile folder: <save_path>_manifest.csv
# newline='' lets the csv module control the line endings itself
with open(save_path + '_manifest.csv', 'w', newline='') as manifest_file:
    writer = csv.writer(manifest_file, delimiter=',')
    writer.writerows(manifest_line)

# debugging aid: dump the last processed input image to the current directory
# (.png would give higher quality, but the file gets huge)
# `img` only exists if at least one image was loaded, so guard against it being undefined
try:
    last_image = img
except NameError:
    last_image = None
if last_image is not None:
    cv2.imwrite('imgcv.jpg', last_image)

