# Generate images

In [9]:
#imports
import os
import glob
#restrict gpu usage
os.environ["CUDA_VISIBLE_DEVICES"]="3"

from PIL import Image
import cv2
import bezier
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from matplotlib.patches import Rectangle

# setting theme
#px = 1/plt.rcParams['figure.dpi']
#sns.set(style=None, rc={'figure.figsize':(256*px, 256*px)})

# generate random floating point values
from numpy.random import seed, rand, randint
from random import gauss

import fnmatch

# clear folders and files

In [2]:
# Remove the artefacts of a previous run. Missing files are not an error, so
# this cell also works on a fresh checkout; sub-directories are left in place.
for i in ('workspace/mobile/images/multi_lines/imgs/*',
          'workspace/mobile/images/multi_lines/train/*',
          'workspace/mobile/images/multi_lines/test/*'):
    files = glob.glob(i)
    for f in files:
        # glob may also return directories, which os.remove cannot delete.
        if os.path.isfile(f) or os.path.islink(f):
            os.remove(f)

for f in ('workspace/mobile/images/multi_lines/train.csv',
          'workspace/mobile/images/multi_lines/test.csv'):
    try:
        os.remove(f)
    except FileNotFoundError:
        # Nothing to clean up: the CSV has not been generated yet.
        pass

# Generate XML

In [3]:
# for xml generation

# xml.etree.cElementTree was removed in Python 3.9; ElementTree uses the C
# accelerator automatically when it is available.
import xml.etree.ElementTree as ET


def xmlgen2(foln, fn, path, a_path, width, height, random_nodes, length):
    """Write one annotation XML file describing a generated image.

    The children of ``<_object>`` are written in a fixed order (name, pose,
    lengths, truncated, difficult, bndboxs); xml_to_csv2 in this file reads
    them by position, so the order must not change.

    Args:
        foln: Text of the ``<folder>`` element.
        fn: Image base name without extension; ``fn + '.png'`` becomes
            ``<filename>`` and ``fn`` itself becomes ``<_object>/<name>``.
        path: Text of the ``<path>`` element.
        a_path: Destination of the XML file.
        width: Image width; converted with ``str()``.
        height: Image height; converted with ``str()``.
        random_nodes: Iterable of 4-item sequences, one per line. The items
            are written, in order, to ``<xmin>``, ``<ymin>``, ``<xmax>`` and
            ``<ymax>`` under ``<bndboxs>/<b_i>``.
        length: Iterable of line lengths, written to ``<lengths>/<l_i>``.

    NOTE(review): multi_line_datasetGen passes row 0 followed by row 1 of the
    node array for each line, so the tag names used here may not describe the
    values they hold -- confirm the intended layout before relying on them.
    """
    root = ET.Element("annotation")
    ET.SubElement(root, "folder").text = foln
    ET.SubElement(root, "filename").text = fn + '.png'
    ET.SubElement(root, "path").text = path

    source = ET.SubElement(root, "source")
    ET.SubElement(source, "database").text = "unknown"

    size = ET.SubElement(root, "size")
    # ElementTree can only serialize string text, so accept numbers as well.
    ET.SubElement(size, "width").text = str(width)
    ET.SubElement(size, "height").text = str(height)

    ET.SubElement(root, "segmented").text = "0"

    _object = ET.SubElement(root, "_object")
    ET.SubElement(_object, "name").text = fn
    ET.SubElement(_object, "pose").text = "Unspecified"

    lengths = ET.SubElement(_object, "lengths")
    for i, j in enumerate(length):
        ET.SubElement(lengths, "l_{}".format(i)).text = str(j)

    ET.SubElement(_object, "truncated").text = "0"
    ET.SubElement(_object, "difficult").text = "0"

    bndboxs = ET.SubElement(_object, "bndboxs")
    for i, j in enumerate(random_nodes):
        xmin, ymin, xmax, ymax = j

        hook = ET.SubElement(bndboxs, "b_{}".format(i))
        ET.SubElement(hook, "xmin").text = str(xmin)
        ET.SubElement(hook, "ymin").text = str(ymin)
        ET.SubElement(hook, "xmax").text = str(xmax)
        ET.SubElement(hook, "ymax").text = str(ymax)

    ET.ElementTree(root).write(a_path)

# Write the annotation XML for one generated image
def xmlmaker2(arr):
    """Unpack one sample and hand it to xmlgen2 with the dataset paths.

    Args:
        arr: A 3-item sequence ``(fn, random_nodes, length)`` where ``fn`` is
            the image base name without extension.

    The image size is read from the module-level ``image_width`` and
    ``image_height`` and passed on as strings.
    """
    fn, random_nodes, length = arr

    foln = "./workspace/mobile/images/multi_lines/"
    xmlgen2(
        foln,
        fn,
        "{}/imgs/{}.png".format(foln, fn),
        "{}/imgs/{}.xml".format(foln, fn),
        str(image_width),
        str(image_height),
        random_nodes,
        length,
    )
        
#xmlmaker()

# line maker

In [4]:
image_height = image_width = 400
# Figure sizes in inches for a given pixel size, at 72 dpi:
# for 400x400, inches = 5.56,5.56
# for 600x600, inches = 8.334,8.334
# for 144x144, inches = 2,2


def multi_line_datasetGen(_, randLines):
    """Draw ``randLines`` random straight lines, save the PNG and its XML.

    Args:
        _: Identifier used in the output name ``li_<_>`` (image and XML).
        randLines: Number of lines to draw; 0 produces an empty image.

    For each line a 2x2 array of uniform random values in [0, image_width)
    is drawn, plotted as a degree-1 bezier curve, and recorded together with
    the curve length. The image is written to
    ./workspace/mobile/images/multi_lines/imgs/ and the annotation is
    written next to it through xmlmaker2.
    """
    fig = plt.gcf()
    fig.set_size_inches(5.56, 5.56)
    ax = plt.subplot()
    ax.set_axis_off()

    # plt.axis expects [xmin, xmax, ymin, ymax]: width bounds x, height bounds y.
    plt.axis([0, image_width, 0, image_height])
    plt.tight_layout(pad=0)

    random_nodes = []
    curveLength = []

    for _line in range(randLines):
        # assumes bezier's convention that each column of the node array is
        # one point and each row one coordinate axis -- TODO confirm
        r_nodes = np.random.uniform(0, image_width, [2, 2])

        # Flatten to row 0 followed by row 1 for the annotation.
        random_nodes.append(np.concatenate((r_nodes[0], r_nodes[1])))

        curve = bezier.Curve(r_nodes, degree=1)
        curveLength.append(curve.length)
        curve.plot(num_pts=256, ax=ax)

    fn = 'li_{}'.format(_)
    # Pin the dpi: 5.56 in only yields a 400 px image at 72 dpi, and the
    # default would otherwise follow whatever figure.dpi the session uses.
    ax.figure.savefig(
        './workspace/mobile/images/multi_lines/imgs/{}.png'.format(fn),
        dpi=72,
    )

    # xmlmaker2 only unpacks its argument, so a plain tuple is enough.
    xmlmaker2((fn, random_nodes, curveLength))

    plt.close('all')
        
        
#multi_line_datasetGen("s", 3)

In [5]:
def exec_file_maker(num_images=5000, max_lines=3):
    """Generate the whole synthetic dataset.

    Image ``k`` (0-based) is drawn with ``k % (max_lines + 1)`` lines, so the
    line count cycles 0, 1, ..., max_lines, 0, ... across the dataset.

    Args:
        num_images: Number of images to generate.
        max_lines: Largest number of lines drawn in a single image.

    Raises:
        ValueError: If ``max_lines`` is negative.
    """
    if max_lines < 0:
        raise ValueError("max_lines must be >= 0")

    for index in range(num_images):
        multi_line_datasetGen(index, index % (max_lines + 1))

exec_file_maker()

# Partition dataset

In [38]:
# Imports

import os
import re
from shutil import copyfile
import argparse
import math
import random

# Partition images

def partition_images(source, dest, ratio=0.1, copy_xml=False):
    """Copy the images in ``source`` into ``dest/train`` and ``dest/test``.

    A random subset of ``ceil(ratio * number_of_images)`` images (clamped to
    the range 0..number_of_images) goes to ``dest/test``; every other image
    goes to ``dest/train``. Files are copied, never moved, and both target
    directories are created when missing.

    Args:
        source: Directory containing the images (.jpg, .jpeg or .png).
        dest: Directory that receives the ``train`` and ``test`` folders.
        ratio: Share of the images that goes to the test set.
        copy_xml: When true, also copy ``<image base name>.xml`` from
            ``source`` next to every copied image.

    Raises:
        FileNotFoundError: If ``source`` does not exist, or ``copy_xml`` is
            true and an image has no matching XML file.
    """
    train_dir = os.path.join(dest, 'train')
    test_dir = os.path.join(dest, 'test')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # The dot before the extension is escaped so that names such as
    # "backup_png" are no longer mistaken for images.
    image_name = re.compile(r'[a-zA-Z0-9\s_\\.\-\(\):]+\.(jpg|jpeg|png)$')
    images = [f for f in os.listdir(source) if image_name.search(f)]

    # Clamp so that ratio > 1 selects everything instead of crashing and
    # ratio < 0 selects nothing.
    num_test_images = min(len(images), max(0, math.ceil(ratio * len(images))))
    test_images = set(random.sample(images, num_test_images))

    for filename in images:
        target_dir = test_dir if filename in test_images else train_dir
        copyfile(os.path.join(source, filename),
                 os.path.join(target_dir, filename))
        if copy_xml:
            xml_filename = os.path.splitext(filename)[0] + '.xml'
            copyfile(os.path.join(source, xml_filename),
                     os.path.join(target_dir, xml_filename))

# Earlier target, kept for reference: the generated multi_lines images.
#partition_images("./workspace/mobile/images/multi_lines/imgs", "./workspace/mobile/images/multi_lines")
# Split the images under 3CP/orig into 3CP/train and 3CP/test with the
# function's defaults (ratio=0.1, copy_xml=False, so no XML files are copied).
partition_images("./workspace/mobile/datasets/3CP/orig", "./workspace/mobile/datasets/3CP")

# XML to CSV

In [7]:
import os
import glob
import pandas as pd
import io
import xml.etree.ElementTree as ET

def xml_to_csv2(path):
    """Collect the annotation XML files in ``path`` into one DataFrame.

    Elements are read by position, matching what xmlgen2 writes: the first
    two children of ``<size>`` are width and height, and within each
    ``<_object>`` child 0 is the name, child 2 the lengths and child 5 the
    boxes. One row is produced per line; an object without any line
    produces a single row whose length, class and coordinates are the
    string ``'0'``.

    Args:
        path: Directory that contains the ``*.xml`` files.

    Returns:
        A DataFrame with the columns filename, width, height, length, class,
        xmin, xmax, ymin, ymax. The four coordinate columns are filled, in
        that order, from the 1st..4th child of each box element.

    NOTE(review): xmlgen2 tags those four children xmin, ymin, xmax, ymax,
    which differs from the column names used here -- confirm which naming
    is the intended one.
    """
    xml_list = []
    # sorted() keeps the row order reproducible; glob order depends on the
    # filesystem.
    for xml_file in sorted(glob.glob(path + '/*.xml')):
        root = ET.parse(xml_file).getroot()

        for member in root.findall('_object'):
            size = root.find('size')
            name = member[0].text
            width = int(float(size[0].text))
            height = int(float(size[1].text))
            lengths = member[2]
            boxes = member[5]

            # An Element's truth value is deprecated; test the child count.
            if len(lengths) == 0:
                xml_list.append((name, width, height) + (str(0),) * 6)
                continue

            for i, line_length in enumerate(lengths):
                box = boxes[i]
                xml_list.append((name,
                                 width,
                                 height,
                                 line_length.text,
                                 str(1),
                                 float(box[0].text),
                                 float(box[1].text),
                                 float(box[2].text),
                                 float(box[3].text)))

    column_name = ['filename', 'width', 'height', 'length',
                   'class', 'xmin', 'xmax', 'ymin', 'ymax']
    return pd.DataFrame(xml_list, columns=column_name)

# Build one annotation CSV per split from the XML files found in
# workspace/mobile/images/multi_lines/<split>, written next to that folder
# as <split>.csv.
# NOTE(review): the split folders must already contain .xml files, and
# partition_images only copies them when called with copy_xml=True --
# confirm how the XML files get there.
for i in ["test", "train"]:
    examples = xml_to_csv2("workspace/mobile/images/multi_lines/{}".format(i))
    #print(examples)

    examples.to_csv("workspace/mobile/images/multi_lines/{}.csv".format(i), index=None)

In [8]:
def shuffler():
    """Rewrite the test and train CSVs with their rows in random order."""
    csv_template = "workspace/mobile/images/multi_lines/{}.csv"
    for split in ("test", "train"):
        csv_path = csv_template.format(split)
        # sample(frac=1) returns every row exactly once, in shuffled order.
        shuffled = pd.read_csv(csv_path).sample(frac=1)
        shuffled.to_csv(csv_path, index=None)
        
#shuffler()

In [40]:
# Split CSV according to train/test folder
# Writes 3CP/test.csv: the rows of orig.csv whose 'filename' value equals the
# base name (extension stripped) of a .png file found in the test folder.
# NOTE(review): train_img is defined but never used and no train.csv is
# written in this cell -- confirm whether the train split is produced elsewhere.

train_img = "./workspace/mobile/datasets/3CP/train"
test_img = "./workspace/mobile/datasets/3CP/test"
# Only .png files in the test folder are considered.
total_files = fnmatch.filter(os.listdir(test_img), '*.png')

csvd = "./workspace/mobile/datasets/3CP/orig.csv"

df=pd.read_csv(csvd, sep=',',header=0)

# Strip the extension so the names can be compared with the 'filename' column
# (presumably stored without extension -- verify against orig.csv).
orig_filenames = []
for i in total_files:
    orig_filenames.append(os.path.splitext(i)[0])

x = df.loc[df['filename'].isin(orig_filenames)]
x.to_csv("./workspace/mobile/datasets/3CP/test.csv", index=False)