In [1]:
import re
import os
import sys
import exceptions
import numpy as np
from PIL import Image
from tqdm import tqdm
import tensorflow as tf
import matplotlib.pyplot as plt
import thread
import time

%matplotlib inline



In [2]:
# get files in directory
def get_files(src_dir):
    """Return the names of the files directly inside ``src_dir``.

    Only the top level is inspected; sub-directories are not descended
    into. The names are bare file names, not full paths. If the walk
    yields nothing (e.g. ``src_dir`` cannot be listed) an empty list is
    returned.
    """
    # The first tuple produced by os.walk describes src_dir itself.
    top_level = next(os.walk(src_dir), None)
    if top_level is None:
        return []
    return list(top_level[2])

# create graph from pb file
def create_graph(pb_file, sess):
    """Import a serialized GraphDef from ``pb_file`` into the session's graph.

    Args:
        pb_file: path to a binary protobuf file containing a GraphDef.
        sess: TensorFlow session whose ``graph`` receives the imported nodes.

    Returns:
        ``sess.graph`` after the import. Nodes are imported with an empty
        name prefix (``name=''``).
    """
    # Only the graph has to be the default while importing. Entering the
    # session itself (``with sess:``) closes the session when the block
    # exits, which would hand the caller back a session that can no longer
    # run the graph that was just loaded.
    with sess.graph.as_default():
        with tf.gfile.FastGFile(pb_file, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            tf.import_graph_def(graph_def, name='')
    return sess.graph

def get_image_metrics(f, dpi_scale=72):
    """Return DPI and aspect-ratio metrics for an image file.

    If a file named ``f + '.correct.jpg'`` exists, it is read instead of
    ``f``.

    Args:
        f: path to the image.
        dpi_scale: divisor applied to both DPI values (default 72).

    Returns:
        A 3-tuple of floats:
        ``(dpi_x / dpi_scale, dpi_y / dpi_scale, extent_x / extent_y)``.
        The DPI pair is taken from the image's ``'dpi'`` info entry, else
        from ``'jfif_density'``, else ``(0, 0)``. The extent is the
        right/lower corner of ``getbbox()``.
    """
    corrected = f + '.correct.jpg'
    if os.path.isfile(corrected):
        f = corrected

    # Open in a ``with`` block so the underlying file handle is released
    # even when reading the image fails part-way through.
    with Image.open(f) as img:
        info = img.info

        if 'dpi' in info:
            dpi = info['dpi']
        elif 'jfif_density' in info:
            dpi = info['jfif_density']
        else:
            dpi = (0, 0)

        # getbbox() returns None for a completely empty (all-zero) image;
        # fall back to the full image size instead of failing on None[2:].
        bbox = img.getbbox()
        extent = bbox[2:] if bbox is not None else img.size

    return (
        float(dpi[0]) / dpi_scale,
        float(dpi[1]) / dpi_scale,
        float(extent[0]) / float(extent[1]),
    )

In [3]:
# Directories whose image files are processed, in this order.
INPUT_DIRS = [
    '/Users/seyoungoh/Downloads/final/train',
    '/Users/seyoungoh/Downloads/final/test',
]

# Directory that receives the per-image metric arrays.
OUTPUT_DIR_META = 'out/meta'

In [4]:
# Create the metadata output directory (and any missing parents) on first run.
if not os.path.isdir(OUTPUT_DIR_META):
    os.makedirs(OUTPUT_DIR_META)

In [5]:
# Full paths of every top-level file in each configured input directory,
# listed in INPUT_DIRS order. Iterating over INPUT_DIRS (rather than
# indexing entries 0 and 1) keeps this in step with the configuration.
files = [
    src_dir + '/' + name
    for src_dir in INPUT_DIRS
    for name in get_files(src_dir)
]

In [6]:
def calc_metrics(files):
    """Compute and store image metrics for each numbered ``.jpg`` in ``files``.

    A path is processed only if it ends in ``<digits>.jpg``; the digits are
    used as the image id. The metrics returned by ``get_image_metrics`` are
    saved as a float32 array to ``OUTPUT_DIR_META/<id>.npy``. Ids whose
    output file already exists are skipped, so re-running resumes where a
    previous run stopped.
    """
    id_pattern = re.compile(r'(\d+)\.jpg$')

    for path in files:
        match = id_pattern.search(path)
        if match is None:
            # Not a numbered .jpg file; nothing to do.
            continue

        meta_path = OUTPUT_DIR_META + '/' + match.group(1) + '.npy'
        if os.path.isfile(meta_path):
            # Already computed for this id.
            continue

        values = get_image_metrics(path)
        np.save(meta_path, np.array(values, dtype=np.float32))

In [7]:
# Hand four consecutive slices of the file list to calc_metrics, each on its
# own thread, printing whatever start_new_thread returns for each one.
# NOTE: nothing here waits for the threads to finish.
for start, stop in ((0, 25000), (25000, 50000), (50000, 75000), (75000, None)):
    print (thread.start_new_thread(calc_metrics, (files[start:stop],)))

139940083848960
139940075456256
139940067063552
139940058670848
