In [3]:
import os
from scipy import misc
import numpy as np
from PIL import Image
import random
import json
import pickle

# TODO: clean up font folders such as "Bodoni Std Bold" so that image file names don't end in a doubled extension (.png.png)

def resize_image(image_path, base_height=105):
    """Open an image and scale it to a fixed height, keeping the aspect ratio.

    Args:
        image_path: Path of the image file to open.
        base_height: Target height in pixels (defaults to 105).

    Returns:
        A new PIL image whose height is ``base_height`` and whose width is
        the original width scaled by the same factor, truncated to an
        integer and never smaller than 1 pixel.
    """
    # The context manager closes the underlying file handle; resize() returns
    # an independent, fully loaded image, so it stays usable afterwards.
    with Image.open(image_path) as img:
        orig_width, orig_height = img.size
        height_percent = base_height / float(orig_height)
        # Clamp to 1 so an extremely narrow source cannot request a 0px width.
        scaled_width = max(1, int(float(orig_width) * float(height_percent)))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        return img.resize((scaled_width, base_height), Image.LANCZOS)

def generate_crop(img, num_crops=15, crop_size=105):
    """Cut random square crops out of an image at distinct horizontal offsets.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` and a
            ``crop((left, upper, right, lower))`` method, e.g. a PIL image.
        num_crops: Number of crops to produce (defaults to 15).
        crop_size: Side length in pixels of each square crop (defaults to 105).

    Returns:
        A list of ``num_crops`` crops, each anchored at the top edge with a
        distinct random left offset. The list is empty when the image is not
        wider than ``crop_size + num_crops`` pixels.
    """
    # Read the width straight from the image instead of converting the whole
    # picture to a numpy array just to measure one row.
    width = img.size[0]

    # With the defaults this is the original "width > 120" rule (105 + 15):
    # narrower images do not leave enough room for distinct crops.
    # TODO: find a way to pad/stretch narrow images rather than omitting them.
    if width <= crop_size + num_crops:
        return []

    # Valid left offsets run from 0 through width - crop_size inclusive, so
    # the right-most window is a candidate as well.
    offsets = random.sample(range(width - crop_size + 1), num_crops)
    # NOTE(review): assumes the image is at least crop_size pixels tall, as
    # produced by resize_image - confirm for other callers.
    return [
        img.crop((left, 0, left + crop_size, crop_size))
        for left in offsets
    ]

def crop_files(root_dir):
    """Build a mapping from font name to the cropped samples of that font.

    Every sub-directory of ``root_dir`` is treated as one font folder. The
    font name is the part of the folder name before the first ``-``, so
    several folders can contribute to the same font. Each file in a folder is
    resized with ``resize_image`` and cropped with ``generate_crop``.

    Args:
        root_dir: Path of the directory that contains the font folders.

    Returns:
        A dictionary whose keys are font names and whose values are lists of
        numpy arrays, one array per generated crop.
    """
    dictionary = {}
    for subdir in os.listdir(root_dir):  # goes through all font folders
        subdir_path = os.path.join(root_dir, subdir)
        # Stray files in the root (e.g. .DS_Store) are not font folders and
        # would make the inner os.listdir call fail.
        if not os.path.isdir(subdir_path):
            continue

        font_name = subdir.split("-")[0]
        if font_name not in dictionary:
            dictionary[font_name] = []
            print(font_name)  # progress output: one line per new font

        for file_name in os.listdir(subdir_path):  # all sample images
            image = resize_image(os.path.join(subdir_path, file_name))
            dictionary[font_name].extend(
                np.array(crop) for crop in generate_crop(image)
            )
    return dictionary
                
            
def main(root_dir='C:/Users/katsa/Documents/cs/cs1470/WhatTheFont/data/real_images/VFR_real_test',
         output_path='fonts.pkl'):
    """Crop every font sample under ``root_dir`` and pickle the result.

    Args:
        root_dir: Directory containing one folder of sample images per font.
            Defaults to the original author's local dataset location.
        output_path: File the resulting dictionary is pickled to
            (defaults to ``fonts.pkl`` in the current working directory).
    """
    print("Running preprocessing...")
    cropped_images = crop_files(root_dir)

    # No protocol is passed, so pickle uses the interpreter's default
    # protocol (not protocol 0).
    with open(output_path, 'wb') as output:
        pickle.dump(cropped_images, output)

    print("Finished preprocessing.")


# Run the preprocessing pipeline only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
        




Running preprocessing...
ACaslonPro
AJensonPro
AldusLTStd
AmigoStd
AnnaStd
AntiqueOliveStd
ApolloMTStd
ArcanaGMMStd
ArnoPro
AshleyScriptMTStd
AuriolLTStd
BakerSignetStd
BalzanoStd
BancoStd
BansheeStd
BaskervilleCyrLTStd
BeeskneesStd
BellCentennialStd
BellGothicStd
BellMTStd
BemboStd
BenguiatGothicStd
BerkeleyStd
BerlingLTStd
BermudaLPStd
BernhardModernStd
BernhardStd
BiffoMTStd
BodoniStd
BossaNovaMVBStd
BriemScriptStd
BrunoJBStd
BrushScriptStd
BulmerMTStd
CaeciliaLTStd
CaflischScriptPro
CalcitePro
CalibanStd
CalvertMTStd
CantoriaMTStd
CarolinaLTStd
CascadeScriptLTStd
Caslon224Std
CastellarMTStd
CaxtonStd
CelestiaAntiquaStd
CentaurMTStd
CenturyStd
CerigoStd
ChaparralPro
CharlemagneStd
CharmeStd
CheltenhamStd
ClairvauxLTStd
ClearfaceGothicLTStd
CloisterStd
CongaBravaStd
CongaBravaStencilStd
CopperplateGothicStd
CoronetLTStd
CourierStd
CritterStd
CronosPro
CushingStd
CutoutStd
DelphinLTStd
DidotLTStd
DINEngschriftStd
DINMittelschriftStd
DINNeuzeitGroteskStd
DiotimaLTStd
DiskusLTStd
Dorche

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

