In [1]:
import pandas as pd
import numpy as np
import cv2
import os
from PIL import Image, ImageOps
import ntpath

In [47]:
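# Label codes for the non-digit symbol classes (symbol, dataset folder name):
# 10 = '.' (decimal)
# 11 = '+' (plus)
# 12 = '-' (minus)
# 13 = '*' (times)
# 14 = '/' (div)
# 15 = '=' (equal)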

In [2]:
def convert_images_to_png(folder, output_dir):
    """Re-encode every readable image in ``folder`` as a PNG in ``output_dir``.

    Each entry of ``folder`` is loaded with ``cv2.imread``; entries that
    cannot be decoded are skipped. Every decoded image is written to
    ``output_dir`` under its original base name with a ``.png`` extension.
    ``output_dir`` is created if it does not exist, even when nothing is
    converted.

    Args:
        folder: Directory containing the source images.
        output_dir: Directory that receives the PNG files.

    Returns:
        None. Progress and the number of written images are printed.
    """
    print('Folder: ' + folder)
    # Create the destination once up front rather than on every iteration.
    os.makedirs(output_dir, exist_ok=True)

    converted = 0
    for filename in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, filename))
        if img is None:
            # Not a decodable image (stray file, sub-directory, ...): there is
            # nothing to write, and handing None to cv2.imwrite would fail.
            continue
        print(filename)
        png_name = os.path.splitext(filename)[0] + '.png'
        # Only count the image when the write actually succeeded.
        if cv2.imwrite(os.path.join(output_dir, png_name), img):
            converted += 1

    print(f'Total images converted: {converted}')
    print('Finished')
    print('================================================================================')

In [3]:
# Convert the images in ./raw_data/jpg to PNG files under ./raw_data/png.
convert_images_to_png('./raw_data/jpg', './raw_data/png')
print('finished')

Folder: ./raw_data/jpg
IMG_3033.jpg
IMG_3034.jpg
IMG_3035.jpg
Total images converted: 3
Finished
finished


In [4]:
def tranfer_folders():
    """Mirror ``./archive/{train,eval}`` into ``./dataset/{train,eval}`` as PNGs.

    Creates ``./dataset``, ``./dataset/eval`` and ``./dataset/train``, then
    calls ``convert_images_to_png`` once per entry of ``./archive/train``
    followed by once per entry of ``./archive/eval``, writing each entry's
    images to the sub-folder of the same name under the matching target.
    """
    dataset_root = './dataset'
    os.makedirs(dataset_root, exist_ok=True)

    # Prepare both output roots before any conversion starts.
    eval_out = os.path.join(dataset_root, 'eval')
    train_out = os.path.join(dataset_root, 'train')
    for out_dir in (eval_out, train_out):
        os.makedirs(out_dir, exist_ok=True)

    # (source root, destination root) pairs, processed train first, then eval.
    split_pairs = (
        ('./archive/train', train_out),
        ('./archive/eval', eval_out),
    )
    for source_root, destination_root in split_pairs:
        for class_folder in os.listdir(source_root):
            convert_images_to_png(
                os.path.join(source_root, class_folder),
                os.path.join(destination_root, class_folder),
            )

In [5]:
def rescale_images_to_1d_array(img, model_image_width, model_image_height, flatten = False):
    """Convert a BGR image to grayscale and fit it into a fixed-size canvas.

    The image is converted to grayscale, scaled by the largest factor that
    keeps it inside ``model_image_width`` x ``model_image_height`` while
    preserving its aspect ratio, and then padded to exactly that size.

    Args:
        img: Image as a numpy array in BGR channel order (the conversion uses
            ``cv2.COLOR_BGR2GRAY``).
        model_image_width: Target width in pixels.
        model_image_height: Target height in pixels.
        flatten: When truthy, return the pixels as a 1-D array; otherwise
            return the 2-D array.

    Returns:
        numpy.ndarray holding the grayscale pixels, of shape
        ``(model_image_height, model_image_width)`` or flattened to 1-D.
    """
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Pillow sizes are (width, height); passing (height, width) would produce
    # a transposed canvas whenever the target is not square.
    model_image_size = (model_image_width, model_image_height)
    mat = Image.fromarray(img)  # convert the numpy array to a Pillow image
    img_width, img_height = mat.size
    # Use the smaller of the two ratios so both dimensions fit the target.
    scale_factor = min(model_image_height / img_height, model_image_width / img_width)
    mat = ImageOps.scale(mat, float(scale_factor))
    mat = ImageOps.pad(mat, model_image_size)
    rescale_img = np.array(mat)
    if flatten:
        return rescale_img.flatten()
    return rescale_img

In [6]:
def img_to_pdDataFrame(img, value):
    """Build a one-row DataFrame holding a label followed by the image pixels.

    The image is rescaled to 28x28 and flattened by
    ``rescale_images_to_1d_array``; ``value`` is inserted at position 0 so the
    first column carries the label and the remaining columns the pixels.

    Args:
        img: Image array accepted by ``rescale_images_to_1d_array``.
        value: Label to store in the first column.

    Returns:
        pandas.DataFrame with a single row.
    """
    pixels = rescale_images_to_1d_array(img, 28, 28, flatten = True)
    labelled_row = np.insert(pixels, 0, value)
    # Add a leading axis so the 1-D vector becomes a single DataFrame row.
    return pd.DataFrame(labelled_row[np.newaxis, :])

In [7]:
def merge_to_one_dataFrame(input_dir, value):
    """Load every image in ``input_dir`` into one DataFrame labelled ``value``.

    Each readable image becomes one row produced by ``img_to_pdDataFrame``;
    entries that ``cv2.imread`` cannot decode are reported and skipped.

    Args:
        input_dir: Directory containing the images of a single class.
        value: Label stored in the first column of every row.

    Returns:
        pandas.DataFrame with one row per readable image, or an empty
        DataFrame when the directory holds no readable image.
    """
    foldername = ntpath.basename(input_dir)
    print('Foldername: ' + foldername)
    # List the directory once and reuse the result for the count and the loop.
    filenames = os.listdir(input_dir)
    print(f"Totel files: {len(filenames)}")

    rows = []
    for file in filenames:
        img = cv2.imread(os.path.join(input_dir, file))
        if img is None:
            # The loop variable is `file`; the previous `filename` was undefined
            # here and raised NameError on the first unreadable entry.
            print('Filename: ' + file + ' is none.')
            continue
        rows.append(img_to_pdDataFrame(img, value))
    print('================================================================================')

    if not rows:
        # pd.concat rejects an empty sequence.
        return pd.DataFrame()
    # Concatenate once instead of growing the frame row by row
    # (DataFrame.append is quadratic and no longer exists in pandas 2.x).
    return pd.concat(rows, ignore_index=True)

In [8]:
def merge_all_imgs(input_dir, include_symbols=False):
    """Merge the images of every class folder under ``input_dir`` into one DataFrame.

    Each sub-folder of ``input_dir`` is treated as one class and loaded with
    ``merge_to_one_dataFrame``. Folders named 'original number',
    'original sign', 'other number' and 'other sign' are always ignored.
    The symbol folders ('decimal', 'plus', 'minus', 'times', 'div', 'equal')
    are skipped unless ``include_symbols`` is true, in which case they are
    labelled 10 to 15 respectively. Any other folder is labelled with its
    own name.

    Args:
        input_dir: Directory whose sub-folders each hold the images of one class.
        include_symbols: When true, also load the symbol folders. Defaults to
            False, which matches the previous behaviour of skipping them.

    Returns:
        pandas.DataFrame with the rows of all loaded folders, or an empty
        DataFrame when nothing was loaded.
    """
    symbol_labels = {
        'decimal': 10,
        'plus': 11,
        'minus': 12,
        'times': 13,
        'div': 14,
        'equal': 15,
    }
    ignored_folders = {'original number', 'original sign', 'other number', 'other sign'}

    frames = []
    for file in os.listdir(input_dir):
        if file in ignored_folders:
            continue
        if file in symbol_labels:
            if not include_symbols:
                continue
            value = symbol_labels[file]
        else:
            value = file

        folder_path = os.path.join(input_dir, file)
        if not os.path.isdir(folder_path):
            # Stray files next to the class folders cannot be listed as a
            # directory; skip them instead of crashing.
            continue

        print('Filename: ' + file)
        folder_df = merge_to_one_dataFrame(folder_path, value)
        if not folder_df.empty:
            frames.append(folder_df)
    print('Finished')

    if not frames:
        # pd.concat rejects an empty sequence.
        return pd.DataFrame()
    # Concatenate once; DataFrame.append no longer exists in pandas 2.x.
    return pd.concat(frames, ignore_index=True)

In [54]:
#Run train
# Build the training DataFrame from the class folders under ./dataset/train.
input_dir = './dataset/train'
train_df = merge_all_imgs(input_dir)
# Build the evaluation DataFrame from the class folders under ./dataset/eval.
input_dir = './dataset/eval'
eval_df = merge_all_imgs(input_dir)
# Save both frames as CSV files without the index column.
train_df.to_csv('./train_without_symbol.csv',index=False)
eval_df.to_csv('./eval_without_symbol.csv',index=False)

Filename: 0
Foldername: 0
Totel files: 426
Filename: 1
Foldername: 1
Totel files: 432
Filename: 2
Foldername: 2
Totel files: 430
Filename: 3
Foldername: 3
Totel files: 429
Filename: 4
Foldername: 4
Totel files: 431
Filename: 5
Foldername: 5
Totel files: 431
Filename: 6
Foldername: 6
Totel files: 429
Filename: 7
Foldername: 7
Totel files: 430
Filename: 8
Foldername: 8
Totel files: 429
Filename: 9
Foldername: 9
Totel files: 430
Finished
Filename: 0
Foldername: 0
Totel files: 52
Filename: 1
Foldername: 1
Totel files: 55
Filename: 2
Foldername: 2
Totel files: 54
Filename: 3
Foldername: 3
Totel files: 54
Filename: 4
Foldername: 4
Totel files: 54
Filename: 5
Foldername: 5
Totel files: 54
Filename: 6
Foldername: 6
Totel files: 53
Filename: 7
Foldername: 7
Totel files: 54
Filename: 8
Foldername: 8
Totel files: 54
Filename: 9
Foldername: 9
Totel files: 54
Finished


In [10]:
#Run eval
# Build a DataFrame from the class folders under ./raw_data/png.
# NOTE: this rebinds `input_dir` and `eval_df`, replacing any frame
# previously stored under those names.
input_dir = './raw_data/png'
eval_df = merge_all_imgs(input_dir)
# Save the frame as a CSV file without the index column.
eval_df.to_csv('./raw_data_eval.csv',index=False)

Filename: 2
Foldername: 2
Totel files: 1
Filename: 3
Foldername: 3
Totel files: 1
Filename: 4
Foldername: 4
Totel files: 1
Finished
