In [1]:
import os
import matplotlib.pyplot as plt
from skimage import color
import numpy as np
from scipy.ndimage import gaussian_filter
from sklearn.model_selection import train_test_split

In [2]:
def get_folder_index(folder_name, unique_folders):
    """Return the position of ``folder_name`` inside ``unique_folders``.

    The lookup is delegated to the sequence's own ``index`` method, so the
    first matching position is returned; for a list, a name that is not
    present raises ``ValueError``.
    """
    position = unique_folders.index(folder_name)
    return position

In [3]:
def apply_gaussian_blur(image_array, sigma=2):
    """Smooth ``image_array`` with a Gaussian kernel.

    Args:
        image_array: Array passed straight to ``scipy.ndimage.gaussian_filter``.
        sigma: Standard deviation of the Gaussian kernel (default 2).

    Returns:
        The filtered array produced by ``gaussian_filter``.
    """
    blurred = gaussian_filter(image_array, sigma=sigma)
    return blurred

In [4]:
def apply_fft_and_inverse(im, high_pass_filter_radius=3):
    """High-pass filter a 2-D image in the frequency domain.

    The image is transformed with a 2-D FFT, the zero-frequency bin is
    shifted to the centre, a small square of bins around the centre is
    zeroed (removing the lowest frequencies), and the result is transformed
    back. The magnitude of the inverse transform is returned.

    Args:
        im: 2-D array-like image.
        high_pass_filter_radius: Non-negative integer half-width of the
            zeroed square. Defaults to 3, the value that was previously
            hard-coded. ``0`` zeroes nothing.

    Returns:
        Real-valued array with the same shape as ``im`` holding the
        magnitude of the filtered image.

    Raises:
        ValueError: If ``im`` is not 2-D or the radius is negative.
    """
    if high_pass_filter_radius < 0:
        raise ValueError("high_pass_filter_radius must be non-negative")

    im = np.asarray(im)
    if im.ndim != 2:
        raise ValueError(f"expected a 2-D image, got shape {im.shape}")

    f_transform = np.fft.fft2(im)
    f_shift = np.fft.fftshift(f_transform)

    rows, cols = im.shape
    crow, ccol = rows // 2, cols // 2

    # Clamp the start indices at 0: for images smaller than the radius a
    # negative start would wrap around and mask the wrong bins. The stop
    # index needs no clamp because slicing already truncates at the edge.
    row_start = max(crow - high_pass_filter_radius, 0)
    col_start = max(ccol - high_pass_filter_radius, 0)

    # NOTE: the window is [centre - r, centre + r), i.e. 2*r bins wide, which
    # is kept as-is so existing outputs for normal-sized images are unchanged.
    f_shift[row_start:crow + high_pass_filter_radius,
            col_start:ccol + high_pass_filter_radius] = 0

    f_ishift = np.fft.ifftshift(f_shift)
    img_back = np.fft.ifft2(f_ishift)
    return np.abs(img_back)

In [5]:
# Class folder names, in label order: digits 0-9, uppercase A-Z, then
# lowercase a-z stored as "<letter>_lower".
folders = (
    [f"{digit}" for digit in range(10)]
    + [chr(code) for code in range(ord('A'), ord('Z') + 1)]
    + [f"{chr(code)}_lower" for code in range(ord('a'), ord('z') + 1)]
)

# Input image root and the two output locations.
characters_path = '/Users/gbutts/Desktop/Characters'
train_data_dir = '/Users/gbutts/Desktop/datasets/train_data'
test_data_dir = '/Users/gbutts/Desktop/datasets/test_data'

# Make sure both output directories exist before anything is written.
for dir_path in (train_data_dir, test_data_dir):
    os.makedirs(dir_path, exist_ok=True)

In [6]:
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage import color
from sklearn.model_selection import train_test_split

# This cell relies on get_folder_index, apply_gaussian_blur and
# apply_fft_and_inverse, which are defined in the cells above; run those first.

def save_data(data_label_pairs, data_dir, prefix):
    """Save ``data_label_pairs`` to ``<data_dir>/<prefix>.npy``.

    Args:
        data_label_pairs: Data to store. Regular numeric input is saved
            as-is. Ragged input such as a list of ``(ndarray, int)`` tuples
            is stored as an object array.
        data_dir: Directory the file is written into.
        prefix: File name without the ``.npy`` extension.

    Note:
        Object arrays are pickled by ``np.save``; read them back with
        ``np.load(path, allow_pickle=True)``.
    """
    filename = os.path.join(data_dir, f'{prefix}.npy')
    try:
        payload = np.asanyarray(data_label_pairs)
    except ValueError:
        # NumPy >= 1.24 refuses to build ragged arrays implicitly (older
        # versions only warned), so request an object array explicitly.
        payload = np.asarray(data_label_pairs, dtype=object)
    np.save(filename, payload)


# Loop through each folder and process the images.
#
# For every class folder this reads up to MAX_IMAGES_PER_FOLDER .png files,
# builds two 2-channel samples per image (original + Gaussian-blurred, each
# stacked with its high-pass-filtered counterpart), splits them 80/20 and
# saves "<folder>_train.npy" / "<folder>_test.npy".
MAX_IMAGES_PER_FOLDER = 1000  # cap on source images read from each folder
SPLIT_SEED = 42  # fixed seed so the train/test split is reproducible

for folder in folders:
    print(f"{folder} start")
    folder_path = os.path.join(characters_path, folder)
    if not os.path.exists(folder_path):
        continue

    # The label is the same for every image in the folder, so look it up once.
    label = get_folder_index(folder, folders)

    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # of the first MAX_IMAGES_PER_FOLDER images deterministic.
    png_files = sorted(
        name for name in os.listdir(folder_path) if name.endswith('.png')
    )[:MAX_IMAGES_PER_FOLDER]

    # One entry per source image, holding both samples derived from it.
    per_image_pairs = []
    for image_file in png_files:
        image_path = os.path.join(folder_path, image_file)  # get path to image
        image = plt.imread(image_path)  # load image
        gray_image = color.rgb2gray(image)  # greyscale image
        gray_blurred = apply_gaussian_blur(gray_image)  # new gaussian blur image

        img_back_original = apply_fft_and_inverse(gray_image)  # ifft original image
        img_back_gaussian = apply_fft_and_inverse(gray_blurred)  # ifft gaussian blur image

        # Channel 0: spatial image, channel 1: its high-pass-filtered version.
        two_channel_image_original = np.stack((gray_image, img_back_original), axis=-1)
        two_channel_image_gaussian = np.stack((gray_blurred, img_back_gaussian), axis=-1)

        per_image_pairs.append([
            (two_channel_image_original, label),
            (two_channel_image_gaussian, label),
        ])

    # train_test_split raises on empty input and cannot produce a non-empty
    # train AND test set from a single image, so skip such folders.
    if len(per_image_pairs) < 2:
        print(f"{folder} skipped: need at least 2 .png images to split, found {len(per_image_pairs)}")
        continue

    # Split by source image rather than by sample: the original and blurred
    # versions of one image are near-duplicates, and putting one in train and
    # the other in test would leak test information into training.
    train_idx, test_idx = train_test_split(
        np.arange(len(per_image_pairs)), train_size=0.8, random_state=SPLIT_SEED
    )
    train_pairs = [pair for i in train_idx for pair in per_image_pairs[i]]
    test_pairs = [pair for i in test_idx for pair in per_image_pairs[i]]

    # Save training and testing data/label pairs for each folder
    save_data(train_pairs, train_data_dir, f"{folder}_train")
    save_data(test_pairs, test_data_dir, f"{folder}_test")

    print(f"{folder} end")

print("Processing and splitting complete.")

0 start


  arr = np.asanyarray(arr)


0 end
1 start
1 end
2 start
2 end
3 start
3 end
4 start
4 end
5 start
5 end
6 start
6 end
7 start
7 end
8 start
8 end
9 start
9 end
A start
A end
B start
B end
C start
C end
D start
D end
E start
E end
F start
F end
G start
G end
H start
H end
I start
I end
J start
J end
K start
K end
L start
L end
M start
M end
N start
N end
O start
O end
P start
P end
Q start
Q end
R start
R end
S start
S end
T start
T end
U start
U end
V start
V end
W start
W end
X start
X end
Y start
Y end
Z start
Z end
a_lower start
a_lower end
b_lower start
b_lower end
c_lower start
c_lower end
d_lower start
d_lower end
e_lower start
e_lower end
f_lower start
f_lower end
g_lower start
g_lower end
h_lower start
h_lower end
i_lower start
i_lower end
j_lower start
j_lower end
k_lower start
k_lower end
l_lower start
l_lower end
m_lower start
m_lower end
n_lower start
n_lower end
o_lower start
o_lower end
p_lower start
p_lower end
q_lower start
q_lower end
r_lower start
r_lower end
s_lower start
s_lower end
t_lower st