In [1]:
#Data loader, loads your .csv with image data
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import csv
import cv2 as cv
import random

In [2]:
#Enter absolute path to your dataset:
my_dir_path = "/home/zule/anaconda3/envs/AlpNum/Font-Recognizer"
data = pd.read_csv(my_dir_path + "/dataset_table.csv")

In [3]:
data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel6262,pixel6263,pixel6264,pixel6265,pixel6266,pixel6267,pixel6268,pixel6269,pixel6270,pixel6271
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
#Label Loader, loads .csv with font labels
label_file = my_dir_path + '/font_labels.csv'
label_dict = {}

with open(label_file,'r') as font_data_file:
    for name, label in csv.reader(font_data_file):
        label_dict[label] = name

In [11]:
#Data arange, shuffle and splitting into train and validation data
data = np.array(data)
m, n = data.shape
np.random.shuffle(data) # shuffle before splitting into dev and training sets


In [12]:
print(m, n)

11 6273


In [21]:
#Add range for data. One_hot(y) is not working because the array is empty. 
data_dev = data[0:int(n/2)+1].T
Y_dev = data_dev[0]
X_dev = data_dev[1:n] 
X_dev = X_dev / 255.

#Train data changed from 8k to 0, as the initial variable returns empty numpy array
#data_train = data[8000:m].T
data_train = data[int(n/2):].T
Y_train = data_train[0]
X_train = data_train[1:n] 
X_train = X_train / 255.
_,m_train = X_train.shape

In [18]:
#Weigth and bias params initialization
def init_params():
    W1 = np.random.rand(10, 6272) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

#ReLU actiavtion function, done on Z[1]
def ReLU(Z):
    return np.maximum(Z, 0)

#Softmax actiavtion function, done on Z[2]
def softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A
    
def forward_prop(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = softmax(Z2)
    return Z1, A1, Z2, A2

#Derivation for back propagation
def ReLU_deriv(Z):
    return Z > 0

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

def backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y):
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1 / m * np.sum(dZ2)
    dZ1 = W2.T.dot(dZ2) * ReLU_deriv(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1    
    W2 = W2 - alpha * dW2  
    b2 = b2 - alpha * db2    
    return W1, b1, W2, b2

In [19]:
def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    print(predictions, Y)
    return np.sum(predictions == Y) / Y.size

def gradient_descent(X, Y, alpha, iterations):
    W1, b1, W2, b2 = init_params()
    number_of_iteration = []
    accuracy_of_iteration = []
    for i in range(iterations):
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backward_prop(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        if i % 10 == 0:
            print("Iteration: ", i)
            predictions = get_predictions(A2)
            number_of_iteration.append(i)
            print(get_accuracy(predictions, Y))
            accuracy_of_iteration.append(get_accuracy(predictions, Y))
    return W1, b1, W2, b2, number_of_iteration, accuracy_of_iteration

In [20]:
    W1, b1, W2, b2, iteration, accuracy = gradient_descent(X_train, Y_train, 0.10, 30)

ValueError: zero-size array to reduction operation maximum which has no identity

In [None]:
#Learning graph, accuracy over iterations

plt.title("Accuracy over time") 
plt.xlabel("Iteration") 
plt.ylabel("Accuracy") 
plt.plot(iteration,accuracy) 
plt.show()

In [None]:
#Use this code section for random images from booted dataset. For custom images, look down
def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
    predictions = get_predictions(A2)
    return predictions

def test_prediction(index, W1, b1, W2, b2):
    current_image = X_train[:, index, None]
    prediction = make_predictions(X_train[:, index, None], W1, b1, W2, b2)
    label = Y_train[index]
    
    print(prediction)
    
    print("Prediction: ", label_dict[str(int(prediction))])
    print("Label: ", label_dict[str(int(label))])
    
    print("Prediction numerical label: ", prediction)
    print("Numerical label: ", label)
    
    current_image = current_image.reshape((28, 224)) * 255
    plt.gray()
    plt.imshow(current_image, interpolation='nearest')
    plt.show()
    
    if int(prediction) != label:
        print("Wrong prediction!")
        return False
    else:
        print("Correct prediction!")
        return True
   


In [None]:
test_prediction(15, W1, b1, W2, b2)

In [None]:
#Getting accuracy of random parts of booted dataset
num_of_iterations = 0
correct_count = 0

for iter in range(0, 50):
    rand_sample = random.randint(100, 2000)
    num_of_iterations += 1
    if test_prediction(rand_sample, W1, b1, W2, b2) == True:
        correct_count += 1        
accuracy = correct_count / num_of_iterations
print("Accuracy: " + str(accuracy))


In [None]:
#Use this code section for custom booted images
def make_predictions(X, W1, b1, W2, b2):
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
    predictions = get_predictions(A2)
    return predictions

def test_prediction(my_image, W1, b1, W2, b2):
    current_image = my_image
    prediction = make_predictions(my_image, W1, b1, W2, b2)
    
    print("Prediction: ", label_dict[str(int(prediction))])
    print("Prediction numerical label: ", prediction)
    
    current_image = current_image.reshape((28, 224)) * 255
    plt.gray()
    plt.imshow(current_image, interpolation='nearest')
    plt.show()

In [None]:
#Enter the name of your image, you want to check font of. 

#Your image has to be 224x28pixels!
image_name = "/home/zule/anaconda3/envs/AlpNum/Font-Recognizer/Renders/arial.ttf2420.png"
image = cv.imread(image_name)
cv_image = cv.cvtColor(np.array(image), cv.COLOR_RGB2BGR)
cv_image_gs = cv.cvtColor(cv_image, cv.COLOR_BGR2GRAY)

#If you have white background use this line of code
#cv_image_gs = ~cv_image_gs

image_array = np.asarray(cv_image_gs)
oneDimensionArray = image_array.flatten()
oneDimensionArray = oneDimensionArray / 255

oneDimensionArray = np.reshape(oneDimensionArray, (-1, 1))

test_prediction(oneDimensionArray, W1, b1, W2, b2)
plt.imshow(cv_image_gs, cmap='gray')





In [None]:
# Save params from a file
import pickle

# After training the model, save the parameters
def save_params(W1, b1, W2, b2):
    params = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
    
    with open(my_dir_path + "/params.pkl", "wb") as f:
        pickle.dump(params, f)

save_params(W1, b1, W2, b2)

print(W1, b1, W2, b2)

In [None]:
# Load parameters from a file
def load_params():
    with open(my_dir_path + "/params.pkl", "rb") as f:
        params = pickle.load(f)
        
    return params["W1"], params["b1"], params["W2"], params["b2"]

# Use it like this
W1, b1, W2, b2 = load_params()