<div align="center">

<p align="center">
    <img
        src="./Assets/Turkaracter Logo.png"
        raw=true
        alt="turkaracter_Logo"
        style="width: 128px; height: 128px" />
</p>

<h1 align="center">
    TurKaracter: Entwicklung eines Erkennungssystems von türkischen Handschriften unter Verwendung von künstlichen neuronalen Netzwerken
</h1>
<h3 align="center">
    Bachelor-Abschlussarbeit von Abdurrahman ODABAŞI
</h3>
<h3 align="center">
    Betreuer: Dr. Ahmet YILDIZ (Türkisch-Deutsche Universität)
</h3>
<h3 align="center">
    Industriepartner: BIS Çözüm Bilgisayar ve Entegrasyon Hiz. ve Tic. A.Ş.
</h3>
<h3 align="center">
    Bearbeitungszeitraum 04.11.2021 - 01.07.2022
</h3>
</div>

## 0. Imports & Setup

In [1]:
import numpy as np
import os
import cv2
from keras.models import load_model
from statistics import mean

In [2]:
from ImageProcessingMethods import grayscale, invert, binarize, segment_to_lines, segment_to_words, segment_to_chars, repairShapeOfCharacter, add_borders

from InferenceMethods import segmentTextToChars, predictText, decodeLabel, autoCorrectText

## 1. Similarity Metrics

In [3]:
from textdistance import hamming, levenshtein, jaro_winkler, ratcliff_obershelp

def hamming_similarity(a, b):
    """Return the normalized Hamming similarity of ``a`` and ``b``.

    Thin wrapper that delegates to ``hamming.normalized_similarity`` from
    the ``textdistance`` package.
    """
    score = hamming.normalized_similarity(a, b)
    return score

def levenshtein_similarity(a, b):
    """Return the normalized Levenshtein similarity of ``a`` and ``b``.

    Thin wrapper that delegates to ``levenshtein.normalized_similarity``
    from the ``textdistance`` package.
    """
    score = levenshtein.normalized_similarity(a, b)
    return score

def jaro_winkler_similarity(a, b):
    """Return the Jaro-Winkler score of ``a`` and ``b``.

    Calls the ``jaro_winkler`` object from ``textdistance`` directly, i.e.
    the score is whatever its default call returns for the two sequences.
    """
    score = jaro_winkler(a, b)
    return score

def ratcliff_obershelp_similarity(a, b):
    """Return the Ratcliff-Obershelp score of ``a`` and ``b``.

    Calls the ``ratcliff_obershelp`` object from ``textdistance`` directly,
    i.e. the score is whatever its default call returns for the two
    sequences.
    """
    score = ratcliff_obershelp(a, b)
    return score

## 2. Testing the models with a realistic test set

### 2.0. Implementing Testing Method

In [4]:
def test_model(model, verbose=0, show_corrections=False):
    ro_similarity_ex1 = []
    corrected_ro_similarity_ex1 = []
    confidence_ex1 = []
    ro_similarity_ex2 = []
    corrected_ro_similarity_ex2 = []
    confidence_ex2 = []
    predicted_strings = []
    for index in range(1,3,1):
    #setting the path to the directory containing the pics
        path = f"./Actual Testset/{index}/"

        for img in os.listdir(path):
            pic = cv2.imread(os.path.join(path,img))
            allChars = segmentTextToChars(pic, verbose=verbose)
            recognized_text, confidence = predictText(allChars, model, show_top_k=False, k=2)
            recognized_text = recognized_text.strip()
            corrected_text = autoCorrectText(recognized_text.lower())
            if index == 1:
                confidence_ex1.append(confidence)
                ro_similarity = ratcliff_obershelp_similarity("gönlüm vazo diyorsun jilet fıstıkçı şehap bahane ciğer twix", recognized_text.lower())
                ro_similarity_corrected = ratcliff_obershelp_similarity("gönlüm vazo diyorsun jilet fıstıkçı şehap bahane ciğer twix", corrected_text)
                ro_similarity_ex1.append(ro_similarity)
                if ro_similarity_corrected > ro_similarity:
                    if show_corrections == True:
                        print(recognized_text.lower())
                        print(corrected_text)
                        print("Ratcliff Obershelp Similarity of predicted text = ",round(ro_similarity * 100, 4), "%")
                        print("Ratcliff Obershelp Similarity of corrected text = ",round(ro_similarity_corrected * 100, 4), "%")
                        print("====================================")
                    corrected_ro_similarity_ex1.append(ro_similarity_corrected)
                    predicted_strings.append(corrected_text)
                else:
                    corrected_ro_similarity_ex1.append(ro_similarity)
                    predicted_strings.append(recognized_text.lower())
            elif index == 2:
                confidence_ex2.append(confidence)
                ro_similarity = ratcliff_obershelp_similarity("sözleşmenin bir örneğini elden teslim aldım", recognized_text.lower())
                ro_similarity_corrected = ratcliff_obershelp_similarity("sözleşmenin bir örneğini elden teslim aldım", corrected_text)
                ro_similarity_ex2.append(ro_similarity)
                if ro_similarity_corrected > ro_similarity:
                    if show_corrections == True:
                        print(recognized_text.lower())
                        print(corrected_text)
                        print("Ratcliff Obershelp Similarity of predicted text = ",round(ro_similarity * 100, 4), "%")
                        print("Ratcliff Obershelp Similarity of corrected text = ",round(ro_similarity_corrected * 100, 4), "%")
                        print("====================================")
                    predicted_strings.append(corrected_text)
                    corrected_ro_similarity_ex2.append(ro_similarity_corrected)
                else:
                    predicted_strings.append(recognized_text.lower())
                    corrected_ro_similarity_ex2.append(ro_similarity)
    ro_similarity_ex1_mean = mean(ro_similarity_ex1)
    ro_similarity_ex2_mean = mean(ro_similarity_ex2)
    confidence_ex1_mean = mean(confidence_ex1)
    confidence_ex2_mean = mean(confidence_ex2)
    overall_ro_similarity = mean(ro_similarity_ex1 + ro_similarity_ex2)
    overall_corrected_ro_similarity = mean(corrected_ro_similarity_ex1 + corrected_ro_similarity_ex2)
    # overall_ro_similarity = (ro_similarity_ex1_mean + ro_similarity_ex2_mean) / 2
    overall_confidence = (confidence_ex1_mean + confidence_ex2_mean) / 2
    print("Overall Ratcliff Obershelp Similarity without AutoCorrection = ", round(overall_ro_similarity * 100, 4), "%")
    print("Overall Ratcliff Obershelp Similarity with AutoCorrection = ", round(overall_corrected_ro_similarity * 100, 4), "%")
    print("Overall Confidence = ", round(overall_confidence * 100, 4), "%")
    return predicted_strings, overall_ro_similarity, overall_corrected_ro_similarity, ro_similarity_ex1, ro_similarity_ex2, ro_similarity_ex1_mean, ro_similarity_ex2_mean, overall_confidence, confidence_ex1_mean, confidence_ex2_mean

### 2.1. The raw model (1.5) for Combination of (Cropped And Resized NIST19 (OpenCV) + Padded T-H-E)

In [5]:
# Load the raw baseline model (1.5) and evaluate it on the realistic test set.
model = load_model("Models/baseline_model_1_5.h5")

(
    predicted_strings,
    overall_similarity,
    overall_corrected_similarity,
    similarity_ex1,
    similarity_ex2,
    similarity_ex1_mean,
    similarity_ex2_mean,
    overall_confidence,
    confidence_ex1_mean,
    confidence_ex2_mean,
) = test_model(model, show_corrections=False)

Overall Ratcliff Obershelp Similarity without AutoCorrection =  61.6618 %
Overall Ratcliff Obershelp Similarity with AutoCorrection =  62.2662 %
Overall Confidence =  85.2772 %


### 2.2. The random sampled model (2.0.3) for Combination of (Cropped And Resized NIST19 (OpenCV) + Padded T-H-E)

In [6]:
# Load the random sampled baseline model (2.0.3) and evaluate it on the realistic test set.
model = load_model("Models/baseline_model_2_0_3.h5")

(
    predicted_strings,
    overall_similarity,
    overall_corrected_similarity,
    similarity_ex1,
    similarity_ex2,
    similarity_ex1_mean,
    similarity_ex2_mean,
    overall_confidence,
    confidence_ex1_mean,
    confidence_ex2_mean,
) = test_model(model, show_corrections=False)

Overall Ratcliff Obershelp Similarity without AutoCorrection =  61.9574 %
Overall Ratcliff Obershelp Similarity with AutoCorrection =  62.8424 %
Overall Confidence =  83.9583 %


### 2.3. The only random undersampled model (2.1.3) for Combination of (Cropped And Resized NIST19 (OpenCV) + Padded T-H-E)

In [7]:
# Load the random undersampled baseline model (2.1.3) and evaluate it on the realistic test set.
model = load_model("Models/baseline_model_2_1_3.h5")

(
    predicted_strings,
    overall_similarity,
    overall_corrected_similarity,
    similarity_ex1,
    similarity_ex2,
    similarity_ex1_mean,
    similarity_ex2_mean,
    overall_confidence,
    confidence_ex1_mean,
    confidence_ex2_mean,
) = test_model(model, show_corrections=False)

Overall Ratcliff Obershelp Similarity without AutoCorrection =  63.3271 %
Overall Ratcliff Obershelp Similarity with AutoCorrection =  64.1018 %
Overall Confidence =  84.7208 %
