In [1]:
# ============================
# IMPORTS & SETUP
# ============================
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from scipy import ndimage
from skimage.measure import regionprops
from skimage.filters import threshold_otsu
import pandas as pd
import tensorflow.compat.v1 as tf
import keras
from tensorflow.python.framework import ops
from time import time

# The model cells below are written against the TF1 graph-mode API
# (tf.placeholder, tf.Session), so v2 behaviour is switched off first.
tf.disable_v2_behavior()
# Start from an empty default graph before any placeholders/variables are created.
ops.reset_default_graph()


Instructions for updating:
non-resource variables are not supported in the long term


In [2]:


# ============================
# PATHS (change base_path only)
# ============================
# Parent folder containing one folder of genuine signatures per person plus a
# matching "<person>_forg" folder of forgeries (682, 682_forg, etc.).
# Set the SIGN_DATA_DIR environment variable to use a dataset stored elsewhere
# without editing the notebook; the original location remains the fallback.
base_path = os.environ.get(
    "SIGN_DATA_DIR",
    r"C:\Users\BHUSHAN\Downloads\data_sign_project\extract",
)

# Generated feature CSVs are written below base_path.
features_dir = os.path.join(base_path, "Features")
train_dir = os.path.join(features_dir, "Training")
test_dir = os.path.join(features_dir, "Testing")
testfeature_dir = os.path.join(base_path, "TestFeatures")

# Create the output folders up front; exist_ok makes the cell safe to re-run.
for _out_dir in (train_dir, test_dir, testfeature_dir):
    os.makedirs(_out_dir, exist_ok=True)

In [3]:
# ============================
# IMAGE PROCESSING FUNCTIONS
# ============================

def rgbgrey(img):
    """Convert an image to greyscale by averaging the channels of every pixel.

    Parameters
    ----------
    img : array-like
        Either a multi-channel image of shape (rows, cols, channels) or an
        already single-channel image of shape (rows, cols).

    Returns
    -------
    numpy.ndarray
        float64 array of shape (rows, cols). Every channel is weighted
        equally (an alpha channel, if present, is averaged in as well).

    Raises
    ------
    ValueError
        If ``img`` has fewer than two dimensions.
    """
    img = np.asarray(img)
    if img.ndim < 2:
        raise ValueError("rgbgrey expects an image with at least 2 dimensions")
    if img.ndim == 2:
        # Single-channel input: the mean of one value is the value itself.
        return img.astype(np.float64)
    # One vectorised reduction over the channel axes replaces the former
    # Python-level loop over every pixel.
    channel_axes = tuple(range(2, img.ndim))
    return np.mean(img, axis=channel_axes).astype(np.float64)


def greybin(img):
    """Binarise a greyscale image with Otsu's threshold and invert the result.

    The image is first smoothed with a Gaussian filter (sigma 0.8). Pixels
    whose smoothed value is NOT above the Otsu threshold come out True, i.e.
    the darker side of the threshold (presumably the ink, assuming dark
    strokes on a light background).

    Parameters
    ----------
    img : numpy.ndarray
        2-D greyscale image.

    Returns
    -------
    numpy.ndarray
        Boolean mask with the same shape as ``img``.
    """
    smoothed = ndimage.gaussian_filter(img, sigma=0.8)
    otsu_level = threshold_otsu(smoothed)
    above_threshold = smoothed > otsu_level
    return ~above_threshold


def preproc(path, display=False):
    """Load a signature image and return its binarised, tightly cropped mask.

    Parameters
    ----------
    path : str
        Path of the image file to read.
    display : bool, optional
        When True, show the cropped mask with matplotlib.

    Returns
    -------
    numpy.ndarray
        Boolean mask cropped to the bounding box of the True pixels.

    Raises
    ------
    ValueError
        If binarisation leaves no True pixel, so there is nothing to crop to.
    """
    img = mpimg.imread(path)
    grey = rgbgrey(img)
    binimg = greybin(grey)

    rows, cols = np.nonzero(binimg)
    if rows.size == 0:
        raise ValueError(f"No signature pixels found in image: {path}")

    # Slice ends are exclusive, so +1 is needed to keep the last row/column
    # that contains ink (without it a one-pixel-thick signature crops to an
    # empty array).
    signimg = binimg[rows.min():rows.max() + 1, cols.min():cols.max() + 1]

    if display:
        plt.imshow(signimg, cmap='gray')
        plt.show()
    return signimg



In [4]:
# ============================
# FEATURE EXTRACTION FUNCTIONS
# ============================

def Ratio(img):
    """Return the fraction of pixels in a 2-D mask that are equal to True."""
    n_rows, n_cols = img.shape[0], img.shape[1]
    set_pixels = (img == True).sum()
    return set_pixels / (n_rows * n_cols)


def Centroid(img):
    """Return the centroid of the non-zero pixels, normalised by image size.

    Returns
    -------
    tuple
        ``(row_centroid / n_rows, col_centroid / n_cols)`` -- the row (y)
        component comes first.
    """
    nonzero_idx = np.nonzero(img)
    cent_row = nonzero_idx[0].mean() / img.shape[0]
    cent_col = nonzero_idx[1].mean() / img.shape[1]
    return cent_row, cent_col


def EccentricitySolidity(img):
    """Return ``(eccentricity, solidity)`` of the first region found in ``img``.

    The mask is cast to int8 before being handed to ``regionprops``; for a
    boolean mask that makes every True pixel carry label 1, so the first (and
    only) region is the whole foreground.
    """
    label_img = img.astype("int8")
    first_region = regionprops(label_img)[0]
    return first_region.eccentricity, first_region.solidity


def SkewKurtosis(img):
    """Compute skewness and excess kurtosis of the pixel mass along x and y.

    The image is projected onto each axis (column sums for x, row sums for y)
    and the projections are treated as weights over the pixel coordinates.

    Parameters
    ----------
    img : numpy.ndarray
        2-D mask/image.

    Returns
    -------
    tuple
        ``((skew_x, skew_y), (kurt_x, kurt_y))``; kurtosis has 3 subtracted.
    """
    n_rows, n_cols = img.shape
    mass = np.sum(img)

    def axis_moments(positions, profile):
        # Weighted mean, standard deviation, then 3rd/4th standardised moments.
        centre = np.sum(positions * profile) / np.sum(profile)
        offset = positions - centre
        spread = np.sqrt(np.sum(offset ** 2 * profile) / mass)
        skew = np.sum(profile * offset ** 3) / (mass * spread ** 3)
        kurt = np.sum(profile * offset ** 4) / (mass * spread ** 4) - 3
        return skew, kurt

    skew_x, kurt_x = axis_moments(np.arange(n_cols), np.sum(img, axis=0))
    skew_y, kurt_y = axis_moments(np.arange(n_rows), np.sum(img, axis=1))
    return (skew_x, skew_y), (kurt_x, kurt_y)


def getCSVFeatures(path):
    """Extract the nine-value feature tuple for the signature image at ``path``.

    Returns
    -------
    tuple
        ``(ratio, cent_y, cent_x, eccentricity, solidity,
        skew_x, skew_y, kurt_x, kurt_y)`` -- the same order as the CSV header
        written elsewhere in this notebook.
    """
    signature = preproc(path, display=False)
    fill_ratio = Ratio(signature)
    cent_y, cent_x = Centroid(signature)
    eccentricity, solidity = EccentricitySolidity(signature)
    (skew_x, skew_y), (kurt_x, kurt_y) = SkewKurtosis(signature)
    return (fill_ratio, cent_y, cent_x, eccentricity, solidity,
            skew_x, skew_y, kurt_x, kurt_y)


In [5]:

# ============================
# FEATURE CSV GENERATION
# ============================

def makeCSV():
    """Extract features for every person under ``base_path`` into per-person CSVs.

    A "person" is any sub-folder of ``base_path`` whose name does not end in
    ``_forg`` (and is not one of the generated output folders); its forgeries
    are expected in the sibling folder ``<person>_forg``. Persons without a
    forgery folder are skipped with a warning.

    For each person the alphabetically first 8 genuine and first 8 forged
    images go to ``training_<person>.csv`` in ``train_dir``; the remaining
    images go to ``testing_<person>.csv`` in ``test_dir``. Genuine rows are
    labelled 1 and forged rows 0 in the ``output`` column.
    """
    # Same extension rule as evaluate(): case-insensitive and including .jpeg.
    image_exts = ('.png', '.jpg', '.jpeg')
    csv_header = 'ratio,cent_y,cent_x,eccentricity,solidity,skew_x,skew_y,kurt_x,kurt_y,output\n'
    n_train = 8

    # The generated output folders also live directly under base_path; they
    # must not be mistaken for persons.
    output_dirs = {os.path.basename(features_dir), os.path.basename(testfeature_dir)}

    def list_images(folder):
        # Sorted so the train/test split is the same on every run.
        return sorted(os.path.join(folder, name)
                      for name in os.listdir(folder)
                      if name.lower().endswith(image_exts))

    folders = [f for f in os.listdir(base_path)
               if os.path.isdir(os.path.join(base_path, f))
               and not f.endswith("_forg")
               and f not in output_dirs]
    print("Found persons:", folders)

    for person_folder in folders:
        person_id = person_folder
        genuine_folder = os.path.join(base_path, person_folder)
        forged_folder = os.path.join(base_path, person_folder + "_forg")

        if not os.path.isdir(forged_folder):
            print(f"⚠️ Skipping {person_id} (forged folder missing)")
            continue

        genuine_images = list_images(genuine_folder)
        forged_images = list_images(forged_folder)

        train_csv = os.path.join(train_dir, f"training_{person_id}.csv")
        test_csv = os.path.join(test_dir, f"testing_{person_id}.csv")

        # (destination, genuine images, forged images) for each split.
        splits = (
            (train_csv, genuine_images[:n_train], forged_images[:n_train]),
            (test_csv, genuine_images[n_train:], forged_images[n_train:]),
        )
        for csv_path, genuine_part, forged_part in splits:
            with open(csv_path, 'w') as handle:
                handle.write(csv_header)
                # Genuine rows first (label 1), then forged rows (label 0).
                for label, paths in ((1, genuine_part), (0, forged_part)):
                    for path in paths:
                        features = getCSVFeatures(path)
                        handle.write(','.join(map(str, features)) + f',{label}\n')

        print(f"✅ Features saved for person {person_id}")

In [None]:
# Build the per-person training/testing feature CSVs from the images under base_path.
makeCSV()

Found persons: ['001', '002', '003', '004', '005', '006', '007', '008', '009', '010', '011', '012', '013', '014', '015', '016', '017', '018', '019', '020', '021', '022', '023', '024', '025', '026', '027', '028', '029', '030', '031', '032', '033', '034', '035', '036', '037', '038', '039', '040', '041', '042', '043', '044', '045', '046', '047', '048', '049', '050', '051', '052', '053', '054', '055', '056', '057', '058', '059', '060', '061', '062', '063', '064', '065', '066', '067', '068', '069', '070', '071', '072', '073', '074', '075', '076', '077', '078', '079', '080', '081', '082', '083', '084', '085', '086', '087', '088', '089', '090', '091', '092', '093', '094', '095', '096', '097', '098', '099', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '140', '141

In [None]:



# ============================
# MODEL & TRAINING
# ============================

# Number of feature columns per sample; matches the nine values returned by
# getCSVFeatures and the nine feature names in the CSV headers.
n_input = 9  # features

def readCSV(train_path, test_path, type2=False):
    """Load feature matrices (and one-hot labels) from the train/test CSVs.

    The first ``n_input`` columns of each file are the features; column
    ``n_input`` (when read) is the 0/1 label, which is one-hot encoded into
    two classes.

    Parameters
    ----------
    train_path : str
        Training CSV; must contain the label column.
    test_path : str
        Test CSV. Needs the label column only when ``type2`` is False.
    type2 : bool, optional
        True when the test file holds unlabelled samples to classify.

    Returns
    -------
    tuple
        ``(train_input, corr_train, test_input, corr_test)`` when ``type2`` is
        False, otherwise ``(train_input, corr_train, test_input)``. Inputs are
        float32 arrays.
    """
    def load(csv_path, with_labels):
        # Parse each file once and split features/labels in memory.
        n_cols = n_input + 1 if with_labels else n_input
        frame = pd.read_csv(csv_path, usecols=range(n_cols))
        features = np.array(frame.iloc[:, :n_input].values, dtype=np.float32)
        if not with_labels:
            return features, None
        raw_labels = np.array(frame.iloc[:, n_input].values).flatten()
        return features, keras.utils.to_categorical(raw_labels, 2)

    train_input, corr_train = load(train_path, with_labels=True)
    test_input, corr_test = load(test_path, with_labels=not type2)

    if not type2:
        return train_input, corr_train, test_input, corr_test
    return train_input, corr_train, test_input


# Network parameters: widths of the hidden layers and number of output classes.
n_hidden_1 = 7
n_hidden_2 = 10
n_hidden_3 = 30
n_classes = 2

# Graph inputs: X is a batch of feature rows, Y the matching one-hot labels
# (the batch dimension is left open with None).
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_classes])

# Layer weights and biases, initialised from a normal distribution.
# Only 'h1'/'b1' and the two 'out' entries are given explicit seeds.
# NOTE(review): weights['out'] has n_hidden_1 rows, and multilayer_perceptron
# feeds the output layer from layer_1, so 'h2', 'h3', 'b2' and 'b3' do not
# influence the network output as the code stands.
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], seed=1)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], seed=2))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], seed=3)),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'out': tf.Variable(tf.random_normal([n_classes], seed=4))
}

def multilayer_perceptron(x):
    """Build the forward pass: one tanh hidden layer and a tanh output layer.

    Parameters
    ----------
    x : tf.Tensor
        Batch of feature rows, shape ``[batch, n_input]``.

    Returns
    -------
    tf.Tensor
        Output activations of shape ``[batch, n_classes]``.

    Notes
    -----
    ``weights['out']`` has ``n_hidden_1`` rows, so the output layer is fed
    directly from ``layer_1``. The ops that used ``weights['h2']`` /
    ``weights['h3']`` were never connected to the returned tensor and are
    therefore not built here; the returned tensor is unchanged.
    NOTE(review): if a deeper network was intended, the output weights need
    reshaping to take the last hidden layer as input -- confirm the intent.
    """
    layer_1 = tf.tanh(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    out_layer = tf.tanh(tf.matmul(layer_1, weights['out']) + biases['out'])
    return out_layer

# Network output for the input placeholder (already passed through tanh
# inside multilayer_perceptron).
logits = multilayer_perceptron(X)
# Loss: mean squared difference between the network output and the labels Y.
loss_op = tf.reduce_mean(tf.squared_difference(logits, Y))
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_op = optimizer.minimize(loss_op)
# Softmax over the network output; evaluate() compares the two class scores.
pred = tf.nn.softmax(logits)
# Accuracy: fraction of rows whose highest-scoring class matches the label.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Op that initialises every variable; run once at the start of each session.
init = tf.global_variables_initializer()


def evaluate(train_path, test_path, person_id, type2=False):
    """Train the network on one person's features, then score or classify.

    If ``train_path`` does not exist, the user is asked (via ``input``) for a
    folder of genuine and a folder of forged signatures. Feature CSVs for
    ``person_id`` are then generated in ``train_dir`` / ``test_dir`` and
    training continues with the new training CSV.

    Parameters
    ----------
    train_path : str
        Training CSV for the person.
    test_path : str
        Test CSV: labelled when ``type2`` is False, otherwise unlabelled
        samples to classify.
    person_id : str
        Identifier used in messages and generated CSV file names.
    type2 : bool, optional
        False: report accuracy on the labelled test CSV.
        True: classify the first row of the test CSV.

    Returns
    -------
    tuple of float, bool, or None
        ``(train_accuracy, test_accuracy)`` when ``type2`` is False;
        True (genuine) / False (forged) when ``type2`` is True;
        None when a dataset had to be created but the supplied folders were
        unusable.
    """
    image_exts = ('.png', '.jpg', '.jpeg')
    csv_header = 'ratio,cent_y,cent_x,eccentricity,solidity,skew_x,skew_y,kurt_x,kurt_y,output\n'
    n_train = 8        # images per class used for training; the rest are test
    max_epochs = 1000
    cost_target = 0.0001

    # If the person's training CSV does not exist, ask for data and auto-train.
    if not os.path.exists(train_path):
        print(f"⚠️ No training data found for '{person_id}'")
        print("Let's create it now.")

        genuine_folder = input("Enter full path to genuine signatures folder: ").strip()
        forged_folder = input("Enter full path to forged signatures folder: ").strip()

        # isdir rather than exists: a path to a regular file would pass an
        # existence check and then make os.listdir raise.
        if not os.path.isdir(genuine_folder) or not os.path.isdir(forged_folder):
            print("❌ One or both folders don't exist. Please check the paths.")
            return None

        print("⏳ Extracting features and generating training/testing CSVs...")
        genuine_images = sorted(os.path.join(genuine_folder, name)
                                for name in os.listdir(genuine_folder)
                                if name.lower().endswith(image_exts))
        forged_images = sorted(os.path.join(forged_folder, name)
                               for name in os.listdir(forged_folder)
                               if name.lower().endswith(image_exts))

        if len(genuine_images) < 5 or len(forged_images) < 5:
            print("⚠️ Please provide at least 5 genuine and 5 forged signatures for training.")
            return None

        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(test_dir, exist_ok=True)

        train_csv = os.path.join(train_dir, f"training_{person_id}.csv")
        test_csv = os.path.join(test_dir, f"testing_{person_id}.csv")

        # First n_train images of each class train the model, the rest test it
        # (slicing already copes with folders holding fewer than n_train).
        splits = (
            (train_csv, genuine_images[:n_train], forged_images[:n_train]),
            (test_csv, genuine_images[n_train:], forged_images[n_train:]),
        )
        for csv_path, genuine_part, forged_part in splits:
            with open(csv_path, 'w') as handle:
                handle.write(csv_header)
                # Genuine rows first (label 1), then forged rows (label 0).
                for label, paths in ((1, genuine_part), (0, forged_part)):
                    for path in paths:
                        features = getCSVFeatures(path)
                        handle.write(','.join(map(str, features)) + f',{label}\n')

        print(f"✅ New dataset created for '{person_id}'!")
        train_path = train_csv
        print("✅ Proceeding to training and evaluation...\n")

    # Normal training/testing process
    if not type2:
        train_input, corr_train, test_input, corr_test = readCSV(train_path, test_path)
    else:
        train_input, corr_train, test_input = readCSV(train_path, test_path, type2)

    with tf.Session() as sess:
        sess.run(init)
        # Full-batch training; stop early once the loss is small enough.
        for _ in range(max_epochs):
            _, cost = sess.run([train_op, loss_op], feed_dict={X: train_input, Y: corr_train})
            if cost < cost_target:
                break

        if not type2:
            acc_train = accuracy.eval({X: train_input, Y: corr_train})
            acc_test = accuracy.eval({X: test_input, Y: corr_test})
            print(f"✅ Training Accuracy: {acc_train:.4f}, Test Accuracy: {acc_test:.4f}")
            return acc_train, acc_test

        # Classification mode: only the first test row is judged.
        prediction = pred.eval({X: test_input})
        if prediction[0][1] > prediction[0][0]:
            print("✅this signature is genuine/ original")
            return True
        print("❌this signature is forged/ fake ")
        return False



NameError: name 'tf' is not defined

In [None]:
# ============================
# TESTING ON A SAMPLE IMAGE
# ============================

def testing(path):
    """Write the features of one signature image to a single-row CSV.

    The file is ``testcsv.csv`` inside ``testfeature_dir``; it has a header of
    the nine feature names and no label column.

    Returns
    -------
    str
        Path of the CSV that was written.
    """
    values = getCSVFeatures(path)
    out_path = os.path.join(testfeature_dir, 'testcsv.csv')
    header_line = 'ratio,cent_y,cent_x,eccentricity,solidity,skew_x,skew_y,kurt_x,kurt_y\n'
    value_line = ','.join(str(value) for value in values) + '\n'
    with open(out_path, 'w') as handle:
        handle.write(header_line + value_line)
    return out_path





In [None]:
# === RUNNING TEST ===
# Strip surrounding whitespace (and quotes around a pasted path) so that the
# ID matches the CSV file name and the image path can be opened.
train_person_id = input("Enter person ID (e.g., 682): ").strip()
test_image_path = input("Enter full path of signature image to test: ").strip().strip('"\'')

train_path = os.path.join(train_dir, f"training_{train_person_id}.csv")
# Extract the image's features into a one-row CSV that evaluate() can read.
test_path = testing(test_image_path)

# type2=True: train on the person's data and classify this single signature.
evaluate(train_path, test_path, train_person_id, type2=True)


NameError: name 'os' is not defined