In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the /kaggle/input tree and print the full path of every file found.
# The directory list yielded by os.walk is not needed, hence the `_` placeholder.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Install required libraries
!pip install --quiet pdf2image
!apt-get install -y poppler-utils
!pip install --quiet fpdf


In [None]:
import os
from pdf2image import convert_from_path
import numpy as np
import pandas as pd
from PIL import Image
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout, Lambda, Concatenate
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Set random seeds for reproducibility
# Both NumPy's global generator and TensorFlow's global generator get the same fixed seed.
np.random.seed(42)
tf.random.set_seed(42)

# Load and preprocess data
def load_data(train_path='/kaggle/input/emnist/emnist-letters-train.csv',
              test_path='/kaggle/input/emnist/emnist-letters-test.csv'):
    """Load the EMNIST letters CSV files and turn them into image tensors.

    In each file the first column is used as the label and all remaining
    columns as pixel values, which are reshaped to 28x28x1.

    Args:
        train_path: Path of the training CSV. Defaults to the Kaggle location.
        test_path: Path of the test CSV. Defaults to the Kaggle location.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``. The image arrays are
        float32 with shape ``(n, 28, 28, 1)`` and have been divided by 255.
        The labels are returned exactly as stored in the file; callers that
        need 0-based class indices must shift them themselves.
    """
    def _read_split(csv_path):
        # NOTE(review): pd.read_csv treats the first row as a header by default.
        # If these CSV files have no header row, the first sample of each file
        # is silently consumed as column names -- confirm against the dataset.
        frame = pd.read_csv(csv_path)
        labels = frame.iloc[:, 0].values
        pixels = frame.iloc[:, 1:].values
        # Reshape the flat pixel rows to 28x28x1 images and scale by 1/255.
        images = pixels.reshape(-1, 28, 28, 1).astype('float32') / 255.0
        return images, labels

    X_train, y_train = _read_split(train_path)
    X_test, y_test = _read_split(test_path)
    return X_train, y_train, X_test, y_test

# Create the forensic analysis model
def create_model():
    """Build the uncompiled CNN used for letter classification.

    The network takes 28x28x1 inputs and stacks three convolutional stages
    (32, 64 and 128 filters). Each stage is two 3x3 'same'-padded ReLU
    convolutions followed by 2x2 max pooling and 25% dropout. The flattened
    result feeds a 512-unit dense layer named ``'forensic_dense'`` (the layer
    the feature-extraction helper looks up by name), 50% dropout, and a
    26-way softmax output.

    Returns:
        The Keras ``Model`` connecting the input to the softmax output.
    """
    inputs = Input(shape=(28, 28, 1))

    # Convolutional stages: feature extraction -> stroke analysis -> deep features.
    x = inputs
    for filters in (32, 64, 128):
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2))(x)
        x = Dropout(0.25)(x)

    # Dense head; the named layer doubles as the feature embedding.
    x = Flatten()(x)
    x = Dense(512, activation='relu', name='forensic_dense')(x)
    x = Dropout(0.5)(x)

    # One output unit per letter class.
    outputs = Dense(26, activation='softmax')(x)

    return Model(inputs=inputs, outputs=outputs)

# Function to extract forensic features
def extract_forensic_features(model, image_batch):
    """Return the activations of the ``'forensic_dense'`` layer for a batch.

    A throw-away sub-model sharing ``model``'s input is built on every call
    and used to run ``predict`` on ``image_batch``.

    Args:
        model: Keras model containing a layer named ``'forensic_dense'``.
        image_batch: Batch of inputs accepted by ``model``.

    Returns:
        Whatever ``predict`` returns for the intermediate layer.
    """
    dense_output = model.get_layer('forensic_dense').output
    feature_model = Model(inputs=model.input, outputs=dense_output)
    return feature_model.predict(image_batch)

# Function to compare two characters
def compare_characters(model, char1, char2):
    """Return the cosine similarity between two characters' feature vectors.

    Each character is reshaped to a single-item batch of shape
    (1, 28, 28, 1) and passed through ``extract_forensic_features``.

    Args:
        model: Model handed on to ``extract_forensic_features``.
        char1: Array holding 784 values (anything reshapeable to 28x28x1).
        char2: Same as ``char1``.

    Returns:
        Scalar cosine similarity. If either feature vector has zero norm the
        similarity is undefined; ``0.0`` is returned instead of dividing by
        zero (which previously yielded NaN).
    """
    feat1 = extract_forensic_features(model, char1.reshape(1, 28, 28, 1)).ravel()
    feat2 = extract_forensic_features(model, char2.reshape(1, 28, 28, 1)).ravel()

    denominator = np.linalg.norm(feat1) * np.linalg.norm(feat2)
    if denominator == 0:
        # An all-zero vector is possible (e.g. every ReLU unit inactive).
        return 0.0
    return np.dot(feat1, feat2) / denominator

# Main training and evaluation function
def main():
    """Train the letter classifier on EMNIST, evaluate it, and demo a comparison.

    Steps: load the data, shift labels to 0-based indices, build and compile
    the model, fit for 10 epochs, report test accuracy, then print the
    feature similarity of two randomly chosen test characters.

    Returns:
        Tuple ``(model, history)``: the trained model and the object returned
        by ``model.fit``.
    """
    print("Loading data...")
    X_train, y_train, X_test, y_test = load_data()

    # Shift the labels down by one so they are 0-based class indices.
    y_train, y_test = y_train - 1, y_test - 1

    print("Creating model...")
    model = create_model()
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    print("Training model...")
    # Short fixed schedule for quick training; no early-stopping callback is
    # configured. A tenth of the training data is held out for validation.
    fit_options = dict(batch_size=128, epochs=10, validation_split=0.1, verbose=1)
    history = model.fit(X_train, y_train, **fit_options)

    print("\nEvaluating model...")
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    print(f"Test accuracy: {test_acc:.4f}")

    # Demo: similarity between two random test characters.
    print("\nDemonstrating character comparison...")
    idx1, idx2 = np.random.randint(0, len(X_test), 2)
    similarity = compare_characters(model, X_test[idx1], X_test[idx2])
    print(f"Similarity score between test characters: {similarity:.4f}")
    # Labels are 0-based here, so 65 + label is the ASCII code of the capital letter.
    print(f"True labels: {chr(65 + y_test[idx1])} and {chr(65 + y_test[idx2])}")

    return model, history

# Additional utility functions for forensic analysis
def analyze_character_features(model, character):
    """Summarise one character's extracted feature activations.

    Args:
        model: Model handed on to ``extract_forensic_features``.
        character: Array reshapeable to (1, 28, 28, 1).

    Returns:
        Dict with keys ``'mean'``, ``'std'``, ``'max'`` and ``'min'`` computed
        over all extracted feature values.
    """
    activations = extract_forensic_features(model, character.reshape(1, 28, 28, 1))

    # Pair each output key with the NumPy reduction that produces it.
    reducers = (('mean', np.mean), ('std', np.std), ('max', np.max), ('min', np.min))
    return {label: reduce_fn(activations) for label, reduce_fn in reducers}

def batch_compare_characters(model, reference_char, comparison_chars):
    """Score one reference character against each character in a collection.

    Args:
        model: Model handed on to ``compare_characters``.
        reference_char: Character compared against every candidate.
        comparison_chars: Iterable of candidate characters.

    Returns:
        NumPy array of similarity scores in the iteration order of
        ``comparison_chars``.
    """
    return np.array([
        compare_characters(model, reference_char, candidate)
        for candidate in comparison_chars
    ])

# Run the main function
# The trained model and its fit history are bound at notebook scope here;
# later cells read `model` and `history` directly.
if __name__ == "__main__":
    print("Starting forensic handwriting analysis system...")
    model, history = main()

In [None]:
# Run the complete training and analysis pipeline -- but only when the previous
# cell has not already produced `model` and `history`. Calling main() again
# unconditionally would retrain the whole network from scratch a second time.
if 'model' not in globals() or 'history' not in globals():
    model, history = main()

In [None]:
import matplotlib.pyplot as plt

def plot_training_history(history):
    """Draw accuracy and loss curves side by side for a training run.

    Args:
        history: Object whose ``history`` attribute is a mapping with the keys
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``.
    """
    plt.figure(figsize=(12, 4))

    # (training key, validation key, panel title, y-axis label) per subplot.
    panels = (
        ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model Loss', 'Loss'),
    )
    for position, (train_key, val_key, title, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key])
        plt.plot(history.history[val_key])
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend(['Train', 'Validation'], loc='upper left')

    plt.show()

# Assuming `history` is the output from model.fit
# (`history` is the notebook-level variable assigned by an earlier cell's main() call.)
plot_training_history(history)


In [None]:
def visualize_features(features):
    """Plot a feature array as one line: value against flattened index.

    Args:
        features: Array exposing ``flatten()``; every element becomes one
            point on the x axis.
    """
    flat_values = features.flatten()

    plt.figure(figsize=(12, 6))
    plt.plot(flat_values, label="Feature Intensity")
    plt.xlabel("Feature Index")
    plt.ylabel("Intensity")
    plt.title("Intermediate Forensic Features")
    plt.legend()
    plt.show()
# Reload the EMNIST arrays at notebook scope (main() keeps its copies local).
X_train, y_train, X_test, y_test = load_data()

# load_data() returns the labels exactly as stored in the CSV, whereas main()
# shifts them by -1 to get 0-based class indices. Apply the same shift here so
# that code mapping a label to a letter with chr(65 + label) shows the right
# letter instead of the next one.
y_train = y_train - 1
y_test = y_test - 1

# Visualize features of a single character (example with the first test character)
features = extract_forensic_features(model, X_test[0].reshape(1, 28, 28, 1))
visualize_features(features)


In [None]:
import matplotlib.pyplot as plt

def visualize_comparison(X_test, y_test, idx1, idx2, similarity_score):
    """Show two test characters next to each other with their similarity score.

    Args:
        X_test: Indexable collection of images, each reshapeable to 28x28.
        y_test: Indexable collection of integer labels; ``chr(65 + label)``
            is used as the panel title.
        idx1: Index of the character drawn on the left.
        idx2: Index of the character drawn on the right.
        similarity_score: Number shown in the figure title with 4 decimals.
    """
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))

    # Left panel gets idx1, right panel gets idx2.
    for axis, sample_index in zip(axes, (idx1, idx2)):
        axis.imshow(X_test[sample_index].reshape(28, 28), cmap='gray')
        axis.set_title(f"Label: {chr(65 + y_test[sample_index])}")

    plt.suptitle(f"Similarity Score: {similarity_score:.4f}", fontsize=16)
    plt.show()

# Example usage
# Pick two random test indices, score the pair with the notebook-level `model`,
# and display both characters together with the score.
idx1, idx2 = np.random.randint(0, len(X_test), 2)
similarity_score = compare_characters(model, X_test[idx1], X_test[idx2])
visualize_comparison(X_test, y_test, idx1, idx2, similarity_score)


In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout, Lambda, Concatenate
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

class ForensicCharacterAnalyzer:
    """CNN-based writer classifier with helpers for feature comparison.

    The model is built (uncompiled) on construction and stored in
    ``self.model``.
    """

    def __init__(self):
        self.model = self.create_model()

    def create_model(self):
        """Create the forensic analysis model.

        Three convolutional stages (32, 64, 128 filters; each two 3x3 ReLU
        convolutions, 2x2 max pooling, 25% dropout) feed a 512-unit dense
        layer named ``'forensic_features'``, a 256-unit dense layer and a
        6-way softmax named ``'writer_output'``.

        Returns:
            The uncompiled Keras ``Model``.
        """
        inputs = Input(shape=(28, 28, 1))

        # Feature extraction -> deeper features -> fine details.
        x = inputs
        for filters in (32, 64, 128):
            x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
            x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
            x = MaxPooling2D((2, 2))(x)
            x = Dropout(0.25)(x)

        # Flatten and dense layers
        x = Flatten()(x)
        x = Dense(512, activation='relu', name='forensic_features')(x)
        x = Dropout(0.5)(x)
        x = Dense(256, activation='relu')(x)
        x = Dropout(0.5)(x)

        # Output layer for writer classification
        outputs = Dense(6, activation='softmax', name='writer_output')(x)  # 6 writers

        return Model(inputs=inputs, outputs=outputs)

    def extract_features(self, image_batch):
        """Return the ``'forensic_features'`` layer activations for a batch.

        Args:
            image_batch: Batch of inputs accepted by ``self.model``.

        Returns:
            The result of ``predict`` on a sub-model ending at the
            ``'forensic_features'`` layer.
        """
        feature_model = Model(
            inputs=self.model.input,
            outputs=self.model.get_layer('forensic_features').output
        )
        return feature_model.predict(image_batch)

    def compare_samples(self, sample1, sample2):
        """Compare two handwriting samples and return a cosine similarity.

        Args:
            sample1: Input batch for the first sample.
            sample2: Input batch for the second sample.

        Returns:
            Cosine similarity of the two flattened feature arrays. When either
            has zero norm the similarity is undefined and ``0.0`` is returned
            rather than dividing by zero (which previously produced NaN).
        """
        vec1 = self.extract_features(sample1).flatten()
        vec2 = self.extract_features(sample2).flatten()

        denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
        if denominator == 0:
            return 0.0
        return np.dot(vec1, vec2) / denominator

def load_and_preprocess_data(base_path='/kaggle/input/personalprofile',
                             training_participants=('Macey', 'Mia')):
    """Load and preprocess the handwriting data.

    Every page of every PDF in ``<base_path>/<participant>`` is converted to
    grayscale, resized to 28x28, scaled by 1/255 and labelled with the
    participant's name.

    Args:
        base_path: Directory holding one sub-directory per participant.
        training_participants: Names of the participants to load.

    Returns:
        Tuple ``(X_train, y_train, le)``: the image array, the integer-encoded
        labels, and the fitted ``LabelEncoder``. The encoder is fitted on all
        six participant names so codes stay stable even though only the
        requested participants contribute samples. Samples are returned in a
        fixed pseudo-random order (see the shuffle note below).
    """
    images = []
    labels = []

    for name in training_participants:
        participant_path = os.path.join(base_path, name)
        # sorted(): os.listdir order is arbitrary; sorting keeps runs repeatable.
        for file in sorted(os.listdir(participant_path)):
            # Case-insensitive so that e.g. 'scan.PDF' is not skipped.
            if not file.lower().endswith('.pdf'):
                continue
            pdf_path = os.path.join(participant_path, file)
            for page in convert_from_path(pdf_path):
                # Convert to grayscale and resize
                page_small = page.convert('L').resize((28, 28))
                # Convert to array and normalize
                images.append(img_to_array(page_small).astype('float32') / 255.0)
                labels.append(name)

    # Convert to numpy arrays
    X_train = np.array(images)
    y_train = np.array(labels)

    # Encode labels
    le = LabelEncoder()
    all_participants = ['Emmanuel', 'Macey', 'Mia', 'Tam', 'Victoria', 'Suyesh']
    le.fit(all_participants)
    y_train = le.transform(y_train)

    # Samples were appended participant by participant. Keras' validation_split
    # takes the *last* fraction of the data before shuffling, so without mixing
    # the samples the validation set would contain only the last participant.
    # A fixed seed keeps the order reproducible.
    order = np.random.default_rng(42).permutation(len(X_train))
    return X_train[order], y_train[order], le

def main():
    """Build the writer classifier and train it on the participant samples.

    Any exception is reported on stdout and then re-raised unchanged.

    Returns:
        Tuple ``(analyzer, history)``: the analyzer holding the trained model
        and the object returned by ``fit``.
    """
    try:
        analyzer = ForensicCharacterAnalyzer()

        print("Loading data...")
        X_train, y_train, le = load_and_preprocess_data()

        analyzer.model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )

        print("Training model...")
        # 20% of the samples are held out for validation.
        fit_options = dict(batch_size=32, epochs=10, validation_split=0.2, verbose=1)
        history = analyzer.model.fit(X_train, y_train, **fit_options)
    except Exception as e:
        print(f"Error in main function: {str(e)}")
        raise
    else:
        return analyzer, history

# Entry point for this cell: train the writer classifier, then plot its
# accuracy and loss curves side by side.
if __name__ == "__main__":
    # Rebinds the notebook-level `history` to this run's fit history.
    analyzer, history = main()

    # Plot training results
    plt.figure(figsize=(12, 4))

    # Plot accuracy
    plt.subplot(1, 2, 1)
    plt.plot(history.history['accuracy'], label='Training')
    plt.plot(history.history['val_accuracy'], label='Validation')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()

    # Plot loss
    plt.subplot(1, 2, 2)
    plt.plot(history.history['loss'], label='Training')
    plt.plot(history.history['val_loss'], label='Validation')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder
from pdf2image import convert_from_path
from PIL import Image
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import img_to_array

In [None]:
# First recreate your original model
class ForensicCharacterAnalyzer:
    """Wrapper around the 26-class letter CNN; the model is built on construction."""

    def __init__(self):
        self.model = self.create_model()

    def create_model(self):
        """Create the forensic analysis model.

        Three convolutional stages (32, 64, 128 filters; each two 3x3 ReLU
        convolutions, 2x2 max pooling, 25% dropout) are followed by a
        512-unit dense layer named ``'forensic_dense'``, 50% dropout and a
        26-way softmax.

        Returns:
            The uncompiled Keras ``Model``.
        """
        inputs = Input(shape=(28, 28, 1))

        # Feature extraction -> stroke analysis -> deep features.
        x = inputs
        for filters in (32, 64, 128):
            x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
            x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
            x = MaxPooling2D((2, 2))(x)
            x = Dropout(0.25)(x)

        # Dense head; the named layer is what downstream code taps for features.
        x = Flatten()(x)
        x = Dense(512, activation='relu', name='forensic_dense')(x)
        x = Dropout(0.5)(x)

        # Output layer for letters (26 classes)
        outputs = Dense(26, activation='softmax')(x)

        return Model(inputs=inputs, outputs=outputs)

# Now create the participant classifier
def create_participant_classifier(base_model):
    """Build a 6-way participant classifier on top of a frozen feature extractor.

    Args:
        base_model: Analyzer object whose ``model`` attribute contains a layer
            named ``'forensic_dense'``.

    Returns:
        Uncompiled Keras ``Model`` mapping 28x28x1 inputs to 6 softmax scores.

    Raises:
        Exception: Any error is printed and then re-raised unchanged.
    """
    try:
        # Sub-model running from the base network's input to its dense features.
        feature_extractor = Model(
            inputs=base_model.model.input,
            outputs=base_model.model.get_layer('forensic_dense').output,
        )

        # Freeze every layer of the extractor so only the new head is trained.
        for frozen_layer in feature_extractor.layers:
            frozen_layer.trainable = False

        # New classification head.
        inputs = Input(shape=(28, 28, 1))
        embedding = feature_extractor(inputs)
        hidden = Dense(256, activation='relu')(embedding)
        hidden = Dropout(0.5)(hidden)
        outputs = Dense(6, activation='softmax')(hidden)  # 6 participants

        return Model(inputs=inputs, outputs=outputs)

    except Exception as e:
        print(f"Error creating participant classifier: {str(e)}")
        raise

def main():
    """Train the EMNIST base model, then train and evaluate the participant classifier.

    Returns:
        Tuple ``(base_analyzer, participant_model, history)`` where ``history``
        is the participant classifier's fit history.

    Raises:
        Exception: Any error is printed and then re-raised unchanged.
    """
    try:
        # First create and train the EMNIST model
        print("Creating base analyzer...")
        base_analyzer = ForensicCharacterAnalyzer()

        # Load and preprocess EMNIST data
        print("Loading EMNIST data...")
        X_train, y_train, X_test, y_test = load_data()  # Your original load_data function

        # load_data() returns the labels as stored in the CSV; they must be
        # shifted to 0-based class indices before being used with the 26-unit
        # softmax and sparse categorical cross-entropy, otherwise the highest
        # label falls outside the valid index range.
        y_train = y_train - 1
        y_test = y_test - 1

        # Train the base model
        print("Training base model...")
        base_analyzer.model.compile(
            optimizer=Adam(learning_rate=0.001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy']
        )

        base_history = base_analyzer.model.fit(
            X_train, y_train,
            batch_size=128,
            epochs=10,
            validation_split=0.1,
            verbose=1
        )

        # Now train the participant classifier
        print("\nTraining participant classifier...")
        participant_model, history, le = train_participant_classifier(base_analyzer)

        print("\nEvaluating classifier...")
        # NOTE(review): evaluate_participant_classifier is not defined in the
        # visible part of this notebook -- confirm it exists before running.
        y_true, y_pred = evaluate_participant_classifier(participant_model, le)

        # Print classification report
        from sklearn.metrics import classification_report
        print("\nClassification Report:")
        print(classification_report(y_true, y_pred))

        return base_analyzer, participant_model, history

    except Exception as e:
        print(f"Error in main execution: {str(e)}")
        raise

# Modified train_participant_classifier to accept the analyzer
def train_participant_classifier(base_analyzer):
    """Train the participant classifier on top of a trained base analyzer.

    Args:
        base_analyzer: Analyzer passed on to ``create_participant_classifier``.

    Returns:
        Tuple ``(participant_model, history, le)``: the trained model, its fit
        history, and the label encoder returned by the data loader.

    Raises:
        Exception: Any error is printed and then re-raised unchanged.
    """
    try:
        print("Loading participant data...")
        X_train, y_train, le = load_and_preprocess_data()
        print(f"Total samples: {len(X_train)}")

        print("Creating participant classifier...")
        participant_model = create_participant_classifier(base_analyzer)
        participant_model.compile(
            optimizer=Adam(learning_rate=0.0001),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )

        print("Training participant classifier...")
        # Stop once validation loss has not improved for 5 epochs and roll
        # back to the best weights seen.
        early_stopping = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True,
        )
        history = participant_model.fit(
            X_train, y_train,
            batch_size=8,
            epochs=20,
            validation_split=0.2,
            callbacks=[early_stopping],
            verbose=1,
        )
    except Exception as e:
        print(f"Error training participant classifier: {str(e)}")
        raise
    else:
        return participant_model, history, le

# Run everything
# Executes the two-stage pipeline and rebinds the notebook-level `history`
# to the participant classifier's fit history.
if __name__ == "__main__":
    base_analyzer, participant_model, history = main()

In [None]:
import os
from pdf2image import convert_from_path
import numpy as np
import cv2
from PIL import Image
import matplotlib.pyplot as plt

def extract_images_from_pdf(pdf_path, output_dir=None):
    """Convert a PDF into page images, optionally saving them as PNG files.

    Args:
        pdf_path: Path of the PDF to convert.
        output_dir: When truthy, the directory (created if missing) into which
            each page is written as ``page_<n>.png``, numbered from 1.

    Returns:
        The list of saved file paths when ``output_dir`` is given, otherwise
        the page images themselves. On any error a message is printed and an
        empty list is returned.
    """
    try:
        pages = convert_from_path(pdf_path)

        # Without an output directory the in-memory pages are the result.
        if not output_dir:
            return pages

        os.makedirs(output_dir, exist_ok=True)
        saved_paths = []
        for page_number, page in enumerate(pages, start=1):
            target = os.path.join(output_dir, f'page_{page_number}.png')
            page.save(target, 'PNG')
            saved_paths.append(target)
        return saved_paths
    except Exception as e:
        print(f"Error processing PDF {pdf_path}: {str(e)}")
        return []

def process_writer_pdfs(base_dir):
    """Turn every writer's PDFs under ``base_dir`` into model-ready samples.

    Each sub-directory of ``base_dir`` is treated as one writer. Every page of
    every ``.pdf`` file (extension matched case-insensitively) is converted to
    grayscale, resized to 28x28, scaled by 1/255 and reshaped to
    ``(1, 28, 28, 1)``.

    Args:
        base_dir: Directory containing one sub-directory per writer.

    Returns:
        Dict mapping writer name to the list of that writer's sample arrays;
        writers without PDFs map to an empty list.
    """
    writer_samples = {}

    for writer_name in os.listdir(base_dir):
        writer_path = os.path.join(base_dir, writer_name)
        # Plain files directly under base_dir are not writers.
        if not os.path.isdir(writer_path):
            continue

        samples = []
        writer_samples[writer_name] = samples

        pdf_names = [
            entry for entry in os.listdir(writer_path)
            if entry.lower().endswith('.pdf')
        ]
        for pdf_name in pdf_names:
            pages = extract_images_from_pdf(os.path.join(writer_path, pdf_name))
            for page in pages:
                # Grayscale PIL image -> numpy array -> 28x28 model input size.
                gray = np.array(page.convert('L'))
                small = cv2.resize(gray, (28, 28))
                # Normalize and add batch/channel dimensions.
                scaled = small.astype('float32') / 255.0
                samples.append(scaled.reshape(1, 28, 28, 1))

    return writer_samples

def compare_writers(model, writer_samples):
    """Compute the average pairwise sample similarity between every two writers.

    Args:
        model: Model handed on to ``compare_characters``.
        writer_samples: Dict mapping writer name to a list of samples; the
            first element of each sample is what gets compared.

    Returns:
        Nested dict ``results[writer1][writer2]`` holding the mean similarity
        over all sample pairs, or ``0`` when there are no pairs. A writer is
        never compared with itself.
    """
    results = {}

    for writer1, samples1 in writer_samples.items():
        row = {}
        results[writer1] = row
        for writer2, samples2 in writer_samples.items():
            if writer1 == writer2:
                continue

            # Every sample of writer1 against every sample of writer2.
            similarities = [
                compare_characters(model, first[0], second[0])
                for first in samples1
                for second in samples2
            ]
            row[writer2] = np.mean(similarities) if similarities else 0

    return results

def visualize_comparison_results(results):
    """Draw the writer-vs-writer similarity scores as a heatmap.

    Args:
        results: Nested dict ``results[writer1][writer2]`` of similarity
            scores. The diagonal (a writer against itself) is left at 0.
    """
    writers = list(results.keys())
    count = len(writers)

    similarity_matrix = np.zeros((count, count))
    for row, writer1 in enumerate(writers):
        for col, writer2 in enumerate(writers):
            if writer1 == writer2:
                continue
            similarity_matrix[row, col] = results[writer1][writer2]

    tick_positions = range(count)
    plt.figure(figsize=(10, 8))
    plt.imshow(similarity_matrix, cmap='Blues')
    plt.colorbar(label='Similarity Score')
    plt.xticks(tick_positions, writers, rotation=45)
    plt.yticks(tick_positions, writers)
    plt.title('Handwriting Similarity Matrix')
    plt.tight_layout()
    plt.show()

# Example usage
# Builds per-writer samples from the PDFs, scores every writer pair with the
# notebook-level `model`, and shows the result as a heatmap.
base_dir = "/kaggle/input/personalprofile"
writer_samples = process_writer_pdfs(base_dir)
comparison_results = compare_writers(model, writer_samples)
visualize_comparison_results(comparison_results)

# Print detailed results
# One section per writer, listing the average similarity to every other writer.
print("\nDetailed Comparison Results:")
for writer1, comparisons in comparison_results.items():
    print(f"\n{writer1}'s writing compared to:")
    for writer2, similarity in comparisons.items():
        print(f"  {writer2}: {similarity:.3f}")

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from tensorflow.keras.layers import BatchNormalization, Lambda
from tensorflow.keras.optimizers import Adam
import cv2
from scipy.stats import skew, kurtosis

class ForensicCharacterAnalyzer:
    """Letter classifier with an extra stroke branch plus comparison helpers.

    ``self.model`` stays ``None`` until ``create_model`` is called.
    """

    def __init__(self):
        self.model = None

    def load_data(self):
        """Load and preprocess the EMNIST letters CSV files.

        Returns:
            Tuple ``(X_train, y_train, X_test, y_test)``. Images are float32
            arrays of shape ``(n, 28, 28, 1)`` divided by 255; labels are the
            first CSV column minus one (0-based class indices).
        """
        print("Loading data...")
        train_data = pd.read_csv('/kaggle/input/emnist/emnist-letters-train.csv')
        test_data = pd.read_csv('/kaggle/input/emnist/emnist-letters-test.csv')

        X_train = train_data.iloc[:, 1:].values.reshape(-1, 28, 28, 1).astype('float32') / 255.0
        y_train = train_data.iloc[:, 0].values - 1  # Adjust to 0-based indexing

        X_test = test_data.iloc[:, 1:].values.reshape(-1, 28, 28, 1).astype('float32') / 255.0
        y_test = test_data.iloc[:, 0].values - 1

        return X_train, y_train, X_test, y_test

    def create_model(self):
        """Create the two-branch model and store it in ``self.model``.

        The main branch applies two 32-filter convolutions to the raw image.
        The stroke branch derives Sobel gradient angle and magnitude maps from
        the same image and applies one 16-filter convolution. Both branches
        are pooled to 14x14, concatenated along the channel axis and fed
        through further convolutional stages, a 512-unit dense layer named
        ``'forensic_features'`` and a 26-way softmax.

        Returns:
            The uncompiled Keras ``Model`` (also stored in ``self.model``).
        """
        input_shape = (28, 28, 1)
        inputs = Input(shape=input_shape)

        # Stroke Analysis Branch
        def create_stroke_features(x):
            # tf.image.sobel_edges returns (batch, height, width, channels, 2);
            # per the TensorFlow documentation the last axis is ordered
            # [dy, dx], i.e. the vertical gradient comes first.
            sobel = tf.image.sobel_edges(x)
            grad_y = sobel[..., 0]
            grad_x = sobel[..., 1]

            # Stroke direction and strength per pixel, each (batch, h, w, channels).
            angles = tf.math.atan2(grad_y, grad_x)
            magnitude = tf.sqrt(tf.square(grad_x) + tf.square(grad_y))

            # With the single-channel input this yields (batch, h, w, 2).
            return tf.concat([angles, magnitude], axis=-1)

        # Initial convolution layers - Main branch
        x = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
        x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
        x = BatchNormalization()(x)
        x = MaxPooling2D((2, 2))(x)
        x = Dropout(0.25)(x)

        # Process stroke features in parallel - Stroke branch
        stroke_features = Lambda(create_stroke_features)(inputs)  # Shape: (batch, 28, 28, 2)
        stroke_features = Conv2D(16, (3, 3), activation='relu', padding='same')(stroke_features)
        stroke_features = MaxPooling2D((2, 2))(stroke_features)  # Shape: (batch, 14, 14, 16)

        # Merge both branches: (batch, 14, 14, 32) + (batch, 14, 14, 16)
        # -> (batch, 14, 14, 48). A Concatenate layer is used rather than a raw
        # tf.concat because symbolic Keras tensors should be combined by layers.
        x = tf.keras.layers.Concatenate(axis=-1)([x, stroke_features])

        # Deeper feature extraction
        x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
        x = Conv2D(64, (3, 3), activation='relu', padding='same')(x)
        x = BatchNormalization()(x)
        x = MaxPooling2D((2, 2))(x)
        x = Dropout(0.25)(x)

        x = Conv2D(128, (3, 3), activation='relu', padding='same')(x)
        x = BatchNormalization()(x)
        x = MaxPooling2D((2, 2))(x)
        x = Dropout(0.25)(x)

        x = Flatten()(x)
        x = Dense(512, activation='relu', name='forensic_features')(x)
        x = Dropout(0.5)(x)

        outputs = Dense(26, activation='softmax')(x)

        self.model = Model(inputs=inputs, outputs=outputs)
        return self.model

    def analyze_stroke_characteristics(self, image):
        """Compute gradient-based stroke statistics for one character image.

        Args:
            image: ``tf.Tensor`` or array of rank 2, 3 or 4. For rank 4 the
                first batch item is used; for rank 3 the first channel.
                Values are multiplied by 255 and cast to uint8.

        Returns:
            Dict with mean/std of the Sobel gradient magnitude (reported as
            "pressure"), mean/std of the gradient angle, a consistency score
            (inverse of the magnitude std), the Canny edge density, the
            magnitude skewness and an 8-bin angle histogram (as a list).
        """
        # Convert to numpy array if needed
        if isinstance(image, tf.Tensor):
            image = image.numpy()

        # Ensure proper shape
        if len(image.shape) == 4:
            image = image[0]
        if len(image.shape) == 3:
            image = image[:, :, 0]

        # Convert to uint8 for OpenCV processing
        img_uint8 = (image * 255).astype(np.uint8)

        # Edge detection for stroke analysis
        edges = cv2.Canny(img_uint8, 50, 150)

        # Calculate gradients for pressure analysis
        sobelx = cv2.Sobel(img_uint8, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(img_uint8, cv2.CV_64F, 0, 1, ksize=3)

        # Stroke angle analysis
        angles = np.arctan2(sobely, sobelx)
        magnitudes = np.sqrt(sobelx**2 + sobely**2)

        # Calculate stroke characteristics
        stroke_analysis = {
            'pressure_mean': np.mean(magnitudes),
            'pressure_std': np.std(magnitudes),
            'angle_mean': np.mean(angles),
            'angle_std': np.std(angles),
            # Small epsilon keeps the score finite for perfectly uniform images.
            'stroke_consistency': 1.0 / (np.std(magnitudes) + 1e-6),
            'edge_density': np.sum(edges > 0) / edges.size,
            'pressure_skew': skew(magnitudes.flatten()),
            'angle_distribution': np.histogram(angles.flatten(), bins=8)[0].tolist()
        }

        return stroke_analysis

    def extract_forensic_features(self, char_image):
        """Extract the ``'forensic_features'`` activations and their statistics.

        Args:
            char_image: Array reshapeable to (1, 28, 28, 1).

        Returns:
            Dict with ``'mean'``, ``'std'``, ``'skewness'``, ``'kurtosis'``,
            ``'quartiles'`` (25/50/75th percentiles as a list) and the
            flattened ``'feature_vector'``.

        Raises:
            ValueError: If ``self.model`` has not been created yet.
        """
        if self.model is None:
            raise ValueError("Model must be trained before extracting features")

        feature_model = Model(inputs=self.model.input,
                              outputs=self.model.get_layer('forensic_features').output)
        features = feature_model.predict(char_image.reshape(1, 28, 28, 1))

        # Calculate statistical measures
        feature_stats = {
            'mean': np.mean(features),
            'std': np.std(features),
            'skewness': skew(features.flatten()),
            'kurtosis': kurtosis(features.flatten()),
            'quartiles': np.percentile(features, [25, 50, 75]).tolist(),
            'feature_vector': features.flatten()
        }

        return feature_stats

    def compare_characters(self, char1, char2):
        """Compare two characters using deep features and stroke statistics.

        Args:
            char1: First character image.
            char2: Second character image.

        Returns:
            Dict with ``'feature_similarity'``, ``'pressure_similarity'``,
            ``'stroke_consistency_similarity'``, ``'angle_similarity'`` and
            ``'overall_similarity'`` (weighted sum with weights 0.4, 0.3, 0.2
            and 0.1 respectively).
        """
        # Get both feature types
        feat1_deep = self.extract_forensic_features(char1)
        feat2_deep = self.extract_forensic_features(char2)

        stroke1 = self.analyze_stroke_characteristics(char1)
        stroke2 = self.analyze_stroke_characteristics(char2)

        # Calculate multiple similarity metrics
        comparison = {
            'feature_similarity': self._cosine_similarity(
                feat1_deep['feature_vector'],
                feat2_deep['feature_vector']
            ),
            'pressure_similarity': self._relative_similarity(
                stroke1['pressure_mean'], stroke2['pressure_mean']
            ),
            'stroke_consistency_similarity': self._relative_similarity(
                stroke1['stroke_consistency'], stroke2['stroke_consistency']
            ),
            'angle_similarity': self._compare_angle_distributions(
                stroke1['angle_distribution'],
                stroke2['angle_distribution']
            )
        }

        # Calculate weighted overall similarity
        comparison['overall_similarity'] = (
            0.4 * comparison['feature_similarity'] +
            0.3 * comparison['pressure_similarity'] +
            0.2 * comparison['stroke_consistency_similarity'] +
            0.1 * comparison['angle_similarity']
        )

        return comparison

    def _relative_similarity(self, value1, value2):
        """Return ``1 - |a - b| / max(a, b)`` for two non-negative values.

        When the larger value is 0 both values are 0 (e.g. two blank images
        have zero mean gradient magnitude); they are then identical, so 1.0
        is returned instead of dividing by zero.
        """
        largest = max(value1, value2)
        if largest == 0:
            return 1.0
        return 1 - abs(value1 - value2) / largest

    def _cosine_similarity(self, v1, v2):
        """Calculate cosine similarity between two vectors.

        Returns ``0.0`` when either vector has zero norm, where the cosine is
        undefined and the plain formula would produce NaN.
        """
        denominator = np.linalg.norm(v1) * np.linalg.norm(v2)
        if denominator == 0:
            return 0.0
        return np.dot(v1, v2) / denominator

    def _compare_angle_distributions(self, dist1, dist2):
        """Compare two histograms via Jensen-Shannon divergence.

        Both histograms are normalised to sum to 1 after adding a small
        epsilon; the divergence ``d`` is mapped to a similarity ``1 / (1 + d)``
        so that identical distributions score 1.
        """
        dist1 = np.array(dist1) + 1e-10  # Avoid zero division
        dist2 = np.array(dist2) + 1e-10
        dist1 = dist1 / np.sum(dist1)
        dist2 = dist2 / np.sum(dist2)
        m = 0.5 * (dist1 + dist2)
        js_div = 0.5 * (np.sum(dist1 * np.log(dist1 / m)) + np.sum(dist2 * np.log(dist2 / m)))
        return 1 / (1 + js_div)  # Convert to similarity score

def main():
    """Train the stroke-aware model on EMNIST and print one sample comparison.

    Returns:
        Tuple ``(analyzer, history)``: the analyzer holding the trained model
        and the object returned by ``fit``.
    """
    analyzer = ForensicCharacterAnalyzer()
    X_train, y_train, X_test, y_test = analyzer.load_data()

    # Build and compile the network.
    model = analyzer.create_model()
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    print("Training model...")
    # A tenth of the training data is held out for validation.
    fit_options = dict(batch_size=128, epochs=10, validation_split=0.1, verbose=1)
    history = model.fit(X_train, y_train, **fit_options)

    # Compare two randomly chosen test characters as a demonstration.
    print("\nPerforming forensic analysis on sample characters...")
    first, second = np.random.randint(0, len(X_test), 2)
    comparison = analyzer.compare_characters(X_test[first], X_test[second])

    print("\nForensic Comparison Results:")
    for metric in comparison:
        print(f"{metric}: {comparison[metric]:.4f}")

    return analyzer, history

# Entry point for this cell: trains the stroke-aware analyzer and rebinds the
# notebook-level `analyzer` and `history` variables.
if __name__ == "__main__":
    analyzer, history = main()