In [10]:
import spacy
from grakel import GraphKernel, Graph
import numpy as np


In [11]:

# Load a spaCy model for dependency parsing
nlp = spacy.load("en_core_web_sm")

In [12]:

def construct_dependency_graphs(sentences):
    graphs = []
    for sentence in sentences:
        doc = nlp(sentence)
        adjacency_list = {}
        node_labels = {}  # A dictionary to store the labels of nodes
        
        for token in doc:
            # We'll use token index as the node identifier
            adjacency_list[token.i] = [child.i for child in token.children]
            node_labels[token.i] = token.lemma_  # Using lemma as a simple node label

        # Create a Grakel Graph with node labels
        g = Graph(adjacency_list, node_labels=node_labels, graph_format='adjacency')
        graphs.append(g)
    return graphs



def calculate_syntactic_diversity(graphs, measure='wl'):
    kernel = GraphKernel(kernel=[{"name": "weisfeiler_lehman", "n_iter": 5}, {"name": "subtree_wl"}], normalize=True)
    # Compute the kernel matrix
    K = kernel.fit_transform(graphs)
    
    if measure == 'wl':  # Weisfeiler-Lehman kernel measure
        # Use kernel matrix to calculate diversity; more similar structures will have higher kernel values
        diversity_scores = 1 - np.mean(K, axis=1)  # Diversity as 1 - average similarity
        syntactic_diversity = np.mean(diversity_scores)
    else:
        raise ValueError("Unsupported measure for syntactic diversity")
    return syntactic_diversity


In [13]:


# Example text 
#PROMPT: Generate a Story about love.
#gpt3.5
file_path1 = "/home/vasi/Documents/BA_Thesis_Experiment/metrics/sample1.txt"
#gpt4
file_path2 = "/home/vasi/Documents/BA_Thesis_Experiment/metrics/sample2.txt"

with open(file_path1, 'r', encoding="utf-8") as file1:
    sentences1 = file1.read().split('.')
    print(sentences1)
with open(file_path2, 'r', encoding="utf-8") as file2:
    sentences2 = file2.read().split('.')

graphs1 = construct_dependency_graphs(sentences1)
graphs2 = construct_dependency_graphs(sentences2)

syntactic_diversity1 = calculate_syntactic_diversity(graphs1)
print("Syntactic diversity 1:", syntactic_diversity1)

syntactic_diversity2 = calculate_syntactic_diversity(graphs2)
print("Syntactic diversity 2:", syntactic_diversity2)


['Once upon a time, in a quaint little village nestled between rolling hills and whispering forests, there lived two souls destined for each other', ' Their names were Emily and Jack', ' Emily was a lively young woman with a heart as boundless as the open sky, while Jack was a gentle soul, with eyes that held the wisdom of ages', '\n\nTheir paths first crossed on a sunny afternoon in the village square', ' Emily had been selling her handcrafted jewelry at the weekly market, her creations shimmering in the sunlight like fragments of a rainbow', ' Jack, a painter by trade, was captivated by her vibrant spirit and the way she seemed to breathe life into the world around her', '\n\nAs weeks turned into months, their chance encounters at the market grew into intentional meetings', ' They shared stories and dreams beneath the shade of an ancient oak tree, their laughter mingling with the rustle of leaves in the breeze', ' With each passing day, their bond deepened, like the roots of the tree

TypeError: cannot unpack non-iterable NoneType object