In [120]:
# Task: turn text into non-fluent aphasic speech, based on Misra et al. (2022).
# Status: basic working code; a possible next step is to speed up the transformation.

In [121]:
# spaCy supplies the parsing pipeline and the token-pattern Matcher,
# numpy and random supply the random draws, and pyphen supplies a
# hyphenation dictionary.
import spacy
from spacy.matcher import Matcher
import numpy as np
import random
import pyphen
# Load the small English spaCy pipeline once; its vocab is shared with the
# Matcher and the pipeline itself is used to parse the demo sentence.
nlp = spacy.load("en_core_web_sm")

In [122]:
# Token pattern describing a verb phrase: an optional verb, any number of
# adverbs, any number of auxiliaries, then one or more verbs.
vp_pattern = [
    [
        {"POS": "VERB", "OP": "?"},
        {"POS": "ADV", "OP": "*"},
        {"POS": "AUX", "OP": "*"},
        {"POS": "VERB", "OP": "+"},
    ],
]

# Register the pattern with a matcher built on the pipeline's vocabulary.
matcher = Matcher(nlp.vocab)
matcher.add("Verb phrase", vp_pattern)

# English hyphenation dictionary.
a = pyphen.Pyphen(lang="en")

# Hesitation fillers; each one already ends with a space so it can be
# appended directly to the utterance being built.
fillers = [
    "ah [...] ",
    "er [...] ",
    "oh [...] ",
]

In [123]:
def aphasic_speech(text, doc, max_words=15, verbose=True):
    """Turn a sentence into a simulated non-fluent aphasic utterance.

    The notebook header describes the procedure as based on Misra et al.
    (2022). Sentences with more than ``max_words`` whitespace-separated
    words are dropped. Otherwise the ratio of noun phrases to verb phrases
    is computed; if it exceeds 2 the sentence is dropped with probability
    0.8. For sentences that are kept, every token is visited in order:

    * a filler is inserted before the token when the utterance built so far
      has a word count divisible by 4, or otherwise with probability 0.5;
    * tokens whose dependency label is ``det``, ``prep``, ``cop`` or ``aux``
      are kept with probability 0.1;
    * adjectives and adverbs are kept with probability 0.5;
    * verbs are replaced by their lemma;
    * every other token is kept unchanged.

    Parameters
    ----------
    text : str
        The raw sentence; only used to count whitespace-separated words.
    doc : spacy.tokens.Doc
        The parsed sentence. It is not accessed when the sentence is longer
        than ``max_words``.
    max_words : int, optional
        Longest sentence (in words) that is still transformed. Defaults to
        15, the value that was previously hard-coded.
    verbose : bool, optional
        When true (the default, matching the previous behaviour) the noun
        phrases, verb phrases, their ratio and the "Division by zero"
        notice are printed.

    Returns
    -------
    str or None
        The transformed utterance, each emitted token followed by a space.
        An empty string is returned when the sentence is dropped. ``None``
        is returned when no verb phrase is matched, so the ratio is
        undefined.

    Notes
    -----
    Randomness comes from the global ``np.random`` and ``random`` states;
    the function does not seed them itself.
    """
    aphasic_utt = ""

    # Overly long sentences are dropped without looking at the parse.
    if len(text.split()) > max_words:
        return aphasic_utt

    # Noun phrases: each noun chunk plus the full subtree span of its root,
    # de-duplicated by their stripped text.
    noun_phrases = set()
    for nc in doc.noun_chunks:
        subtree_span = doc[nc.root.left_edge.i:nc.root.right_edge.i + 1]
        for nop in (nc, subtree_span):
            noun_phrases.add(nop.text.strip())
    if verbose:
        print(noun_phrases)

    # Verb phrases: every span reported by the module-level matcher.
    verb_phrases = [doc[start:end] for _, start, end in matcher(doc)]
    if verbose:
        print(verb_phrases)

    # Explicit guard instead of a bare ``except`` around the division:
    # without any verb phrase the ratio cannot be computed.
    if not verb_phrases:
        if verbose:
            print("Division by zero")
        return None

    ratio = len(noun_phrases) / len(verb_phrases)
    if verbose:
        print(ratio)

    # The skip draw is taken unconditionally, before the ratio is checked.
    X = np.random.uniform(0, 1)
    if ratio > 2 and X <= 0.8:
        # skip sentence
        return aphasic_utt

    # don't skip sentence
    for tok in doc:
        filler_x = np.random.uniform(0, 1)
        if len(aphasic_utt.split()) % 4 == 0 or filler_x > 0.5:
            # add filler
            aphasic_utt += random.choice(fillers)

        # TODO: word substitution based on Levenshtein distance (intended
        # to trigger when this draw is > 0.9). The draw itself is kept,
        # although its value is unused, so that the sequence of random
        # numbers consumed per token stays the same as before.
        np.random.uniform(0, 1)

        if tok.dep_ in ("det", "prep", "cop", "aux"):
            # determiners, prepositions, copulas, auxiliaries: mostly dropped
            if np.random.uniform(0, 1) > 0.9:
                aphasic_utt += tok.text + " "

        elif tok.pos_ in ("ADJ", "ADV"):
            # adjectives, adverbs: kept half of the time
            if np.random.uniform(0, 1) > 0.5:
                aphasic_utt += tok.text + " "

        elif tok.pos_ == "VERB":
            # verbs are reduced to their lemma
            aphasic_utt += tok.lemma_ + " "

        else:
            # all other parts of speech are kept as they are
            aphasic_utt += tok.text + " "

    return aphasic_utt

In [124]:
# Demo sentence and its spaCy parse.
text1 = "I want an apple."
doc1 = nlp(text1)

# Ask the hyphenation dictionary where "apple" could be split.
print(a.inserted("apple"))

# List the part-of-speech tag and dependency label of every token.
for tok in doc1:
    print(f"{tok.pos_} {tok.dep_}")

apple
PRON nsubj
VERB ROOT
DET det
NOUN dobj
PUNCT punct


In [125]:
# Run the transformation on the demo sentence. The function draws random
# numbers and no seed is set in the cells shown here, so the printed
# utterance can differ from run to run.
print(aphasic_speech(text1, doc1))

{'an apple', 'I'}
[want]
2.0
oh [...] I want ah [...] er [...] apple ah [...] . 
