In [33]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer


In [34]:
data = pd.read_csv('/kaggle/input/dataset0/data.csv')
data_specificity = pd.read_json('/kaggle/input/data-specificity/data-specificity.json')
data_personal_context = pd.read_json('/kaggle/input/personal-context/data-specificity-personal-context.json')
data_sensory_details = pd.read_json('/kaggle/input/sensory/data_sensory_details.json')

 Criteria for defining a personal event memory by Pillemer ( 1998 ) :  
*  (a) present a specific event that took place at a particular time and place, rather than a summary event or extended series of events.
* (b) contain a detailed account of the rememberer's own personal circumstances at the time of the event. 
* (c) evoke sensory images or bodily sensations that contribute to the feeling of "re-experiencing" or "reliving" the event.
* (d) link its details and images to a particular moment or moments of phenomenal experience. 
* (e) be believed to be a truthful representation of what actually transpired.

## 1. Specificity

In [35]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk
import spacy

# Tokenization
def tokenize_text(text):
    return sent_tokenize(text)

# Part-of-Speech (POS) Tagging
def pos_tagging(text):
    tokens = word_tokenize(text)
    return pos_tag(tokens)


#  Named Entity Recognition (SpaCy)
def named_entity_recognition_spacy(text):
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(text)
    named_entities = [ent.text for ent in doc.ents]
    return named_entities

# Event Extraction
def extract_events(text):
    tokens = word_tokenize(text)
    tagged = pos_tag(tokens)
    events = []
    current_event = []
    for tag in tagged:
        if tag[1].startswith('VB'):
            current_event.append(tag[0])
        elif current_event:
            events.append(' '.join(current_event))
            current_event = []
    if current_event:
        events.append(' '.join(current_event))
    return events



caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


In [36]:
print(data.iloc[0]['Story'])

the loss of my father will forever leave an indelible mark on my heart it also provided me with an unwavering strength. It shaped me into a more resilient and compassionate person capable of facing adversity with newfound determination. I carry my father's memory with me drawing inspiration from his life and the lessons he imparted. Through this turning point I have learned that strength can emerge from even the darkest moments and I am committed to living a life that honors his legacy.


In [37]:
def specificity_spacy(story):
    sentences = tokenize_text(story)
    tagged_sentences = [pos_tagging(sentence) for sentence in sentences]
    named_entities = [named_entity_recognition_spacy(sentence) for sentence in sentences]
    print( "named entities", named_entities)
    events = [extract_events(sentence) for sentence in sentences]
    print("events", events)
    total_named_entities = sum(len(ne_list) for ne_list in named_entities)
    total_events = sum(len(event_list) for event_list in events)
    total_sentences = len(sentences)
    
    threshold = total_sentences // 5
    
    # Check if total_named_entities and total_events are greater than the threshold
    is_specific_story = ( total_named_entities > threshold ) & ( total_events > threshold )
    
    return is_specific_story

Testing the specifity_ne_spacy function :

In [38]:


# Example usage
story_text = "Once upon a time, in a faraway land, there lived a brave knight named Sir Lancelot..."
is_specific_story = specificity_spacy(story_text)
print("Is the story specific?", is_specific_story)


named entities [['Lancelot']]
events [['lived', 'named']]
Is the story specific? True


In [39]:


# Example usage
story_text = 'The annual science fair showcased impressive projects from young innovators. Students presented their research findings on various topics. A robot that can solve complex puzzles was a highlight of the event. Researchers discussed cutting-edge advancements in artificial intelligence. The fair concluded with an awards ceremony honoring the top projects.'
is_specific_story = specificity_spacy(story_text)
print("Is the story specific?", is_specific_story)


named entities [[], [], [], [], []]
events [['showcased'], ['presented'], ['solve', 'was'], ['discussed'], ['concluded', 'honoring']]
Is the story specific? False


In [40]:


# Example usage
story_text = 'The annual science called Gitex fair showcased impressive projects from young innovators. Students presented their research findings on various topics. A robot that can solve complex puzzles was a highlight of the event. Researchers discussed cutting-edge advancements in artificial intelligence. The fair concluded with an awards ceremony honoring the top projects.'
is_specific_story = specificity_spacy(story_text)
print("Is the story specific?", is_specific_story)


named entities [['Gitex'], [], [], [], []]
events [['called', 'showcased'], ['presented'], ['solve', 'was'], ['discussed'], ['concluded', 'honoring']]
Is the story specific? False


In [41]:
def evaluate_specificity_dataframe(data_frame):
    correct_predictions = 0
    total_stories = len(data_frame)

    for index, row in data_frame.iterrows():
        story = row['story']
        ground_truth_label = row['is_specific']
        
        predicted_label = specificity_spacy(story)
        
        # Convert boolean to string
        predicted_label_str = "yes" if predicted_label else "no"
        
        if predicted_label_str == ground_truth_label:
            correct_predictions += 1

    accuracy = correct_predictions / total_stories

    return accuracy

accuracy = evaluate_specificity_dataframe(data_specificity)
print(f"Accuracy: {accuracy:.2f}")


named entities [['Gitex'], [], [], [], []]
events [['called', 'showcased'], ['presented'], ['solve', 'was'], ['discussed'], ['concluded', 'honoring']]
named entities [[], [], [], [], []]
events [['set'], ['turned'], ['flew'], ['descended'], ['was captivating']]
named entities [[], [], [], [], []]
events [['walked'], ['honked', 'shouted'], ['took'], ['gave'], ['cheered']]
named entities [[], [], [], []]
events [['embarked'], ['sought', 'save'], ['encountered', 'forged'], ['were']]
named entities [['midnight'], ['Ali'], [], [], []]
events [['struck'], ['crossed'], ['walked'], ['whispered'], ['emerged']]
Accuracy: 0.60


In [42]:
def evaluate_precision(data_frame):
    true_positives = 0
    false_positives = 0

    for index, row in data_frame.iterrows():
        story = row['story']
        ground_truth_label = row['is_specific']

        predicted_label = specificity_spacy(story)
        
        if predicted_label and ground_truth_label == "yes":
            true_positives += 1
        elif predicted_label and ground_truth_label == "no":
            false_positives += 1

    precision = true_positives / (true_positives + false_positives)
    return precision

precision = evaluate_precision(data_specificity)
print(f"Precision: {precision:.2f}")


named entities [['Gitex'], [], [], [], []]
events [['called', 'showcased'], ['presented'], ['solve', 'was'], ['discussed'], ['concluded', 'honoring']]
named entities [[], [], [], [], []]
events [['set'], ['turned'], ['flew'], ['descended'], ['was captivating']]
named entities [[], [], [], [], []]
events [['walked'], ['honked', 'shouted'], ['took'], ['gave'], ['cheered']]
named entities [[], [], [], []]
events [['embarked'], ['sought', 'save'], ['encountered', 'forged'], ['were']]
named entities [['midnight'], ['Ali'], [], [], []]
events [['struck'], ['crossed'], ['walked'], ['whispered'], ['emerged']]
Precision: 1.00


In [43]:
def evaluate_recall(data_frame):
    true_positives = 0
    false_negatives = 0

    for index, row in data_frame.iterrows():
        story = row['story']
        ground_truth_label = row['is_specific']

        predicted_label = specificity_spacy(story)
        
        if predicted_label and ground_truth_label == "yes":
            true_positives += 1
        elif not predicted_label and ground_truth_label == "yes":
            false_negatives += 1

    recall = true_positives / (true_positives + false_negatives)
    return recall

recall = evaluate_recall(data_specificity)
print(f"Recall: {recall:.2f}")


named entities [['Gitex'], [], [], [], []]
events [['called', 'showcased'], ['presented'], ['solve', 'was'], ['discussed'], ['concluded', 'honoring']]
named entities [[], [], [], [], []]
events [['set'], ['turned'], ['flew'], ['descended'], ['was captivating']]
named entities [[], [], [], [], []]
events [['walked'], ['honked', 'shouted'], ['took'], ['gave'], ['cheered']]
named entities [[], [], [], []]
events [['embarked'], ['sought', 'save'], ['encountered', 'forged'], ['were']]
named entities [['midnight'], ['Ali'], [], [], []]
events [['struck'], ['crossed'], ['walked'], ['whispered'], ['emerged']]
Recall: 0.33


Here's what recall reflects:

Completeness: Recall indicates how well your model is capturing all instances of the positive class (in your case, the specific stories). A higher recall means that your model is successfully identifying most of the specific stories present in the dataset.

Missed Positive Cases: A low recall value suggests that your model is missing a significant portion of the positive cases. This could mean that the model is not sensitive enough to detect the specific stories, leading to false negatives.

Trade-off with Precision: Recall is often in conflict with precision. A high recall could lead to more false positives (cases incorrectly classified as positive), as the model may be more inclusive in classifying instances as positive. Balancing recall and precision is important depending on your use case.

# 2.Personal Context 

In [44]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from gensim import corpora, models

def personal_context(text):
    # Tokenize text into sentences
    sentences = nltk.sent_tokenize(text)

    # Sentiment Analysis
    sia = SentimentIntensityAnalyzer()
    sentiment_scores = [sia.polarity_scores(sentence)["compound"] for sentence in sentences]

    # Filter emotional sentences based on sentiment threshold
    emotional_sentences = [sentence for i, sentence in enumerate(sentences) ]
    print(emotional_sentences)

    # Initialize the list to store teller's feelings
    teller_feelings = []
    
    # Pronoun and Verbal Analysis
    for sentence in emotional_sentences:
        words = nltk.word_tokenize(sentence.lower())
        person_pronoun = ["i", "me", "my", "mine"]
        verbal_indicators = ["felt", "was", "experienced", "sensed","loved","hated"]
        
        if any(pronoun in words for pronoun in person_pronoun ) and any(indicator in words for indicator in verbal_indicators):
            teller_feelings.append(sentence)
            #print(sentence)

    # Topic Modeling (only if there are teller's feelings)
    if teller_feelings:
        tokenized_sentences = [nltk.word_tokenize(sentence.lower()) for sentence in teller_feelings]
        dictionary = corpora.Dictionary(tokenized_sentences)
        corpus = [dictionary.doc2bow(tokens) for tokens in tokenized_sentences]
        lda_model = models.LdaModel(corpus, num_topics=3, id2word=dictionary)

        # Extract most significant topics
        topics = [lda_model.get_document_topics(doc) for doc in corpus]
        most_significant_topics = [max(topic, key=lambda x: x[1]) for topic in topics]

        # Get actual topics
        actual_topics = [lda_model.print_topic(topic[0]) for topic in most_significant_topics]
    else:
        actual_topics = []

    # Return emotional sentences, sentiment scores, and actual topics
    return teller_feelings, sentiment_scores, actual_topics

# Call the function with your input text
result = personal_context("The mine annual science called Gitex fair showcased impressive projects from young innovators. Students presented their research findings on various topics. A robot that can solve complex puzzles was a highlight of the event. Researchers discussed cutting-edge advancements in artificial intelligence. The fair concluded with an awards ceremony honoring the top projects. i loved it")
print(result)


['The mine annual science called Gitex fair showcased impressive projects from young innovators.', 'Students presented their research findings on various topics.', 'A robot that can solve complex puzzles was a highlight of the event.', 'Researchers discussed cutting-edge advancements in artificial intelligence.', 'The fair concluded with an awards ceremony honoring the top projects.', 'i loved it']
(['i loved it'], [0.6808, 0.0, 0.4939, 0.4767, 0.8555, 0.5994], ['0.338*"i" + 0.333*"loved" + 0.329*"it"'])


In [45]:
def has_personal_context(story, sentiment_threshold=0.5):
    # Run the personal_context function to get the results
    teller_feelings, sentiment_scores, actual_topics = personal_context(story)
    
    # Check if teller_stories is not empty and sentiment score is above threshold
    if teller_feelings and any(score > sentiment_threshold for score in sentiment_scores):
        return "yes"
    else:
        return "no"

# Call the has_personal_context function with your input story
result = has_personal_context("The annual science called Gitex fair showcased impressive projects from young innovators. Students presented their research findings on various topics. A robot that can solve complex puzzles was a highlight of the event. Researchers discussed cutting-edge advancements in artificial intelligence. The fair concluded with an awards ceremony honoring the top projects. i loved it")
print(result)


['The annual science called Gitex fair showcased impressive projects from young innovators.', 'Students presented their research findings on various topics.', 'A robot that can solve complex puzzles was a highlight of the event.', 'Researchers discussed cutting-edge advancements in artificial intelligence.', 'The fair concluded with an awards ceremony honoring the top projects.', 'i loved it']
yes


In [46]:
def accuracy_personal_context(data_frame):
    correct_predictions = 0
    total_stories = len(data_frame)

    for index, row in data_frame.iterrows():
        story = row['story']
        ground_truth_label = row['has_personal_context']
        
        predicted_label = has_personal_context(story)
        
        
        if predicted_label == ground_truth_label:
            correct_predictions += 1

    accuracy = correct_predictions / total_stories

    return accuracy

accuracy = accuracy_personal_context(data_personal_context)
print(f"Accuracy: {accuracy:.2f}")


['The annual science called Gitex fair showcased impressive projects from young innovators.', 'Students presented their research findings on various topics.', 'A robot that can solve complex puzzles was a highlight of the event.', 'Researchers discussed cutting-edge advancements in artificial intelligence.', 'The fair concluded with an awards ceremony honoring the top projects.']
['The sun set behind the mountains.', 'The sky turned into shades of orange and pink.', 'Birds flew back to their nests.', 'A peaceful silence descended upon the landscape.', 'The beauty of nature was captivating.']
['In a bustling city, people hurriedly walked down the streets.', 'Cars honked and street vendors shouted.', 'A political rally took place at the central square.', 'The mayor gave an impassioned speech.', 'Citizens cheered for their favorite candidates.', 'I loved it a lot.', 'It was very moving']
['Im a young sorcerer who embarked on a quest.', 'I sought a magical artifact to save my kingdom.', 'A

In [47]:
def precision_personal_context(data_frame):
    
    true_positives = 0 
    false_positives = 0

    for index, row in data_frame.iterrows():
        story = row['story']
        ground_truth_label = row['has_personal_context']
        
        predicted_label = has_personal_context(story)
        
        
        if predicted_label and ground_truth_label == "yes":
            true_positives += 1
        elif predicted_label and ground_truth_label == "no":
            false_positives += 1

    precision = true_positives / (true_positives + false_positives)
    return precision

precision = precision_personal_context(data_personal_context)
print(f"Precision: {precision:.2f}")


['The annual science called Gitex fair showcased impressive projects from young innovators.', 'Students presented their research findings on various topics.', 'A robot that can solve complex puzzles was a highlight of the event.', 'Researchers discussed cutting-edge advancements in artificial intelligence.', 'The fair concluded with an awards ceremony honoring the top projects.']
['The sun set behind the mountains.', 'The sky turned into shades of orange and pink.', 'Birds flew back to their nests.', 'A peaceful silence descended upon the landscape.', 'The beauty of nature was captivating.']
['In a bustling city, people hurriedly walked down the streets.', 'Cars honked and street vendors shouted.', 'A political rally took place at the central square.', 'The mayor gave an impassioned speech.', 'Citizens cheered for their favorite candidates.', 'I loved it a lot.', 'It was very moving']
['Im a young sorcerer who embarked on a quest.', 'I sought a magical artifact to save my kingdom.', 'A

In [48]:
def recall_personal_context(data_frame):
    true_positives = 0 
    false_negatives = 0 

    for index, row in data_frame.iterrows():
        story = row['story']
        ground_truth_label = row['has_personal_context']
        
        predicted_label = has_personal_context(story)
        
        
        if predicted_label and ground_truth_label == "yes":
            true_positives += 1
        elif not predicted_label and ground_truth_label == "yes":
            false_negatives += 1

    recall = true_positives / (true_positives + false_negatives)
    return recall

recall = recall_personal_context(data_personal_context)
print(f"Recall: {recall:.2f}")


['The annual science called Gitex fair showcased impressive projects from young innovators.', 'Students presented their research findings on various topics.', 'A robot that can solve complex puzzles was a highlight of the event.', 'Researchers discussed cutting-edge advancements in artificial intelligence.', 'The fair concluded with an awards ceremony honoring the top projects.']
['The sun set behind the mountains.', 'The sky turned into shades of orange and pink.', 'Birds flew back to their nests.', 'A peaceful silence descended upon the landscape.', 'The beauty of nature was captivating.']
['In a bustling city, people hurriedly walked down the streets.', 'Cars honked and street vendors shouted.', 'A political rally took place at the central square.', 'The mayor gave an impassioned speech.', 'Citizens cheered for their favorite candidates.', 'I loved it a lot.', 'It was very moving']
['Im a young sorcerer who embarked on a quest.', 'I sought a magical artifact to save my kingdom.', 'A

## 3.Sensory details 

In [49]:
import nltk

def analyze_sensory_details(text):
    sentences = nltk.sent_tokenize(text)

    sensory_keywords = {
        'sight': ['see', 'look', 'watch', 'observe', 'gaze', 'view', 'stare'],
        'sound': ['hear', 'listen', 'sound', 'noise', 'auditory', 'echo'],
        'smell': ['smell', 'scent', 'aroma', 'fragrance', 'odor', 'perfume'],
        'taste': ['taste', 'flavor', 'savor', 'palate', 'tasty', 'delicious'],
        'touch': ['feel', 'touch', 'texture', 'tactile', 'surface', 'contact']
    }

    sensory_details = []
    for sentence in sentences:
        lower_sentence = sentence.lower()
        for sense, keywords in sensory_keywords.items():
            for keyword in keywords:
                if keyword in lower_sentence:
                    sensory_details.append({'sense': sense, 'sentence': sentence})
                    break

    return sensory_details


In [50]:
def has_sensory_details(text):
    sensory_details = analyze_sensory_details(text)
    return "yes" if sensory_details else "no"

text = "The sun sets over the mountains, casting a warm orange glow. Birds sing in the trees, and the scent of blooming flowers fills the air."
result = has_sensory_details(text)
print(result)

yes


In [51]:
has_sensory_details(data.iloc[2]['Story'])

'yes'

In [52]:
def accuracy_sensory_details(data_frame):
    correct_predictions = 0
    total_stories = len(data_frame)

    for index, row in data_frame.iterrows():
        story = row['story']
        ground_truth_label = row['has_sensory_details']
        
        predicted_label = has_personal_context(story)
        
        
        if predicted_label == ground_truth_label:
            correct_predictions += 1

    accuracy = correct_predictions / total_stories

    return accuracy

accuracy = accuracy_sensory_details(data_sensory_details)
print(f"Accuracy: {accuracy:.2f}")


['The annual science called Gitex fair showcased impressive projects from young innovators.', 'Students presented their research findings on various topics.', 'A robot that can solve complex puzzles was a highlight of the event.', 'Researchers discussed cutting-edge advancements in artificial intelligence.', 'The fair concluded with an awards ceremony honoring the top projects.']
['The sun set behind the mountains.', 'I saw the sky turning into shades of orange and pink.', 'Birds flew back to their nests.', 'A peaceful silence descended upon the landscape.', 'The beauty of nature was captivating.']
['In a bustling city, people hurriedly walked down the streets.', 'Cars honked and street vendors shouted.', 'A political rally took place at the central square.', 'The mayor gave an impassioned speech.', 'Citizens cheered for their favorite candidates.', 'I loved it a lot.', 'It was very moving']
['Im a young sorcerer who embarked on a quest.', 'I sought a magical artifact to save my kingdo

In [53]:
def precision_sensory_details(data_frame):
    
    true_positives = 0 
    false_positives = 0

    for index, row in data_frame.iterrows():
        story = row['story']
        ground_truth_label = row['has_sensory_details']
        
        predicted_label = has_sensory_details(story)
        
        
        if predicted_label and ground_truth_label == "yes":
            true_positives += 1
        elif predicted_label and ground_truth_label == "no":
            false_positives += 1

    precision = true_positives / (true_positives + false_positives)
    return precision

precision = precision_sensory_details(data_sensory_details)
print(f"Precision: {precision:.2f}")


Precision: 0.60


In [54]:
def recall_sensory_details(data_frame):
    true_positives = 0 
    false_negatives = 0 

    for index, row in data_frame.iterrows():
        story = row['story']
        ground_truth_label = row['has_sensory_details']
        
        predicted_label = has_personal_context(story)
        
        
        if predicted_label and ground_truth_label == "yes":
            true_positives += 1
        elif not predicted_label and ground_truth_label == "yes":
            false_negatives += 1

    recall = true_positives / (true_positives + false_negatives)
    return recall

recall = recall_sensory_details(data_sensory_details)
print(f"Recall: {recall:.2f}")


['The annual science called Gitex fair showcased impressive projects from young innovators.', 'Students presented their research findings on various topics.', 'A robot that can solve complex puzzles was a highlight of the event.', 'Researchers discussed cutting-edge advancements in artificial intelligence.', 'The fair concluded with an awards ceremony honoring the top projects.']
['The sun set behind the mountains.', 'I saw the sky turning into shades of orange and pink.', 'Birds flew back to their nests.', 'A peaceful silence descended upon the landscape.', 'The beauty of nature was captivating.']
['In a bustling city, people hurriedly walked down the streets.', 'Cars honked and street vendors shouted.', 'A political rally took place at the central square.', 'The mayor gave an impassioned speech.', 'Citizens cheered for their favorite candidates.', 'I loved it a lot.', 'It was very moving']
['Im a young sorcerer who embarked on a quest.', 'I sought a magical artifact to save my kingdo