In [7]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA


In [11]:
# Load the stories dataset into a DataFrame; later cells read its 'Story' column.
# NOTE(review): hard-coded absolute Kaggle input path — presumably only resolves
# inside that environment; confirm before running elsewhere.
data = pd.read_csv('/kaggle/input/dataset0/data.csv')

Criteria for defining a personal event memory (Pillemer, 1998). Such a memory should:
*  (a) present a specific event that took place at a particular time and place, rather than a summary event or extended series of events.
* (b) contain a detailed account of the rememberer's own personal circumstances at the time of the event. 
* (c) evoke sensory images or bodily sensations that contribute to the feeling of "re-experiencing" or "reliving" the event.
* (d) link its details and images to a particular moment or moments of phenomenal experience. 
* (e) be believed to be a truthful representation of what actually transpired.

## 1. Specificity

In [8]:
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tag import pos_tag
from nltk.chunk import ne_chunk

# Tokenization
def tokenize_text(text):
    """Split ``text`` into sentences with ``sent_tokenize`` and return them."""
    sentences = sent_tokenize(text)
    return sentences

# Part-of-Speech (POS) Tagging
def pos_tagging(text):
    """Word-tokenize ``text`` and return the result of ``pos_tag`` on the tokens."""
    return pos_tag(word_tokenize(text))


#  Named Entity Recognition (NER)
def named_entity_recognition(text):
    """Return the named entities that NLTK's ``ne_chunk`` finds in ``text``.

    Args:
        text: Raw text to analyse.

    Returns:
        A list of strings, one per entity chunk, each made of the chunk's
        words joined by single spaces. Empty when no entity is found.
    """
    tagged = pos_tag(word_tokenize(text))
    ne_tree = ne_chunk(tagged)
    # ``ne_chunk`` is called with its default ``binary=False``, which labels
    # entity subtrees with their type (PERSON, GPE, ORGANIZATION, ...) and never
    # with the generic 'NE'. Filtering on 'NE' therefore discarded every entity.
    # Plain (word, tag) tuples have no ``label`` attribute, so any top-level
    # chunk that has one is an entity subtree.
    return [
        ' '.join(leaf[0] for leaf in chunk)
        for chunk in ne_tree
        if hasattr(chunk, 'label')
    ]

# Event Extraction
def extract_events(text):
    """Collect runs of consecutive verb tokens from ``text``.

    Each maximal run of adjacent tokens whose POS tag starts with ``'VB'`` is
    joined with single spaces into one string; the strings are returned in
    the order the runs occur.
    """
    verb_runs = []
    run = []
    for pair in pos_tag(word_tokenize(text)):
        if pair[1].startswith('VB'):
            run.append(pair[0])
            continue
        # A non-verb token closes the run in progress, if there is one.
        if run:
            verb_runs.append(' '.join(run))
            run = []
    # Flush a run that reaches the end of the text.
    if run:
        verb_runs.append(' '.join(run))
    return verb_runs





### Testing function for specificity

In [9]:
# Show the first story (row 0, 'Story' column).
# NOTE(review): the recorded output of this cell is a NameError — it was executed
# before the cell that assigns `data`; run the data-loading cell first.
print(data.iloc[0]['Story'])

NameError: name 'data' is not defined

In [22]:
def specificity_ne(story):
    """Print the NLTK-based specificity analysis of ``story``.

    Prints the sentence split, then per-sentence POS tags, named entities and
    verb-run "events", each followed by a blank line. Returns ``None``.
    """
    sentences = tokenize_text(story)
    print('Sentences:', sentences)
    print()
    # Each stage is evaluated right before it is printed, one heading at a time.
    stages = (
        ('Tagged Sentences:', pos_tagging),
        ('Named Entities:', named_entity_recognition),
        ('Events:', extract_events),
    )
    for heading, analyse in stages:
        print(heading, [analyse(sentence) for sentence in sentences])
        print()


Testing

In [23]:
# Run the NLTK-based specificity report on the first story (row 0).
specificity_ne(data.iloc[0]['Story'])

Sentences: ['the loss of my father will forever leave an indelible mark on my heart it also provided me with an unwavering strength it shaped me into a more resilient and compassionate person capable of facing adversity with newfound determination i carry my fathers memory with me drawing inspiration from his life and the lessons he imparted through this turning point i have learned that strength can emerge from even the darkest moments and i am committed to living a life that honors his legacy']

Tagged Sentences: [[('the', 'DT'), ('loss', 'NN'), ('of', 'IN'), ('my', 'PRP$'), ('father', 'NN'), ('will', 'MD'), ('forever', 'VB'), ('leave', 'VB'), ('an', 'DT'), ('indelible', 'JJ'), ('mark', 'NN'), ('on', 'IN'), ('my', 'PRP$'), ('heart', 'NN'), ('it', 'PRP'), ('also', 'RB'), ('provided', 'VBD'), ('me', 'PRP'), ('with', 'IN'), ('an', 'DT'), ('unwavering', 'JJ'), ('strength', 'NN'), ('it', 'PRP'), ('shaped', 'VBD'), ('me', 'PRP'), ('into', 'IN'), ('a', 'DT'), ('more', 'RBR'), ('resilient', 

The model is not able to capture named entities. Let's try with another example  

In [24]:
# Run the NLTK-based specificity report on the second story (row 1).
specificity_ne(data.iloc[1]['Story'])

Sentences: ['i adopted a cat 7 months ago i wasnt planning it and it was like a dream to me and one day my mom entered the house with a kitten in her hands and it was the happiest day of my life i couldnt believe it and since then lili  my cat became the only being that i really love from the bottom of my heart']

Tagged Sentences: [[('i', 'NN'), ('adopted', 'VBD'), ('a', 'DT'), ('cat', 'JJ'), ('7', 'CD'), ('months', 'NNS'), ('ago', 'IN'), ('i', 'JJ'), ('wasnt', 'VBP'), ('planning', 'VBG'), ('it', 'PRP'), ('and', 'CC'), ('it', 'PRP'), ('was', 'VBD'), ('like', 'IN'), ('a', 'DT'), ('dream', 'NN'), ('to', 'TO'), ('me', 'PRP'), ('and', 'CC'), ('one', 'CD'), ('day', 'NN'), ('my', 'PRP$'), ('mom', 'NN'), ('entered', 'VBD'), ('the', 'DT'), ('house', 'NN'), ('with', 'IN'), ('a', 'DT'), ('kitten', 'NN'), ('in', 'IN'), ('her', 'PRP$'), ('hands', 'NNS'), ('and', 'CC'), ('it', 'PRP'), ('was', 'VBD'), ('the', 'DT'), ('happiest', 'JJS'), ('day', 'NN'), ('of', 'IN'), ('my', 'PRP$'), ('life', 'NN'), (

 NLTK's NE chunker was not able to capture any named entities. Let's try another Python library,
 starting with spaCy.

In [25]:
import spacy

# Loaded spaCy pipelines keyed by model name, so each model is loaded only once
# instead of on every call (the function is called once per sentence).
_SPACY_PIPELINES = {}


def named_entity_recognition_spacy(text, model_name='en_core_web_sm'):
    """Return the text of every entity spaCy finds in ``text``.

    Args:
        text: Raw text to analyse.
        model_name: Name of the spaCy pipeline passed to ``spacy.load``.
            Defaults to ``'en_core_web_sm'``.

    Returns:
        A list with ``ent.text`` for each entity in ``doc.ents``.
    """
    nlp = _SPACY_PIPELINES.get(model_name)
    if nlp is None:
        # First use of this model: load it and keep it for later calls.
        nlp = _SPACY_PIPELINES[model_name] = spacy.load(model_name)
    return [ent.text for ent in nlp(text).ents]


In [26]:
# `sentences` is not defined by any earlier cell (it only exists as a local
# variable inside the specificity helpers), so build it here explicitly instead of
# relying on leftover kernel state. The second story (row 1) is used because it is
# the one containing the "7 months ago" entity shown in the recorded output.
sentences = tokenize_text(data.iloc[1]['Story'])
named_entities = [named_entity_recognition_spacy(sentence) for sentence in sentences]
print('Named Entities:', named_entities)

Named Entities: [['7 months ago']]


In [27]:
def specificity_ne_spacy(story):
    """Print the specificity analysis of ``story`` using spaCy for entities.

    Prints the sentence split, then per-sentence POS tags, spaCy named
    entities and verb-run "events", one line each. Returns ``None``.
    """
    sentences = tokenize_text(story)
    print('Sentences:', sentences)
    # Each stage is evaluated right before it is printed.
    stages = (
        ('Tagged Sentences:', pos_tagging),
        ('Named Entities:', named_entity_recognition_spacy),
        ('Events:', extract_events),
    )
    for heading, analyse in stages:
        print(heading, [analyse(sentence) for sentence in sentences])


Testing the `specificity_ne_spacy` function:

In [28]:
# Run the spaCy-based specificity report on the first story (row 0).
specificity_ne_spacy(data.iloc[0]['Story'])

Sentences: ['the loss of my father will forever leave an indelible mark on my heart it also provided me with an unwavering strength it shaped me into a more resilient and compassionate person capable of facing adversity with newfound determination i carry my fathers memory with me drawing inspiration from his life and the lessons he imparted through this turning point i have learned that strength can emerge from even the darkest moments and i am committed to living a life that honors his legacy']
Tagged Sentences: [[('the', 'DT'), ('loss', 'NN'), ('of', 'IN'), ('my', 'PRP$'), ('father', 'NN'), ('will', 'MD'), ('forever', 'VB'), ('leave', 'VB'), ('an', 'DT'), ('indelible', 'JJ'), ('mark', 'NN'), ('on', 'IN'), ('my', 'PRP$'), ('heart', 'NN'), ('it', 'PRP'), ('also', 'RB'), ('provided', 'VBD'), ('me', 'PRP'), ('with', 'IN'), ('an', 'DT'), ('unwavering', 'JJ'), ('strength', 'NN'), ('it', 'PRP'), ('shaped', 'VBD'), ('me', 'PRP'), ('into', 'IN'), ('a', 'DT'), ('more', 'RBR'), ('resilient', '

In [29]:
# Run the spaCy-based specificity report on the second story (row 1).
specificity_ne_spacy(data.iloc[1]['Story'])

Sentences: ['i adopted a cat 7 months ago i wasnt planning it and it was like a dream to me and one day my mom entered the house with a kitten in her hands and it was the happiest day of my life i couldnt believe it and since then lili  my cat became the only being that i really love from the bottom of my heart']
Tagged Sentences: [[('i', 'NN'), ('adopted', 'VBD'), ('a', 'DT'), ('cat', 'JJ'), ('7', 'CD'), ('months', 'NNS'), ('ago', 'IN'), ('i', 'JJ'), ('wasnt', 'VBP'), ('planning', 'VBG'), ('it', 'PRP'), ('and', 'CC'), ('it', 'PRP'), ('was', 'VBD'), ('like', 'IN'), ('a', 'DT'), ('dream', 'NN'), ('to', 'TO'), ('me', 'PRP'), ('and', 'CC'), ('one', 'CD'), ('day', 'NN'), ('my', 'PRP$'), ('mom', 'NN'), ('entered', 'VBD'), ('the', 'DT'), ('house', 'NN'), ('with', 'IN'), ('a', 'DT'), ('kitten', 'NN'), ('in', 'IN'), ('her', 'PRP$'), ('hands', 'NNS'), ('and', 'CC'), ('it', 'PRP'), ('was', 'VBD'), ('the', 'DT'), ('happiest', 'JJS'), ('day', 'NN'), ('of', 'IN'), ('my', 'PRP$'), ('life', 'NN'), ('

## 2. Personal Context

In [12]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from gensim import corpora, models

def evaluate_personal_context(text, num_topics=3, random_state=42):
    """Score each sentence's sentiment and find its dominant LDA topic.

    Args:
        text: Raw text to analyse.
        num_topics: Number of topics the LDA model is trained with.
        random_state: Seed passed to ``LdaModel`` so that repeated runs on
            the same text give the same topics.

    Returns:
        A ``(sentiment_scores, actual_topics)`` tuple with one entry per
        sentence: the ``"compound"`` polarity score, and the printed form of
        the topic with the highest probability for that sentence. Both lists
        are empty when ``text`` contains no sentences.
    """
    # Tokenize text into sentences
    sentences = nltk.sent_tokenize(text)
    if not sentences:
        # An LDA model cannot be built from an empty corpus; nothing to analyse.
        return [], []

    # Sentiment Analysis
    sia = SentimentIntensityAnalyzer()
    sentiment_scores = [sia.polarity_scores(sentence)["compound"] for sentence in sentences]

    # Topic Modeling: every sentence is treated as one document.
    tokenized_sentences = [nltk.word_tokenize(sentence.lower()) for sentence in sentences]
    dictionary = corpora.Dictionary(tokenized_sentences)
    corpus = [dictionary.doc2bow(tokens) for tokens in tokenized_sentences]
    lda_model = models.LdaModel(
        corpus,
        num_topics=num_topics,
        id2word=dictionary,
        random_state=random_state,  # unseeded LDA gives different topics on every run
    )

    # For each sentence keep the topic with the highest probability and render
    # it as its weighted word list.
    actual_topics = []
    for bow in corpus:
        topic_id = max(lda_model.get_document_topics(bow), key=lambda pair: pair[1])[0]
        actual_topics.append(lda_model.print_topic(topic_id))

    # Return sentiment scores and actual topics
    return sentiment_scores, actual_topics


In the context of topic modeling with LDA, the weights assigned to each word in a topic represent the importance or prevalence of that word within the topic. In the outputs below, each topic is printed as a weighted sum of its most important words.

Understanding the significance of individual words and their weights within a topic can help provide insights into the key themes and subjects present in the text.

In [13]:
def test_personal_context(text):
    sentiment_scores, actual_topics = evaluate_personal_context(text)
    print("Sentiment Scores:", sentiment_scores)
    print()
    print("Actual Topics:")
    for topic in actual_topics:
        print(topic)

In [14]:
# Print sentiment scores and dominant topics for the first story (row 0).
test_personal_context(data.iloc[0]['Story'])

Sentiment Scores: [0.1779, 0.7152, 0.5267, 0.6597]

Actual Topics:
0.039*"my" + 0.033*"an" + 0.028*"the" + 0.028*"." + 0.027*"strength" + 0.026*"it" + 0.025*"of" + 0.025*"with" + 0.024*"me" + 0.023*"will"
0.038*"and" + 0.036*"." + 0.035*"i" + 0.032*"with" + 0.030*"me" + 0.028*"life" + 0.027*"the" + 0.027*"from" + 0.026*"his" + 0.023*"a"
0.038*"and" + 0.036*"." + 0.035*"i" + 0.032*"with" + 0.030*"me" + 0.028*"life" + 0.027*"the" + 0.027*"from" + 0.026*"his" + 0.023*"a"
0.038*"and" + 0.036*"." + 0.035*"i" + 0.032*"with" + 0.030*"me" + 0.028*"life" + 0.027*"the" + 0.027*"from" + 0.026*"his" + 0.023*"a"


In [15]:
# Same report on a hand-written sentence that mixes a loss with a positive word.
test_personal_context("i lost my best friend and im so happy")

Sentiment Scores: [0.8966]

Actual Topics:
0.115*"im" + 0.113*"lost" + 0.112*"my" + 0.111*"and" + 0.111*"so" + 0.111*"i" + 0.110*"friend" + 0.110*"happy" + 0.106*"best"


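In the first topic extracted for the first story, the word "my" has the highest weight (0.039 in the run shown above; the exact weights can differ between runs unless the LDA model is seeded).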

A high weight for the word "my" suggests that it is a significant term within the topic identified by the LDA model. This means that the word "my" occurs frequently and carries substantial importance within the text when discussing the particular topic associated with that topic index.

In this case, it indicates that personal ownership or possession, likely related to the topic of loss and enduring emotional impact, plays a prominent role in the text. The word "my" may be indicating a personal connection or the speaker's individual perspective in relation to the topic being discussed.

## 3. Sensory details

In [21]:
import nltk

def analyze_sensory_details(text):
    """Find the sentences of ``text`` that mention one of the five senses.

    A sentence counts for a sense when it contains, as a whole word, one of
    that sense's keywords, a regular inflection of it (-s, -es, -ed/-d,
    -ing) or one of the listed irregular forms. Matching whole words avoids
    the false positives of plain substring search, e.g. 'hear' inside
    'heart' or 'see' inside 'seemed'.

    Args:
        text: Raw text to analyse.

    Returns:
        A list of ``{'sense': ..., 'sentence': ...}`` dicts in sentence
        order, with at most one entry per sense per sentence. The sentence is
        returned in its original casing.
    """
    import re  # local import: only this function needs it

    sentences = nltk.sent_tokenize(text)

    sensory_keywords = {
        'sight': ['see', 'look', 'watch'],
        'sound': ['hear', 'listen', 'sound'],
        'smell': ['smell', 'scent', 'aroma'],
        'taste': ['taste', 'flavor'],
        'touch': ['feel', 'touch', 'texture']
    }
    # Irregular forms that the suffix rules below cannot produce.
    irregular_forms = {
        'sight': {'saw', 'seen'},
        'sound': {'heard'},
        'touch': {'felt'},
    }

    def keyword_forms(keyword):
        """Return ``keyword`` together with its regular inflections."""
        if keyword.endswith('e') and not keyword.endswith('ee'):
            # taste -> tastes, tasted, tasting
            return {keyword, keyword + 's', keyword + 'd', keyword[:-1] + 'ing'}
        # look -> looks, looked, looking; watch -> watches
        return {keyword, keyword + 's', keyword + 'es', keyword + 'ed', keyword + 'ing'}

    # Every accepted word form per sense, built once for the whole text.
    sense_forms = {}
    for sense, keywords in sensory_keywords.items():
        forms = set(irregular_forms.get(sense, ()))
        for keyword in keywords:
            forms |= keyword_forms(keyword)
        sense_forms[sense] = forms

    sensory_details = []
    for sentence in sentences:
        words = set(re.findall(r"[a-z]+", sentence.lower()))
        for sense, forms in sense_forms.items():
            if words & forms:
                sensory_details.append({'sense': sense, 'sentence': sentence})

    return sensory_details


In [22]:
def test_sensory_details(text):
    sensory_details = analyze_sensory_details(text)
    print("Sensory Details:")
    for detail in sensory_details:
        print(f"{detail['sense']}: {detail['sentence']}")

In [25]:
# Print the sensory details detected in the third story (row 2).
test_sensory_details(data.iloc[2]['Story'])

Sensory Details:
sight: It would range from learning random science things to make myself look like I'm some sort of a prodigy child to stuff like learning how to play as many sports as possible to look like I was athletic (I wasn't that good at any of them just okay).


## 4. Phenomenal experience

Using emotion (sentiment) analysis.

In [25]:
from nltk.sentiment import SentimentIntensityAnalyzer

def analyze_emotional_tone(text):
    """Return the ``SentimentIntensityAnalyzer`` polarity scores for ``text``.

    A new analyzer is created on each call and the result of its
    ``polarity_scores`` method is returned unchanged.
    """
    return SentimentIntensityAnalyzer().polarity_scores(text)

# Two-sentence example that pairs a statement of loss with a statement of happiness.
speech = "The loss of my father will forever leave an indelible mark on my heart. But im so happy that he died"

# Score the whole passage in one call and show the resulting score dictionary.
emotion_scores = analyze_emotional_tone(speech)
print("Emotion Scores:", emotion_scores)


Emotion Scores: {'neg': 0.246, 'neu': 0.589, 'pos': 0.164, 'compound': -0.0922}


## 5. Truthfulness