In [123]:
from dotenv import load_dotenv

load_dotenv()

True

In [124]:
responses = [
"I organized weekly food drives to ensure that everyone in our neighborhood had enough to eat during the economic downturn.",

"During the floods, I volunteered my boat for rescue operations to help evacuate families stranded in their homes.",

"I set up a community-wide online platform for sharing resources and information to keep everyone connected and informed during the pandemic.",

"As a teacher, I held free online tutoring sessions to assist students with their studies when schools were closed.",

"I spearheaded a local campaign to support small businesses struggling to survive during the lockdowns, encouraging residents to buy local.",

"By offering my garage as a collection point, I helped coordinate the distribution of emergency supplies to those affected by the wildfires.",

"I used my skills as a nurse to provide first aid training to community members, preparing them for potential medical emergencies.",

"During the power outage, I invited neighbors to charge their devices using my solar-powered generator, keeping them connected with their loved ones.",

"I contributed by translating emergency alerts and health advisories into several languages, helping our diverse community stay safe and informed.",

"I organized peaceful community dialogues to address tensions and promote understanding among different groups in our area.",

"By volunteering at the local shelter, I helped provide a safe space and necessary resources for the homeless during the severe winter storms.",

"I launched a mental health support group to help my community cope with the stress and anxiety brought on by continuous uncertainty.",

"I facilitated pet rescue operations and temporary shelters for animals displaced by the hurricane, ensuring they were not forgotten.",

"During the crisis, I donated part of my warehouse for use as a community center, where people could gather for support and to receive aid.",

"I offered free legal advice to residents facing eviction, helping them navigate their rights and options in these challenging times.",

"As a local journalist, I reported on the ground realities, bringing national attention to the needs of our community during the disaster.",

"I helped elderly neighbors with grocery shopping and essential errands, especially when it was unsafe for them to go outside during the health crisis.",

"By creating art murals around the city, I aimed to uplift spirits and foster a sense of hope and resilience among community members.",

"I organized a series of skill-sharing workshops to empower residents with new abilities to tackle the economic challenges ahead.",

"By coordinating with local authorities, I helped streamline aid processes, ensuring quicker and more efficient relief distribution.",
]

In [125]:
import yake
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

def extract_keywords(text):
    kw_extractor = yake.KeywordExtractor()
    keywords = kw_extractor.extract_keywords(text)
    return [kw[0] for kw in keywords if kw[1] < 0.1]  # Filter to get more relevant keywords


def identify_topics(texts, num_topics=5):
    vectorizer = CountVectorizer(max_df=0.95, min_df=2, stop_words='english')
    doc_term_matrix = vectorizer.fit_transform(texts)
    
    lda = LatentDirichletAllocation(n_components=num_topics, random_state=0)
    lda.fit(doc_term_matrix)
    
    for i, topic in enumerate(lda.components_):
        print(f"Top words in topic #{i}:")
        print([vectorizer.get_feature_names_out()[i] for i in topic.argsort()[-10:]])
        

identify_topics(responses)


Top words in topic #0:
['informed', 'sharing', 'helped', 'aid', 'support', 'members', 'provide', 'resources', 'local', 'community']
Top words in topic #1:
['community', 'local', 'aid', 'emergency', 'help', 'helped', 'distribution', 'ensuring', 'operations', 'rescue']
Top words in topic #2:
['emergency', 'health', 'sharing', 'informed', 'safe', 'community', 'economic', 'helping', 'residents', 'organized']
Top words in topic #3:
['helped', 'help', 'residents', 'health', 'crisis', 'online', 'free', 'support', 'community', 'aid']
Top words in topic #4:
['free', 'aid', 'residents', 'help', 'community', 'crisis', 'connected', 'helped', 'health', 'neighbors']


In [127]:
import spacy
nlp = spacy.load("en_core_web_sm")  # Load SpaCy English model

def extract_entities(responses):
    for response in responses:
        doc = nlp(response)
        objects = []
        communities = []
        for token in doc:
            # Extracting Objects based on dependency parsing
            if token.dep_ in ['dobj', 'attr', 'pobj']:  # direct objects, attributes, object of preposition
                objects.append(token.text)
            # Extracting Groups/Communities if mentioned explicitly
            if token.ent_type_ in ['NORP', 'ORG', 'GPE']:  # nationalities, organizations, geopolitical entities
                communities.append(token.text)
            # Additional heuristic: Check for noun chunks that might be groups
            for chunk in doc.noun_chunks:
                if 'community' in chunk.text or 'neighborhood' in chunk.text or 'families' in chunk.text:
                    communities.append(chunk.text)

        print(f"Response: {response}")
        print("Extracted Objects:", list(set(objects)))
        print("Extracted Groups/Communities:", list(set(communities)))
        print()

extract_entities(responses)

Response: I organized weekly food drives to ensure that everyone in our neighborhood had enough to eat during the economic downturn.
Extracted Objects: ['downturn', 'enough', 'neighborhood', 'drives']
Extracted Groups/Communities: ['our neighborhood']

Response: During the floods, I volunteered my boat for rescue operations to help evacuate families stranded in their homes.
Extracted Objects: ['floods', 'homes', 'families', 'operations', 'boat']
Extracted Groups/Communities: ['families']

Response: I set up a community-wide online platform for sharing resources and information to keep everyone connected and informed during the pandemic.
Extracted Objects: ['platform', 'resources', 'everyone', 'pandemic']
Extracted Groups/Communities: ['a community-wide online platform']

Response: As a teacher, I held free online tutoring sessions to assist students with their studies when schools were closed.
Extracted Objects: ['studies', 'teacher', 'sessions', 'students']
Extracted Groups/Communitie

In [114]:
import os
from neo4j import GraphDatabase, Driver, Transaction

# Create a driver instance
driver = GraphDatabase.driver(
    os.getenv("NEO4J_URI"),
    auth=(os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD"))
)
# Optionally, check if you want to verify connectivity at the start
driver.verify_connectivity()

# Clear the database
driver.session().run("MATCH (n) DETACH DELETE n")

<neo4j._sync.work.result.Result at 0x153ece310>

In [115]:
# Function to create nodes in the database
def create_response_nodes(driver:Driver, labels, responses):
    # Create a session using the driver
    with driver.session() as session:
        for i in set(labels):
            session.run("MERGE (:Cluster {id: $id, description: $description})", id=i, description=f"Cluster {i} Description")

        # Create responses and link to clusters
        for response, label in zip(responses, labels):
            session.run(
                """
                CREATE (r:Response {content: $content})
                WITH r
                MATCH (c:Cluster {id: $clusterId})
                CREATE (r)-[:BELONGS_TO]->(c)
                """,
                content=response, clusterId=label
            )

# Define a transaction function to execute a Cypher query
def create_node(tx:Transaction, response, label):
    cypher_query = """
    CREATE (n:Response {content: $content, cluster: $cluster})
    """
    tx.run(cypher_query, content=response, cluster=label)

# Call the function to create nodes
create_response_nodes(driver, labels, responses)

In [116]:
res = driver.execute_query("""
MATCH (n:Response)-[b:BELONGS_TO]->(c:Cluster)
RETURN n,b,c
""")

clusters = {}
for record in res.records:
    cluster = record['c']['id']
    content = record['n']['content']
    if cluster not in clusters:
        clusters[cluster] = []
    clusters[cluster].append(content)
    
clusters

{0: ['I find strength by remembering the stories of our ancestors’ perseverance through tough times.',
  'Sharing stories of survival and heroism inspires me to be resilient.',
  'Keeping a daily routine and setting small, achievable goals help maintain normalcy and resilience.'],
 1: ['Resilience is built through unity and the collective spirit of our community.',
  'Resilience comes from the hope and vision of a peaceful future for our nation.',
  'Organizing community support activities and helping those in need gives me strength.',
  'Through art and creative expression, we can channel our emotions and rebuild our spirit.',
  'Personal resilience is built by facing fears directly and learning to overcome them.',
  'Resilience means adapting to change while holding onto the values that define us.',
  'Building strong networks of communication and support are essential for resilience.',
  'I practice resilience by focusing on solutions rather than dwelling on problems.',
  'Volunteer

In [117]:
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
def extract_key_terms(data):
    vectorizer = CountVectorizer(stop_words='english')
    for cluster, texts in data.items():
        X = vectorizer.fit_transform(texts)
        terms = vectorizer.get_feature_names_out()
        frequencies = X.toarray().sum(axis=0)
        sorted_indices = np.argsort(frequencies)[::-1]
        print(f"Cluster {cluster} key terms: {terms[sorted_indices][:5]}")  # Top 5 terms

extract_key_terms(clusters)

Cluster 0 key terms: ['stories' 'tough' 'perseverance' 'ancestors' 'daily']
Cluster 1 key terms: ['resilience' 'community' 'spirit' 'support' 'built']
Cluster 2 key terms: ['face' 'staying' 'setbacks' 'resilience' 'recovering']
Cluster 3 key terms: ['helps' 'youth' 'supporting' 'struggles' 'small']
Cluster 4 key terms: ['resilience' 'maintaining' 'wellness' 'traditions' 'stress']
