In [45]:
from dotenv import load_dotenv

# Load variables from a .env file into the process environment; the Neo4j
# connection settings are read from the environment via os.getenv further down.
load_dotenv()

True

In [46]:
# Free-text answers about resilience. This list is the corpus that gets
# vectorized and clustered below, and each entry is stored as a Response node.
responses = [
  "Resilience is built through unity and the collective spirit of our community.",
  "I find strength by remembering the stories of our ancestors’ perseverance through tough times.",
  "Supporting each other in small everyday actions helps build a resilient community.",
  "By staying informed and engaged, I feel empowered to face challenges.",
  "Resilience comes from the hope and vision of a peaceful future for our nation.",
  "Educating our youth about our history and struggles helps foster resilience.",
  "I build resilience by maintaining cultural traditions that remind us of our identity and strength.",
  "Organizing community support activities and helping those in need gives me strength.",
  "Through art and creative expression, we can channel our emotions and rebuild our spirit.",
  "Personal resilience is built by facing fears directly and learning to overcome them.",
  "Sharing stories of survival and heroism inspires me to be resilient.",
  "Resilience means adapting to change while holding onto the values that define us.",
  "Physical wellness and mental health are key to maintaining resilience under stress.",
  "Building strong networks of communication and support are essential for resilience.",
  "I practice resilience by focusing on solutions rather than dwelling on problems.",
  "Volunteering in community defense initiatives has strengthened my resolve and resilience.",
  "Keeping a daily routine and setting small, achievable goals help maintain normalcy and resilience.",
  "Resilience is about recovering from setbacks and being ready to face the next challenge.",
  "Faith and spirituality play a crucial role in my personal resilience.",
  "Continuing to celebrate life’s moments, big and small, reinforces the resilience of our community.",
]

In [47]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans


# Tunable settings for the clustering step.
N_CLUSTERS = 5
# K-means starts from random centroids; pinning the seed makes the labels
# (and everything written to Neo4j from them) reproducible across runs.
RANDOM_STATE = 42

# Convert text data to TF-IDF features
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(responses)

# Clustering. n_init is spelled out so the number of restarts does not depend
# on which scikit-learn version happens to be installed.
kmeans = KMeans(n_clusters=N_CLUSTERS, n_init=10, random_state=RANDOM_STATE)
kmeans.fit(X)
labels = kmeans.labels_

# Now, labels can be used to define relationships in Neo4j
labels


array([0, 1, 1, 4, 4, 4, 3, 0, 0, 2, 1, 4, 3, 4, 4, 0, 4, 4, 2, 0],
      dtype=int32)

In [48]:
import os
from neo4j import GraphDatabase, Driver, Transaction

# Create a driver instance from the connection settings found in the environment
driver = GraphDatabase.driver(
    os.getenv("NEO4J_URI"),
    auth=(os.getenv("NEO4J_USERNAME"), os.getenv("NEO4J_PASSWORD"))
)
# Fail fast here if the URI or credentials are wrong, rather than at the first query
driver.verify_connectivity()

# Clear the database.
# WARNING: this deletes every node and relationship in the target database.
# The session is opened in a `with` block so it is always closed, and the
# result is consumed so the delete has finished before the cell returns.
with driver.session() as session:
    session.run("MATCH (n) DETACH DELETE n").consume()

<neo4j._sync.work.result.Result at 0x14e208090>

In [49]:
# Function to create nodes in the database
def create_response_nodes(driver:Driver, labels, responses):
    """Write clusters and responses to the graph and link them together.

    For every distinct value in ``labels`` a ``Cluster`` node is merged with
    ``id`` and a placeholder ``description``. Then, for each
    ``(response, label)`` pair, a ``Response`` node is created and attached
    to its cluster with a ``BELONGS_TO`` relationship.

    Args:
        driver: Object whose ``session()`` returns a context-managed session
            exposing ``run(query, **parameters)``.
        labels: Sequence of integer cluster ids, one per response. numpy
            integer scalars are accepted; they are converted to ``int``.
        responses: Sequence of response texts, aligned with ``labels``.

    Raises:
        ValueError: If ``labels`` and ``responses`` differ in length.
    """
    # Convert to built-in ints so the parameters sent with each query do not
    # depend on the driver knowing how to serialize numpy scalar types.
    cluster_ids = [int(label) for label in labels]
    contents = list(responses)

    # zip() would silently drop the unmatched tail; refuse before writing anything.
    if len(cluster_ids) != len(contents):
        raise ValueError(
            f"labels and responses must have the same length "
            f"(got {len(cluster_ids)} and {len(contents)})"
        )

    # Create a session using the driver
    with driver.session() as session:
        # Sorted so clusters are always written in the same order.
        for cluster_id in sorted(set(cluster_ids)):
            session.run(
                "MERGE (:Cluster {id: $id, description: $description})",
                id=cluster_id,
                description=f"Cluster {cluster_id} Description",
            )

        # Create responses and link to clusters
        for content, cluster_id in zip(contents, cluster_ids):
            session.run(
                """
                CREATE (r:Response {content: $content})
                WITH r
                MATCH (c:Cluster {id: $clusterId})
                CREATE (r)-[:BELONGS_TO]->(c)
                """,
                content=content, clusterId=cluster_id
            )

# Transaction function that stores one response together with its cluster label
# NOTE(review): nothing in the visible notebook calls this helper.
def create_node(tx:Transaction, response, label):
    """Create a single ``Response`` node carrying its text and cluster label.

    Args:
        tx: Object exposing ``run(query, **parameters)``.
        response: Text stored in the node's ``content`` property.
        label: Value stored in the node's ``cluster`` property.
    """
    params = {"content": response, "cluster": label}
    tx.run(
        """
    CREATE (n:Response {content: $content, cluster: $cluster})
    """,
        **params,
    )

# Populate the graph: Cluster nodes for the distinct labels, plus one Response
# node per answer linked to its cluster via BELONGS_TO.
create_response_nodes(driver, labels, responses)

In [63]:
# Fetch every response together with the cluster it is linked to
res = driver.execute_query("""
MATCH (n:Response)-[b:BELONGS_TO]->(c:Cluster)
RETURN n,b,c
""")

# Group the response texts by cluster id: {cluster_id: [content, ...]}
clusters = {}
for record in res.records:
    cluster = record['c']['id']
    content = record['n']['content']
    # setdefault starts the list the first time a cluster id is seen
    clusters.setdefault(cluster, []).append(content)

clusters

<Node element_id='4:bddd43fd-6dd0-4438-a337-0f607146615f:5' labels=frozenset({'Response'}) properties={'content': 'Resilience is built through unity and the collective spirit of our community.'}>


{0: ['Resilience is built through unity and the collective spirit of our community.',
  'Organizing community support activities and helping those in need gives me strength.',
  'Through art and creative expression, we can channel our emotions and rebuild our spirit.',
  'Volunteering in community defense initiatives has strengthened my resolve and resilience.',
  'Continuing to celebrate life’s moments, big and small, reinforces the resilience of our community.'],
 1: ['I find strength by remembering the stories of our ancestors’ perseverance through tough times.',
  'Supporting each other in small everyday actions helps build a resilient community.',
  'Sharing stories of survival and heroism inspires me to be resilient.'],
 2: ['Personal resilience is built by facing fears directly and learning to overcome them.',
  'Faith and spirituality play a crucial role in my personal resilience.'],
 3: ['I build resilience by maintaining cultural traditions that remind us of our identity and 

In [64]:
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
def extract_key_terms(data, top_n=5):
    """Print the most frequent terms of each cluster.

    For every cluster, the texts are tokenized with a ``CountVectorizer``
    (English stop words removed), occurrences of each term are summed over
    the cluster's texts, and the highest-count terms are printed.

    Args:
        data: Mapping of cluster id to a list of text strings.
        top_n: How many terms to print per cluster. Defaults to 5.

    Returns:
        None. Results are written to stdout, one line per cluster.
    """
    vectorizer = CountVectorizer(stop_words='english')
    for cluster, texts in data.items():
        # The vectorizer is re-fitted on each cluster's own texts
        X = vectorizer.fit_transform(texts)
        terms = vectorizer.get_feature_names_out()
        # Sum the counts per term without building a dense documents x terms array
        frequencies = np.asarray(X.sum(axis=0)).ravel()
        # argsort is ascending; reverse it to get the most frequent terms first
        sorted_indices = np.argsort(frequencies)[::-1]
        print(f"Cluster {cluster} key terms: {terms[sorted_indices][:top_n]}")  # Top terms

# Print the most frequent terms of each cluster in `clusters`
extract_key_terms(clusters)

Cluster 0 key terms: ['community' 'resilience' 'spirit' 'volunteering' 'gives']
Cluster 1 key terms: ['stories' 'resilient' 'tough' 'inspires' 'ancestors']
Cluster 2 key terms: ['resilience' 'personal' 'spirituality' 'role' 'play']
Cluster 3 key terms: ['resilience' 'maintaining' 'wellness' 'traditions' 'stress']
Cluster 4 key terms: ['resilience' 'face' 'youth' 'empowered' 'helps']
