In [1]:
import os
import sqlite3

import pandas as pd

In [2]:
# Connect to the SQLite database
conn = sqlite3.connect('social_network_anonymized.db')

try:
    # List the tables in the database so the reads below can be checked
    # against what is actually present.
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    table_name = cursor.fetchall()  # list of 1-tuples, one per table

    print(table_name)

    # Load every table into its own pandas DataFrame
    media = pd.read_sql_query("SELECT * FROM Media", conn)
    profile_activity = pd.read_sql_query("SELECT * FROM ProfileActivity", conn)
    activity_media = pd.read_sql_query("SELECT * FROM ActivityMedia", conn)
    profile_connection = pd.read_sql_query("SELECT * FROM ProfileConnection", conn)
    profiles = pd.read_sql_query("SELECT * FROM Profiles", conn)
    activity = pd.read_sql_query("SELECT * FROM Activity", conn)
finally:
    # Close the connection even when one of the queries above raises
    conn.close()

[('Media',), ('ProfileActivity',), ('ActivityMedia',), ('ProfileConnection',), ('Profiles',), ('Activity',)]


In [3]:
def pair_profile_activities(profile_activity_df, profile_df):
    """Pair the source and target profiles that share an activity.

    Rows of ``profile_activity_df`` are split by ``relationship_type``
    ("source" / "target"); each half is joined to ``profile_df`` on
    ``profile_id == id``, and the two halves are then joined on
    ``activity_id``.

    Parameters
    ----------
    profile_activity_df : pandas.DataFrame
        Must provide ``relationship_type``, ``profile_id`` and
        ``activity_id`` columns.
    profile_df : pandas.DataFrame
        Must provide an ``id`` column.

    Returns
    -------
    pandas.DataFrame
        One row per (source row, target row) combination within an
        activity. Column names present on both sides carry a ``_source``
        or ``_target`` suffix.
    """

    def _rows_with_profile(role):
        # Keep only the rows for this role, then attach the profile columns.
        role_mask = profile_activity_df["relationship_type"] == role
        role_rows = profile_activity_df.loc[role_mask]
        return role_rows.merge(profile_df, left_on="profile_id", right_on="id")

    source_side = _rows_with_profile("source")
    target_side = _rows_with_profile("target")

    return source_side.merge(
        target_side,
        on="activity_id",
        suffixes=("_source", "_target"),
    )

def merge_activities(df, activity_df, left_on):
    """Join ``df`` to ``activity_df`` on ``df[left_on] == activity_df["id"]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Left-hand frame; must contain the column named by ``left_on``.
    activity_df : pandas.DataFrame
        Right-hand frame; must contain an ``id`` column.
    left_on : str
        Column of ``df`` to match against ``activity_df["id"]``.

    Returns
    -------
    pandas.DataFrame
        The merged frame (pandas' default inner join).
    """
    joined = df.merge(activity_df, left_on=left_on, right_on="id")
    return joined

In [4]:
# Pair each activity's source profile with its target profile, then attach the
# matching activity record (joined on the activity id).
profile_pairs = pair_profile_activities(
    profile_activity_df=profile_activity,
    profile_df=profiles,
)
profile_pairs_activity = merge_activities(
    df=profile_pairs,
    activity_df=activity,
    left_on="activity_id",
)

In [48]:
import networkx as nx
import matplotlib.pyplot as plt

# Directed graph whose nodes are profile names; every other column of the
# frame is attached to the edge as an attribute (edge_attr=True).
# NOTE(review): a DiGraph holds one edge per (source, target) pair, so repeated
# pairs in the frame collapse into a single edge - confirm that is intended.
G = nx.from_pandas_edgelist(
    profile_pairs_activity,
    "name_source",
    "name_target",
    edge_attr=True,
    create_using=nx.DiGraph(),
)

In [98]:
# Read the Gemini API key from the environment rather than embedding it in the
# notebook. A missing variable raises KeyError naming "GEMINI_API_KEY".
# NOTE: the key that was previously committed on this line must be treated as
# compromised and revoked.
GEMINI_API_KEY = os.environ["GEMINI_API_KEY"]

In [106]:
from langchain.vectorstores import FAISS

# Describe every directed edge as one sentence and join the sentences with
# newlines, giving a single string that the text splitter can consume.
# (Accumulating with `list += str` would extend the list one character at a
# time instead of appending whole sentences.)
text = "\n".join(
    f"{source} connects to {target}" for source, target in G.edges()
)


In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Step 1: Initialize Hugging Face embeddings
# Requires the `sentence-transformers` package: pip install sentence-transformers
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Step 2: Split the text into small overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
    length_function=len
)
documents = text_splitter.split_text(text)

# Step 3: Create a vector store (FAISS) and store embeddings
vectorstore = FAISS.from_texts(documents, embeddings)

# Step 4: Create a retriever from the vector store
retriever = vectorstore.as_retriever()

# Step 5: Initialize the LLM (Google Gemini)
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", api_key=GEMINI_API_KEY)

# Step 6: Build the RetrievalQA chain
rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True
)

# Step 7: Query the RAG system
query = "Who is michael?"
# `Chain.run` is deprecated; `invoke` takes the chain's input mapping and
# returns a dict whose "result" entry holds the answer text.
response = rag_chain.invoke({"query": query})["result"]

print("Query:", query)
print("Response:", response)


In [None]:
import graphistry

# Authenticate against Graphistry Hub with a personal key pair taken from the
# environment; a missing variable raises KeyError naming it.
# NOTE: the key id/secret previously committed on this line must be treated as
# compromised and revoked.
graphistry.register(
    api=3,
    protocol="https",
    server='hub.graphistry.com',
    personal_key_id=os.environ["GRAPHISTRY_PERSONAL_KEY_ID"],
    personal_key_secret=os.environ["GRAPHISTRY_PERSONAL_KEY_SECRET"],
)

# Only the first 10,000 rows are sent for plotting.
g = graphistry.edges(profile_pairs_activity[:10000]) \
    .bind(source="name_source",
          destination="name_target")

g.plot()