In [1]:
import sqlite3
import pandas as pd
import networkx

In [2]:
# Open the SQLite database holding the social-network export.
# NOTE: sqlite3.connect() silently creates an empty file when the path does
# not exist; in that case the SELECTs below fail with "no such table".
conn = sqlite3.connect('social_network_anonymized.db')

# try/finally guarantees the connection is released even when one of the
# queries raises (missing table, locked file, ...), so a failed run does not
# leave an open handle behind in the kernel.
try:
    # List the tables present in the database as a quick sanity check.
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    table_name = cursor.fetchall()

    print(table_name)

    # Load every table into its own pandas DataFrame.
    media = pd.read_sql_query("SELECT * FROM Media", conn)
    profile_activity = pd.read_sql_query("SELECT * FROM ProfileActivity", conn)
    activity_media = pd.read_sql_query("SELECT * FROM ActivityMedia", conn)
    profile_connection = pd.read_sql_query("SELECT * FROM ProfileConnection", conn)
    profiles = pd.read_sql_query("SELECT * FROM Profiles", conn)
    activity = pd.read_sql_query("SELECT * FROM Activity", conn)
finally:
    # Always close the connection; the DataFrames above are fully materialized.
    conn.close()

[('Media',), ('ProfileActivity',), ('ActivityMedia',), ('ProfileConnection',), ('Profiles',), ('Activity',)]


In [3]:
def pair_profile_activities(profile_activity_df, profile_df):
    """Pair the source and target profiles that share an activity.

    Rows of ``profile_activity_df`` are split by ``relationship_type``
    ("source" / "target"), each side is joined to ``profile_df`` on
    ``profile_id == id``, and the two sides are then joined on
    ``activity_id``.

    All joins use the pandas default (inner), so rows without a matching
    profile, and activities lacking either side, are dropped.

    Returns:
        A DataFrame with one row per (source row, target row) combination
        of each activity. Columns present on both sides carry the
        ``_source`` / ``_target`` suffix.
    """

    def _rows_for_role(role):
        # Select the rows carrying this relationship type and attach the
        # profile columns to them.
        role_mask = profile_activity_df.relationship_type == role
        return profile_activity_df[role_mask].merge(
            profile_df,
            left_on="profile_id",
            right_on="id",
        )

    source_side = _rows_for_role("source")
    target_side = _rows_for_role("target")

    return source_side.merge(
        target_side,
        on="activity_id",
        suffixes=("_source", "_target"),
    )

def merge_activities(df, activity_df, left_on):
    """Attach activity columns to ``df``.

    Performs a default (inner) join between ``df[left_on]`` and
    ``activity_df["id"]`` and returns the joined DataFrame; rows of ``df``
    without a matching activity are dropped.
    """
    joined = df.merge(activity_df, left_on=left_on, right_on="id")
    return joined

In [4]:
# Step 1: one row per (source profile, target profile) pair sharing an activity.
profile_pairs = pair_profile_activities(
    profile_activity_df=profile_activity,
    profile_df=profiles,
)

# Step 2: enrich every pair with the columns of the activity that links it.
profile_pairs_activity = merge_activities(
    df=profile_pairs,
    activity_df=activity,
    left_on="activity_id",
)

In [5]:
import networkx as nx
import matplotlib.pyplot as plt

# Build a graph with one edge per (source profile, target profile) pair.
# create_using is left at its default, so the result is whatever graph type
# networkx builds by default (an undirected nx.Graph per the networkx docs).
G = nx.from_pandas_edgelist(
    df=profile_pairs_activity,
    source='profile_id_source',
    target='profile_id_target',
)

In [16]:
# Graph size as an (edge count, node count) tuple, shown as the cell output.
(G.number_of_edges(), G.number_of_nodes())

(18677, 17216)

In [None]:
import os

import graphistry

# SECURITY: credentials must never be hardcoded in a notebook. The personal
# key is read from the environment instead; export
# GRAPHISTRY_PERSONAL_KEY_ID and GRAPHISTRY_PERSONAL_KEY_SECRET before
# starting the kernel. A missing variable raises KeyError naming it.
# NOTE(review): the key pair previously committed in this cell should be
# treated as leaked and revoked in the Graphistry Hub account.
graphistry.register(
    api=3,
    protocol="https",
    server='hub.graphistry.com',
    personal_key_id=os.environ["GRAPHISTRY_PERSONAL_KEY_ID"],
    personal_key_secret=os.environ["GRAPHISTRY_PERSONAL_KEY_SECRET"],
)

# Only the first 100 rows are passed as edges (preview-sized sample).
g = (
    graphistry.edges(profile_pairs_activity[:100])
    .bind(source="profile_id_source",
          destination="profile_id_target")
)

g.plot()