In [0]:
import pandas as pd 
# Load the featurized article table; it is joined with the Gemini results on "url" below.
df1 = pd.read_csv('df_featurized_us_french_no_entities.csv')

In [0]:
# Read the Gemini extraction results. The second results file is currently
# left out of the run:
# csv2 = pd.read_csv("gemini_results_master2.csv")
csv1 = pd.read_csv("gemini_results_master.csv")

# Stack every loaded result frame into one table with a fresh 0..n-1 index.
result_frames = [csv1]
combined_df = pd.concat(result_frames, ignore_index=True)

# Keep only the articles that appear in both tables, matched on "url".
df = df1.merge(combined_df, on="url", how="inner")

In [0]:
# (rows, columns) of the merged frame.
df.shape

In [0]:
# Column labels available after the merge.
df.columns

In [0]:
# Show the label distribution before filtering. A bare expression that is not
# the last line of a cell is silently discarded, so display it explicitly.
display(df["relevance_text"].value_counts())

# Keep only rows whose relevance_text equals 1. `.copy()` gives an independent
# frame so that later column assignments do not raise SettingWithCopyWarning.
df = df.loc[df["relevance_text"] == 1].copy()

In [0]:
# Tally which Python types are stored in the "entity_relationships" column.
df["entity_relationships"].apply(type).value_counts()

In [0]:
# import ast

# def parse_relationships(x):
#     if isinstance(x, str) and x.strip().startswith("["):
#         try:
#             parsed = ast.literal_eval(x)
#             return parsed if isinstance(parsed, list) else []
#         except Exception:
#             return []
#     return []

# df["entity_relationships"] = df["entity_relationships"].apply(parse_relationships)

import ast

def parse(val, expected_type):
    """Coerce a cell value into ``expected_type``, defaulting to an empty one.

    Args:
        val: The raw cell value (string, container, NaN, ...).
        expected_type: The container class wanted back (e.g. ``list`` or ``dict``).

    Returns:
        * the literal-evaluated value when ``val`` is a string that starts
          (after stripping) with ``[`` for lists or ``{`` for anything else
          and evaluates to an instance of ``expected_type``;
        * ``val`` itself when it is not such a string but already is an
          instance of ``expected_type``;
        * ``expected_type()`` in every other case, including strings that
          fail to evaluate.
    """
    opener = "[" if expected_type == list else "{"
    looks_serialized = isinstance(val, str) and val.strip().startswith(opener)

    if not looks_serialized:
        if isinstance(val, expected_type):
            return val
        return expected_type()

    try:
        candidate = ast.literal_eval(val)
    except Exception:
        # Malformed literal: fall back to an empty container.
        return expected_type()

    if isinstance(candidate, expected_type):
        return candidate
    return expected_type()

# Normalize both serialized columns: relationships become lists, entities dicts.
for column_name, container_type in (
    ("entity_relationships", list),
    ("extracted_entities", dict),
):
    df[column_name] = df[column_name].apply(parse, args=(container_type,))

In [0]:
# Preview a few rows rather than rendering the whole frame.
df.head()

In [0]:
# Rows usable for the graph need a non-empty relationship list AND a non-empty
# entity dict; only the columns needed downstream are kept.
has_relationships = df["entity_relationships"].apply(
    lambda rels: isinstance(rels, list) and len(rels) > 0
)
has_entities = df["extracted_entities"].apply(
    lambda found: isinstance(found, dict) and len(found) > 0
)
graph_columns = ["url", "date", "entity_relationships", "extracted_entities"]

df_graph = df.loc[has_relationships & has_entities, graph_columns].copy()

In [0]:
# (rows, columns) of the graph-ready subset.
df_graph.shape

In [0]:
# Drop rows missing either payload column, then keep one row per (url, date).
df_graph = (
    df_graph
    .dropna(subset=["entity_relationships", "extracted_entities"])
    .drop_duplicates(subset=["url", "date"])
)
df_graph.shape

In [0]:

# Flatten each article's relationship list into one row per relationship.
edge_fields = ["source", "target", "relationship", "sentiment"]
records = []

for url, date, relationships in zip(
    df_graph["url"], df_graph["date"], df_graph["entity_relationships"]
):
    if not isinstance(relationships, list):
        continue
    for rel in relationships:
        # Skip anything that is not a dict carrying all four fields. Without the
        # dict check, a plain string would pass a substring `in` test and then
        # fail on rel["source"].
        if not isinstance(rel, dict) or any(field not in rel for field in edge_fields):
            continue
        record = {field: rel[field] for field in edge_fields}
        record["url"] = url
        record["date"] = date
        records.append(record)

# Explicit columns keep the schema stable even when no relationship qualified.
edges_df = pd.DataFrame(records, columns=edge_fields + ["url", "date"])
print(f"✅ Created edges_df with {len(edges_df)} rows.")


In [0]:

# Relative frequency of each relationship label, then of each sentiment label.
for label_column in ("relationship", "sentiment"):
    shares = edges_df[label_column].value_counts(normalize=True)
    print(shares)

In [0]:
# Flatten each article's {entity_type: [names]} dict into one row per entity.
ent_columns = ["url", "date", "entity", "entity_type"]
ents = []

for url, date, entities in zip(
    df_graph["url"], df_graph["date"], df_graph["extracted_entities"]
):
    if not isinstance(entities, dict) or not entities:
        continue

    for ent_type, names in entities.items():
        if not isinstance(names, list):
            continue
        for name in names:
            # Keep only real, non-blank strings: later steps run regex and
            # embedding code over these names and fail on anything else.
            if not isinstance(name, str) or not name.strip():
                continue
            ents.append({
                "url": url,
                "date": date,
                "entity": name,
                "entity_type": ent_type
            })

# Explicit columns keep the schema stable even when nothing was extracted.
ents_long = pd.DataFrame(ents, columns=ent_columns)
print(f"✅ Created ents_long with {len(ents_long)} rows.")

In [0]:
# Column labels of the long-format entity table.
ents_long.columns

# Graph

In [0]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import community.community_louvain as community_louvain         # pip install python-louvain
import numpy as np
from scipy.spatial import ConvexHull            # pip install scipy

# ─── 1. ASSUME edges_df & ents_long EXIST ───────────────────────────────────────
# edges_df columns: source, target, relationship, sentiment, url, date
# ents_long columns: url, date, entity, entity_type

# If your date column is still string, uncomment:
# edges_df['date'] = pd.to_datetime(edges_df['date'])

# ─── 2. BUILD WEIGHTED GRAPH FOR METRICS & CLUSTERING ────────────────────────────
# Direction and sentiment are ignored here; edges are weighted by how often a
# pair co-occurs. Because the graph is undirected, (A, B) and (B, A) are the same
# edge, so each pair is put into a canonical order BEFORE counting. Otherwise the
# second add_edge call would overwrite the first one's weight instead of adding.
undirected_pairs = pd.DataFrame(
    [sorted(pair, key=str) for pair in zip(edges_df['source'], edges_df['target'])],
    columns=['source', 'target'],
)
ew = undirected_pairs.groupby(['source', 'target']).size().reset_index(name='weight')

G = nx.Graph()
G.add_weighted_edges_from(ew.itertuples(index=False, name=None))

# ─── 3. COMPUTE CENTRALITIES & COMMUNITIES ──────────────────────────────────────
deg_cent  = nx.degree_centrality(G)
pr        = nx.pagerank(G)
betw_cent = nx.betweenness_centrality(G, normalized=True)
partition = community_louvain.best_partition(G)

# ─── 4. LAYOUT & HULLS ───────────────────────────────────────────────────────────
# A fixed seed makes the layout (and therefore the figure) reproducible.
pos = nx.spring_layout(G, k=0.15, iterations=20, seed=42)

# Group nodes by community in a single pass over the partition.
community_members = {}
for node, comm in partition.items():
    community_members.setdefault(comm, []).append(node)

# Build convex-hull shapes for each community
hull_shapes = []
for members in community_members.values():
    if len(members) < 3:
        continue  # a hull needs at least three points
    pts = np.array([pos[n] for n in members])
    # Collinear or coincident points span no area and make the hull
    # construction fail, so skip those communities.
    if np.linalg.matrix_rank(pts - pts[0]) < 2:
        continue
    hull = ConvexHull(pts)
    path = "M " + " L ".join(f"{pts[v,0]},{pts[v,1]}" for v in hull.vertices) + " Z"
    hull_shapes.append(dict(
        type="path", path=path,
        fillcolor="rgba(200,200,200,0.15)", line=dict(width=0)
    ))

# ─── 5. SELECTIVE LABELS ─────────────────────────────────────────────────────────
# Only nodes at or above the 90th percentile of degree centrality get a label.
deg_vals = np.array([score for score in deg_cent.values()])
thresh = np.percentile(deg_vals, 90)
label_nodes = {node for node in deg_cent if deg_cent[node] >= thresh}

# ─── 6. BUILD TRACES ─────────────────────────────────────────────────────────────
# 6A) Edge traces: one per sentiment + an “All”
sentiments = sorted(edges_df['sentiment'].dropna().unique().tolist())
edge_traces = []
color_map = {"positive":"green","negative":"red","neutral":"gray"}

# Dominant sentiment per UNDIRECTED pair, computed once. G may report an edge as
# (target, source); keying by frozenset makes the lookup orientation-independent,
# where a direct source/target match would find nothing and fall back to "neutral".
pair_votes = {}
for src, dst, vote in zip(edges_df['source'], edges_df['target'], edges_df['sentiment']):
    if pd.notna(vote):
        pair_votes.setdefault(frozenset((src, dst)), []).append(vote)
# Series.mode() is sorted, so ties resolve to the smallest label.
dominant_sentiment = {
    pair: pd.Series(votes).mode().iloc[0] for pair, votes in pair_votes.items()
}

for s in ["All"] + sentiments:
    xs, ys = [], []
    for u, v in G.edges():
        sent = dominant_sentiment.get(frozenset((u, v)), "neutral")
        if s != "All" and sent != s:
            continue
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        # None breaks the line so consecutive edges are not joined.
        xs += [x0, x1, None]
        ys += [y0, y1, None]
    edge_traces.append(
        go.Scatter(
            x=xs, y=ys, mode="lines",
            line=dict(color=color_map.get(s,"#888"), width=1),
            hoverinfo="none",
            visible=(s=="All"),
            name=(s if s!="All" else "All Sentiments")
        )
    )

# 6B) Node traces: one per entity type + an “All”
types = sorted(ents_long['entity_type'].unique().tolist())
type_colors = {"people":"blue","organizations":"orange","locations":"green","unknown":"gray"}

# First-listed entity type per entity name, built once instead of filtering
# ents_long for every node in every trace.
entity_type_of = (
    ents_long.drop_duplicates("entity").set_index("entity")["entity_type"].to_dict()
)

node_traces = []
for t in ["All"] + types:
    xs, ys, txt, col, cdata = [], [], [], [], []
    for n in G.nodes():
        et0 = entity_type_of.get(n, "unknown")
        if t != "All" and et0 != t:
            continue
        x, y = pos[n]
        xs.append(x)
        ys.append(y)
        txt.append(n if n in label_nodes else "")
        col.append(type_colors.get(et0, "gray"))
        # The name goes into customdata because `text` is blank for unlabeled
        # nodes, which would leave their hover title empty.
        cdata.append([n, et0, deg_cent[n], pr[n]])
    node_traces.append(
        go.Scatter(
            x=xs, y=ys, mode="markers+text",
            text=txt, textposition="top center",
            marker=dict(size=10, color=col),
            customdata=cdata,
            hovertemplate=(
                "<b>%{customdata[0]}</b><br>"
                "Type: %{customdata[1]}<br>"
                "Degree: %{customdata[2]:.3f}<br>"
                "PageRank: %{customdata[3]:.4f}<extra></extra>"
            ),
            visible=(t=="All"),
            name=(t if t!="All" else "All Types")
        )
    )

# ─── 7. ASSEMBLE FIGURE ──────────────────────────────────────────────────────────
# Edge traces come first, then node traces; the hull shapes are part of the layout.
explorer_layout = go.Layout(
    title="Knowledge Graph Explorer",
    shapes=hull_shapes,
    hovermode="closest",
    legend=dict(itemsizing="constant"),
    margin=dict(l=20, r=20, b=20, t=40),
)
fig = go.Figure(data=[*edge_traces, *node_traces], layout=explorer_layout)

# ─── 8. UPDATEMENUS FOR FILTERS ─────────────────────────────────────────────────
# Traces are laid out as [edge "All", edge per sentiment..., node "All", node per
# type...]. Each dropdown toggles ONLY its own group, by passing that group's
# trace indices as the third "update" argument. That way:
#   * choosing "All ..." restores the single combined trace instead of stacking
#     every per-category trace on top of it, and
#   * changing one dropdown does not reset the other's selection.
n_edge, n_node = len(edge_traces), len(node_traces)
edge_idx = list(range(n_edge))
node_idx = list(range(n_edge, n_edge + n_node))

sentiment_options = [("All Sentiments", "All Sentiments")] + [
    (s.title(), f"Sentiment: {s.title()}") for s in sentiments
]
type_options = [("All Types", "All Entity Types")] + [
    (t.title(), f"Entity Type: {t.title()}") for t in types
]

# Position k in each option list corresponds to trace k within its group.
sentiment_buttons = [
    dict(label=label, method="update",
         args=[{"visible": [i == k for i in range(n_edge)]},
               {"title.text": title},
               edge_idx])
    for k, (label, title) in enumerate(sentiment_options)
]
type_buttons = [
    dict(label=label, method="update",
         args=[{"visible": [i == k for i in range(n_node)]},
               {"title.text": title},
               node_idx])
    for k, (label, title) in enumerate(type_options)
]

fig.update_layout(
    updatemenus=[
        # Sentiment dropdown (single-select)
        dict(buttons=sentiment_buttons, direction="down", x=0, y=1.15, showactive=True),
        # Entity-type dropdown
        dict(buttons=type_buttons, direction="down", x=0.25, y=1.15, showactive=True),
    ]
)

fig.show()

# ─── 9. PRINT TOP 5 BROKERS BY BETWEENNESS ───────────────────────────────────────
# One column per centrality measure; pagerank and betweenness are looked up in
# G.nodes() order so they line up with the degree-centrality entries.
node_order = list(G.nodes())
metric_columns = {
    "entity":       list(deg_cent.keys()),
    "degree":       list(deg_cent.values()),
    "pagerank":     [pr[node] for node in node_order],
    "betweenness":  [betw_cent[node] for node in node_order],
}
metrics = pd.DataFrame(metric_columns)

print("\nTop 5 brokers by betweenness:")
top_brokers = metrics.sort_values("betweenness", ascending=False).head(5)
display(top_brokers)

In [0]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import community.community_louvain as community_louvain
import numpy as np
from scipy.spatial import ConvexHull

# --- 1. Prepare data ---
# The graph is undirected, so (A, B) and (B, A) are the same edge. Canonicalize
# each pair before counting; otherwise the later add_edge call would overwrite
# the earlier weight rather than accumulate it.
undirected_pairs = pd.DataFrame(
    [sorted(pair, key=str) for pair in zip(edges_df['source'], edges_df['target'])],
    columns=['source', 'target'],
)
ew = undirected_pairs.groupby(['source', 'target']).size().reset_index(name='weight')

G = nx.Graph()
G.add_weighted_edges_from(ew.itertuples(index=False, name=None))

# Optional: Filter nodes with low degree (keep nodes with more than 2 neighbours)
G = G.subgraph([n for n, d in G.degree() if d > 2]).copy()

# --- 2. Compute graph metrics ---
deg_cent = nx.degree_centrality(G)
pr = nx.pagerank(G)
betw_cent = nx.betweenness_centrality(G, normalized=True)
partition = community_louvain.best_partition(G)

# --- 3. Layout and community hulls ---
# A fixed seed makes the layout (and therefore the figure) reproducible.
pos = nx.spring_layout(G, k=0.4, iterations=50, seed=42)

# Group nodes by community in a single pass over the partition.
community_members = {}
for node, comm in partition.items():
    community_members.setdefault(comm, []).append(node)

hull_shapes = []
for members in community_members.values():
    if len(members) < 3:
        continue  # a hull needs at least three points
    pts = np.array([pos[n] for n in members])
    # Collinear or coincident points span no area and make the hull
    # construction fail, so skip those communities.
    if np.linalg.matrix_rank(pts - pts[0]) < 2:
        continue
    hull = ConvexHull(pts)
    path = "M " + " L ".join(f"{pts[v,0]},{pts[v,1]}" for v in hull.vertices) + " Z"
    hull_shapes.append(dict(
        type="path", path=path,
        fillcolor="rgba(200,200,200,0.15)", line=dict(width=0)
    ))

# --- 4. Label only most important nodes ---
# Nodes at or above the 97th percentile of degree centrality get a text label.
thresh = np.percentile(list(deg_cent.values()), 97)
label_nodes = {n for n, v in deg_cent.items() if v >= thresh}

# --- 5A. Edge traces ---
sentiments = sorted(edges_df['sentiment'].dropna().unique().tolist())
edge_traces = []
color_map = {"positive": "green", "negative": "red", "neutral": "gray"}

# Dominant sentiment per UNDIRECTED pair, computed once. G may report an edge as
# (target, source); keying by frozenset makes the lookup orientation-independent,
# where a direct source/target match would find nothing and fall back to "neutral".
pair_votes = {}
for src, dst, vote in zip(edges_df['source'], edges_df['target'], edges_df['sentiment']):
    if pd.notna(vote):
        pair_votes.setdefault(frozenset((src, dst)), []).append(vote)
# Series.mode() is sorted, so ties resolve to the smallest label.
dominant_sentiment = {
    pair: pd.Series(votes).mode().iloc[0] for pair, votes in pair_votes.items()
}

for s in ["All"] + sentiments:
    xs, ys = [], []
    for u, v in G.edges():
        sent = dominant_sentiment.get(frozenset((u, v)), "neutral")
        if s != "All" and sent != s:
            continue
        x0, y0 = pos[u]
        x1, y1 = pos[v]
        # None breaks the line so consecutive edges are not joined.
        xs += [x0, x1, None]
        ys += [y0, y1, None]
    edge_traces.append(
        go.Scatter(
            x=xs, y=ys, mode="lines",
            line=dict(color=color_map.get(s, "#888"), width=1),
            hoverinfo="none",
            visible=(s == "All"),
            name=(s if s != "All" else "All Sentiments")
        )
    )

# --- 5B. Node traces ---
types = sorted(ents_long['entity_type'].unique().tolist())
type_colors = {"people": "blue", "organizations": "orange", "locations": "green", "unknown": "gray"}

# First-listed entity type per entity name, built once instead of filtering
# ents_long for every node in every trace.
entity_type_of = (
    ents_long.drop_duplicates("entity").set_index("entity")["entity_type"].to_dict()
)

node_traces = []
for t in ["All"] + types:
    xs, ys, txt, col, cdata = [], [], [], [], []
    for n in G.nodes():
        et0 = entity_type_of.get(n, "unknown")
        if t != "All" and et0 != t:
            continue
        x, y = pos[n]
        xs.append(x)
        ys.append(y)
        txt.append(n if n in label_nodes else "")
        col.append(type_colors.get(et0, "gray"))
        # The name goes into customdata because `text` is blank for unlabeled
        # nodes, which would leave their hover title empty.
        cdata.append([n, et0, deg_cent[n], pr[n]])
    node_traces.append(
        go.Scatter(
            x=xs, y=ys, mode="markers+text",
            text=txt, textposition="top center",
            marker=dict(size=6, color=col),
            customdata=cdata,
            hovertemplate=(
                "<b>%{customdata[0]}</b><br>"
                "Type: %{customdata[1]}<br>"
                "Degree: %{customdata[2]:.3f}<br>"
                "PageRank: %{customdata[3]:.4f}<extra></extra>"
            ),
            visible=(t == "All"),
            name=(t if t != "All" else "All Types")
        )
    )

# --- 6. Assemble and show figure ---
fig = go.Figure(
    data=edge_traces + node_traces,
    layout=go.Layout(
        title="Knowledge Graph Explorer",
        shapes=hull_shapes,
        hovermode="closest",
        legend=dict(itemsizing="constant"),
        margin=dict(l=20, r=20, b=20, t=40)
    )
)

# Traces are laid out as [edge "All", edge per sentiment..., node "All", node per
# type...]. Each dropdown toggles ONLY its own group, by passing that group's
# trace indices as the third "update" argument, so "All ..." restores the single
# combined trace (no stacked or duplicated traces) and the two dropdowns do not
# reset each other.
n_edge, n_node = len(edge_traces), len(node_traces)
edge_idx = list(range(n_edge))
node_idx = list(range(n_edge, n_edge + n_node))

sentiment_options = [("All Sentiments", "All Sentiments")] + [
    (s.title(), f"Sentiment: {s.title()}") for s in sentiments
]
type_options = [("All Types", "All Entity Types")] + [
    (t.title(), f"Entity Type: {t.title()}") for t in types
]

# Position k in each option list corresponds to trace k within its group.
sentiment_buttons = [
    dict(label=label, method="update",
         args=[{"visible": [i == k for i in range(n_edge)]},
               {"title.text": title},
               edge_idx])
    for k, (label, title) in enumerate(sentiment_options)
]
type_buttons = [
    dict(label=label, method="update",
         args=[{"visible": [i == k for i in range(n_node)]},
               {"title.text": title},
               node_idx])
    for k, (label, title) in enumerate(type_options)
]

fig.update_layout(
    updatemenus=[
        dict(buttons=sentiment_buttons, direction="down", x=0, y=1.15, showactive=True),
        dict(buttons=type_buttons, direction="down", x=0.25, y=1.15, showactive=True),
    ]
)

fig.show()

# --- 7. Print top brokers ---
# pagerank and betweenness are looked up in G.nodes() order so they line up with
# the degree-centrality entries.
ordered_nodes = list(G.nodes())
metrics = pd.DataFrame({
    "entity": list(deg_cent.keys()),
    "degree": list(deg_cent.values()),
    "pagerank": [pr[node] for node in ordered_nodes],
    "betweenness": [betw_cent[node] for node in ordered_nodes],
})

print("\nTop 5 brokers by betweenness:")
ranked = metrics.sort_values("betweenness", ascending=False)
display(ranked.head(5))

## Cleaning `edges_df`
Dealing with entity resolution, duplicates, and conflicting sentiments and relationships.
- Using LangChain to explain the conflicts and resolve entities.

In [0]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re

def normalize_title(name):
    """Remove honorifics/titles from ``name`` and trim surrounding whitespace.

    A title is removed wherever it occurs as a whole word followed by
    whitespace (an optional trailing period is consumed too), not only at the
    start of the string.

    Args:
        name: The entity name to clean.

    Returns:
        The name with every matched title occurrence deleted, stripped.
    """
    honorifics = ("Mr", "Ms", "Mrs", "Dr", "Prof", "President", "Chancellor", "Prime Minister")
    title_pattern = r"\b(" + "|".join(honorifics) + r")\.?\s+"
    without_titles = re.sub(title_pattern, "", name)
    return without_titles.strip()


# 1. Extract all unique entity names
raw_entities = (
    pd.concat([edges_df["source"], edges_df["target"], ents_long["entity"]])
    .dropna()
    .unique()
    .tolist()
)
# Stripping titles can make two different raw names identical, so de-duplicate
# again afterwards (dict.fromkeys keeps first-seen order). Non-string values are
# skipped because normalize_title only works on strings.
unique_entities = list(dict.fromkeys(
    normalize_title(n) for n in raw_entities if isinstance(n, str)
))

# 2. Load model & generate embeddings
#model = SentenceTransformer("all-MiniLM-L6-v2")
model = SentenceTransformer("all-roberta-large-v1")
# Ask for NumPy output: scikit-learn's cosine_similarity expects array-like
# input, and a torch tensor living on a GPU cannot be converted implicitly.
embeddings = model.encode(unique_entities, convert_to_numpy=True)

# 3. Compute cosine similarity
cos_sim = cosine_similarity(embeddings)

# 4. Group similar entities (threshold ~0.88 is good for names)
# Greedy single pass: the first unassigned name becomes the cluster's canonical
# name and absorbs every LATER, still-unassigned name above the threshold.
threshold = 0.88
visited = set()
clusters = {}

for i, name in enumerate(unique_entities):
    if name in visited:
        continue
    cluster = [name]
    visited.add(name)
    # Indices j > i whose similarity to entity i exceeds the threshold.
    for j in np.flatnonzero(cos_sim[i, i + 1:] > threshold) + i + 1:
        candidate = unique_entities[j]
        # A name already claimed by an earlier cluster stays there; without this
        # check it would end up in several clusters with conflicting targets.
        if candidate in visited:
            continue
        cluster.append(candidate)
        visited.add(candidate)
    clusters[name] = cluster

# Hand-written alias -> canonical-name overrides. They are merged into
# resolution_map with dict.update after the embedding clusters, so for any key
# listed here the manual target replaces the cluster-derived one.
# Entries that map a name to itself pin that exact name, overriding whatever
# cluster it was placed in.
manual_resolutions = {
    "Zelenskyy": "Volodymyr Zelensky",
    "Vladimir Zelensky": "Volodymyr Zelensky",
    "Donald J. Trump": "Donald Trump",
    "Donald J Trump": "Donald Trump",
    "Trump": "Donald Trump",
    "Mr. Trump": "Donald Trump",
    "Biden": "Joe Biden",
    "Musk": "Elon Musk",
    "House Republicans": "Republican Party",
    "Republicans": "Republican Party",
    "House Democrats": "Democratic Party",
    "Democrats": "Democratic Party",

    # Kennedy corrections
    "Robert F. Kennedy Jr.": "Robert F. Kennedy Jr.",
    "Robert F Kennedy Jr": "Robert F. Kennedy Jr.",
    "Robert Kennedy Jr": "Robert F. Kennedy Jr.",
    "Robert F. Kennedy, Jr.": "Robert F. Kennedy Jr.",
    "Robert F. Kennedy": "Robert F. Kennedy",  # keep Sr separate

    "John F. Kennedy": "John F. Kennedy",
    "John F Kennedy": "John F. Kennedy",
}

# 5. Create resolution map
# Every alias points at its cluster's canonical name. If that canonical name has
# a manual override, the whole cluster follows the override so its members do not
# split across two targets.
resolution_map = {}
for canon, aliases in clusters.items():
    target = manual_resolutions.get(canon, canon)
    for alias in aliases:
        resolution_map[alias] = target

# Manual entries take precedence over cluster-derived ones.
resolution_map.update(manual_resolutions)

# Each manual target must map to itself. Otherwise a cluster could send
# "Donald Trump" -> "Trump" while the manual rule sends "Trump" -> "Donald Trump",
# leaving both spellings in the data.
for canon in set(manual_resolutions.values()):
    resolution_map[canon] = canon

resolution_map


In [0]:
import pandas as pd
from collections import defaultdict

# Step 1: Invert the resolution map to get clusters (canonical -> [aliases])
inverse_clusters = defaultdict(list)
for alias, canonical in resolution_map.items():
    inverse_clusters[canonical].append(alias)

# Step 2: Convert to DataFrame (explicit columns keep the schema when empty)
cluster_df = pd.DataFrame(
    [
        {"canonical": canon, "aliases": aliases, "number_of_names_in_cluster": len(aliases)}
        for canon, aliases in inverse_clusters.items()
    ],
    columns=["canonical", "aliases", "number_of_names_in_cluster"],
)

# Step 3: Filter for clusters with >1 name, largest first
cluster_df_filtered = (
    cluster_df[cluster_df["number_of_names_in_cluster"] > 1]
    .sort_values("number_of_names_in_cluster", ascending=False)
)

# Step 4: Summary table of cluster sizes (how many clusters have each size).
# Naming the axis and the count column explicitly avoids depending on the
# default column names that value_counts().reset_index() produces.
summary = (
    cluster_df_filtered["number_of_names_in_cluster"]
    .value_counts()
    .sort_index()
    .rename_axis("number_of_names_in_cluster")
    .reset_index(name="number_of_clusters")
)

# pandas DataFrames have no `.display` attribute; render with display() instead.
display(summary)
display(cluster_df_filtered)

In [0]:
# Cluster-size summary: how many clusters contain each number of names.
summary

In [0]:
# pandas DataFrames have no `.display()` method; use the notebook's display().
display(cluster_df_filtered)

In [0]:
def _resolve_entity(name):
    """Return the canonical name for ``name`` according to ``resolution_map``.

    The exact name is looked up first. If it is not a key, the title-stripped
    form is tried, because the clustering step keyed the map by names that had
    already been passed through ``normalize_title``. Names with no match, and
    non-string values such as NaN, are returned unchanged.
    """
    if not isinstance(name, str):
        return name
    if name in resolution_map:
        return resolution_map[name]
    return resolution_map.get(normalize_title(name), name)


# 6. Apply to edges_df, then 8. drop rows that became exact duplicates
edges_df = (
    edges_df
    .assign(
        source=lambda frame: frame["source"].map(_resolve_entity),
        target=lambda frame: frame["target"].map(_resolve_entity),
    )
    .drop_duplicates()
)

# 7. Apply to ents_long, keeping one row per (url, entity)
ents_long = (
    ents_long
    .assign(entity=lambda frame: frame["entity"].map(_resolve_entity))
    .drop_duplicates(subset=["url", "entity"])
)

# 9. Print resolution summary
print(f"🔁 Resolved {len(unique_entities)} entities into {len(clusters)} canonical names.")

Dropped duplicates, NaN values, and self-loops.

In [0]:
# Remove rows with a missing endpoint, then self-loops (source == target).
# Built as one chain ending in .copy(): calling dropna(inplace=True) on a
# filtered slice, or assigning columns to it later, raises SettingWithCopyWarning.
edges_df = (
    edges_df
    .dropna(subset=["source", "target"])
    .loc[lambda frame: frame["source"] != frame["target"]]
    .copy()
)

# Fuzzy search
Tried fuzzy string matching, but decided that semantic sentence-transformer embeddings worked better, with manual overrides for edge cases.
Alternative: Facebook's BLINK model.

# Redo graph

In [0]:
import pandas as pd
import networkx as nx
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from community import community_louvain
from scipy.spatial import ConvexHull

# Ensure date column is datetime (assign returns a new frame instead of writing
# into a frame that may be a slice of another one)
edges_df = edges_df.assign(date=pd.to_datetime(edges_df['date']))

# Build graph
# The graph is undirected, so (A, B) and (B, A) are the same edge. Canonicalize
# each pair before counting; otherwise the later add_edge call would overwrite
# the earlier weight rather than accumulate it.
undirected_pairs = pd.DataFrame(
    [sorted(pair, key=str) for pair in zip(edges_df['source'], edges_df['target'])],
    columns=['source', 'target'],
)
ew = undirected_pairs.groupby(['source', 'target']).size().reset_index(name='weight')
G = nx.Graph()
G.add_weighted_edges_from(ew.itertuples(index=False, name=None))

# Centrality & communities
deg_cent = nx.degree_centrality(G)
pr = nx.pagerank(G)
betw_cent = nx.betweenness_centrality(G, normalized=True)
partition = community_louvain.best_partition(G)
# A fixed seed makes the layout (and therefore the figure) reproducible.
pos = nx.spring_layout(G, k=0.15, iterations=20, seed=42)

# Convex hulls for clusters
# Index the partition by community once, then outline every community that has
# at least three members.
members_by_comm = {}
for node, comm_id in partition.items():
    members_by_comm.setdefault(comm_id, []).append(node)

hull_shapes = []
for comm in set(partition.values()):
    members = members_by_comm[comm]
    if len(members) >= 3:
        pts = np.array([pos[n] for n in members])
        hull = ConvexHull(pts)
        corners = [f"{pts[v,0]},{pts[v,1]}" for v in hull.vertices]
        path = "M " + " L ".join(corners) + " Z"
        hull_shapes.append(
            dict(type="path", path=path, fillcolor="rgba(200,200,200,0.15)", line=dict(width=0))
        )

# Selective node labels: nodes at or above the 90th percentile of degree centrality
deg_vals = np.array([score for score in deg_cent.values()])
thresh = np.percentile(deg_vals, 90)
label_nodes = {node for node in deg_cent if deg_cent[node] >= thresh}

# Build edge traces by relationship
relationship_types = sorted(edges_df['relationship'].dropna().unique())
edge_traces = []
# Cycle through the palette: zipping against it would leave every relationship
# type beyond the palette's length without a colour (they all fell back to gray).
palette = px.colors.qualitative.Bold
rel_colors = {rel: palette[i % len(palette)] for i, rel in enumerate(relationship_types)}

for rel in relationship_types:
    rel_rows = edges_df[edges_df['relationship'] == rel]
    xs, ys = [], []
    for src, dst in zip(rel_rows['source'], rel_rows['target']):
        if src in pos and dst in pos:
            x0, y0 = pos[src]
            x1, y1 = pos[dst]
            # None breaks the line so consecutive edges are not joined.
            xs += [x0, x1, None]
            ys += [y0, y1, None]
    edge_traces.append(go.Scatter(
        x=xs, y=ys, mode="lines",
        line=dict(color=rel_colors.get(rel, "gray"), width=1),
        name=rel,
        hoverinfo="none",
        visible=True
    ))

# Node traces by type
type_colors = {"people": "blue", "institutions": "orange", "locations": "green", "political_events": "purple", "political_parties": "red", "unknown": "gray"}

# First-listed entity type per entity name, built once instead of filtering
# ents_long for every node in every trace.
entity_type_of = (
    ents_long.drop_duplicates("entity").set_index("entity")["entity_type"].to_dict()
)
node_types = {node: entity_type_of.get(node, "unknown") for node in G.nodes()}

entity_types = sorted(ents_long['entity_type'].unique())
# Graph nodes missing from ents_long are typed "unknown". Give them their own
# trace; otherwise they match no type and are never drawn.
if "unknown" in node_types.values() and "unknown" not in entity_types:
    entity_types.append("unknown")

node_traces = []
for t in entity_types:
    xs, ys, txt, col, cdata = [], [], [], [], []
    for node, et in node_types.items():
        if et != t:
            continue
        x, y = pos[node]
        xs.append(x)
        ys.append(y)
        txt.append(node if node in label_nodes else "")
        col.append(type_colors.get(et, "gray"))
        # The name goes into customdata because `text` is blank for unlabeled
        # nodes, which would leave their hover title empty.
        cdata.append([node, et, deg_cent[node], pr[node]])
    node_traces.append(go.Scatter(
        x=xs, y=ys, mode="markers+text",
        text=txt, textposition="top center",
        marker=dict(size=10, color=col),
        customdata=cdata,
        hovertemplate="<b>%{customdata[0]}</b><br>Type: %{customdata[1]}<br>Degree: %{customdata[2]:.3f}<br>PageRank: %{customdata[3]:.4f}<extra></extra>",
        visible=True,
        name=t
    ))

# Build slider frames by date
# Compute the calendar date of every edge once. Rows without a date are left out
# of the slider: a NaT among real dates cannot be sorted.
edge_dates = edges_df['date'].dt.date
dates = sorted(edge_dates.dropna().unique())
frames = []

for d in dates:
    day_edges = edges_df[edge_dates == d]
    day_trace = []
    # One line trace per relationship type, in the same order as the base edge
    # traces, so each frame lines up with the figure's initial data.
    for rel in relationship_types:
        rel_rows = day_edges[day_edges['relationship'] == rel]
        xs, ys = [], []
        for src, dst in zip(rel_rows['source'], rel_rows['target']):
            if src in pos and dst in pos:
                x0, y0 = pos[src]
                x1, y1 = pos[dst]
                xs += [x0, x1, None]
                ys += [y0, y1, None]
        day_trace.append(go.Scatter(x=xs, y=ys, mode="lines", line=dict(color=rel_colors.get(rel, "gray"), width=1), name=rel, hoverinfo="none"))
    frames.append(go.Frame(data=day_trace + node_traces, name=str(d)))

# Create figure
# Animation controls: "Play" runs through all frames; "Pause" issues an immediate
# zero-duration animate call.
play_button = dict(label="▶️ Play", method="animate", args=[None])
pause_button = dict(
    label="⏸ Pause",
    method="animate",
    args=[[None], {"frame": {"duration": 0}, "mode": "immediate"}],
)

# One slider step per date; each jumps straight to the frame named after that date.
slider_steps = [
    dict(
        method="animate",
        label=str(d),
        args=[[str(d)], {"frame": {"duration": 0, "redraw": True}, "mode": "immediate"}],
    )
    for d in dates
]

timeline_layout = go.Layout(
    title="📅 Knowledge Graph Explorer",
    shapes=hull_shapes,
    hovermode="closest",
    margin=dict(l=20, r=20, t=40, b=20),
    updatemenus=[
        dict(type="buttons", showactive=False, x=0, y=1.15,
             buttons=[play_button, pause_button])
    ],
    sliders=[dict(active=0, steps=slider_steps, x=0.1, y=0, len=0.9)],
)

fig = go.Figure(data=edge_traces + node_traces, layout=timeline_layout, frames=frames)

fig.show()


In [0]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import numpy as np

# Build weighted graph (assumes edges_df exists with 'source', 'target',
# 'sentiment', 'relationship', 'date').
# nx.Graph keeps one edge per pair, so adding every row with add_edge() leaves
# only the LAST row's attributes. Here each repeat increments `weight`, and the
# edge's sentiment becomes the most frequent one across all of its rows.
# `relationship` and `date` still come from the last row seen.
G = nx.Graph()
sentiment_votes = {}
for src, dst, sent, rel, date in zip(
    edges_df['source'], edges_df['target'], edges_df['sentiment'],
    edges_df['relationship'], edges_df['date']
):
    if G.has_edge(src, dst):
        G[src][dst]['weight'] += 1
    else:
        G.add_edge(src, dst, weight=1)
    G[src][dst].update(relationship=rel, date=date)
    if pd.notna(sent):
        sentiment_votes.setdefault(frozenset((src, dst)), []).append(sent)

for u, v, attrs in G.edges(data=True):
    votes = sentiment_votes.get(frozenset((u, v)))
    # Series.mode() is sorted, so ties resolve to the smallest label.
    attrs['sentiment'] = pd.Series(votes).mode().iloc[0] if votes else 'neutral'

# Compute centrality scores
# A fixed seed makes the layout reproducible; spring_layout uses the `weight`
# attribute by default, so frequently linked pairs are pulled closer together.
pos = nx.spring_layout(G, k=0.15, iterations=20, seed=42)
deg_cent = nx.degree_centrality(G)
pr = nx.pagerank(G)

# Normalize node sizes: 10 px base plus up to 30 px scaled by degree centrality
node_sizes = {n: 10 + 30 * deg_cent[n] for n in G.nodes()}

# Build edge traces
# One line trace for "All" plus one per sentiment label; an edge belongs to a
# sentiment trace when its 'sentiment' attribute (default 'neutral') equals it.
sentiments = sorted(edges_df['sentiment'].dropna().unique().tolist())
color_map = {"positive": "green", "negative": "red", "neutral": "gray"}

edge_traces = []
for s in ["All", *sentiments]:
    selected_edges = [
        (u, v)
        for u, v, d in G.edges(data=True)
        if s == "All" or d.get('sentiment', 'neutral') == s
    ]
    xs, ys = [], []
    for u, v in selected_edges:
        (x0, y0), (x1, y1) = pos[u], pos[v]
        # None breaks the line so consecutive edges are not joined.
        xs.extend([x0, x1, None])
        ys.extend([y0, y1, None])

    trace_name = "All Sentiments" if s == "All" else s
    edge_traces.append(
        go.Scatter(
            x=xs, y=ys, mode="lines",
            line=dict(color=color_map.get(s, "#888"), width=1),
            hoverinfo="none",
            visible=(s == "All"),
            name=trace_name,
        )
    )

# Entity -> entity_type lookup (first row per entity wins)
entity_types = (
    ents_long.drop_duplicates("entity")
    .set_index("entity")["entity_type"]
    .to_dict()
)
# Other graph cells in this notebook colour "institutions", "political_events"
# and "political_parties"; include them here so those nodes are not all gray.
# "organizations" is kept for backward compatibility.
type_colors = {
    "people": "blue",
    "organizations": "orange",
    "institutions": "orange",
    "locations": "green",
    "political_events": "purple",
    "political_parties": "red",
    "unknown": "gray",
}

# Build node traces: one "All" trace plus one per entity type
types = sorted(set(entity_types.values()))
node_traces = []
for t in ["All"] + types:
    xs, ys, txt, col, size, cdata = [], [], [], [], [], []
    for n in G.nodes():
        et0 = entity_types.get(n, "unknown")
        if t != "All" and et0 != t:
            continue
        x, y = pos[n]
        xs.append(x)
        ys.append(y)
        txt.append(n)
        col.append(type_colors.get(et0, "gray"))
        size.append(node_sizes[n])
        cdata.append([et0, deg_cent[n], pr[n]])
    node_traces.append(
        go.Scatter(
            x=xs, y=ys, mode="markers+text",
            text=txt, textposition="top center",
            marker=dict(size=size, color=col),
            customdata=cdata,
            hovertemplate=(
                "<b>%{text}</b><br>Type: %{customdata[0]}<br>Degree: %{customdata[1]:.3f}<br>PageRank: %{customdata[2]:.4f}<extra></extra>"
            ),
            visible=(t == "All"),
            name=(t if t != "All" else "All Types")
        )
    )

# Assemble figure
fig = go.Figure(
    data=edge_traces + node_traces,
    layout=go.Layout(
        title="Knowledge Graph Explorer",
        hovermode="closest",
        legend=dict(itemsizing="constant"),
        margin=dict(l=20, r=20, b=20, t=40)
    )
)

# Add dropdown filters
# Traces are laid out as [edge "All", edge per sentiment..., node "All", node per
# type...]. Each dropdown toggles ONLY its own group, by passing that group's
# trace indices as the third "update" argument, so "All ..." restores the single
# combined trace (no stacked or duplicated traces) and the two dropdowns do not
# reset each other.
n_edge, n_node = len(edge_traces), len(node_traces)
edge_idx = list(range(n_edge))
node_idx = list(range(n_edge, n_edge + n_node))

sentiment_options = [("All Sentiments", "All Sentiments")] + [
    (s.title(), f"Sentiment: {s.title()}") for s in sentiments
]
type_options = [("All Types", "All Entity Types")] + [
    (t.title(), f"Entity Type: {t.title()}") for t in types
]

# Position k in each option list corresponds to trace k within its group.
sentiment_buttons = [
    dict(label=label, method="update",
         args=[{"visible": [i == k for i in range(n_edge)]},
               {"title.text": title},
               edge_idx])
    for k, (label, title) in enumerate(sentiment_options)
]
type_buttons = [
    dict(label=label, method="update",
         args=[{"visible": [i == k for i in range(n_node)]},
               {"title.text": title},
               node_idx])
    for k, (label, title) in enumerate(type_options)
]

fig.update_layout(
    updatemenus=[
        dict(buttons=sentiment_buttons, direction="down", x=0, y=1.15, showactive=True),
        dict(buttons=type_buttons, direction="down", x=0.25, y=1.15, showactive=True),
    ]
)

fig.show()

In [0]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

# --- PREP WORK ---
# Create graph and compute centrality.
# The graph is undirected, so (A, B) and (B, A) are the same edge. Canonicalize
# each pair before counting; otherwise the later add_edge call would overwrite
# the earlier weight rather than accumulate it.
undirected_pairs = pd.DataFrame(
    [sorted(pair, key=str) for pair in zip(edges_df['source'], edges_df['target'])],
    columns=['source', 'target'],
)
ew = undirected_pairs.groupby(['source', 'target']).size().reset_index(name='weight')
G = nx.Graph()
G.add_weighted_edges_from(ew.itertuples(index=False, name=None))

# Metrics and layout
deg_cent = nx.degree_centrality(G)
pr = nx.pagerank(G)
# A fixed seed makes the layout (and therefore the figure) reproducible.
pos = nx.spring_layout(G, k=0.15, seed=42)

# --- NORMALIZE DEGREE CENTRALITY FOR SCALING NODE SIZES ---
# Rescale degree centrality to marker sizes between 10 and 30.
scaler = MinMaxScaler(feature_range=(10, 30))
deg_values = scaler.fit_transform(pd.DataFrame(list(deg_cent.values())))
node_sizes = {n: deg_values[i][0] for i, n in enumerate(deg_cent)}

# --- BUILD FILTERABLE GRAPH ---
sentiments = sorted(edges_df['sentiment'].dropna().unique())
relationships = sorted(edges_df['relationship'].dropna().unique())
types = ents_long['entity_type'].dropna().unique().tolist()

# Edge traces with sentiment and relationship metadata:
# one trace per (sentiment, relationship) combination.
edge_traces = []
for s in sentiments:
    # Compare case-insensitively: the colour maps elsewhere in this notebook key
    # sentiments in lower case, so an exact 'NEUTRAL' match may never be true.
    is_neutral = str(s).lower() == 'neutral'
    for rel in relationships:
        subset = edges_df[(edges_df.sentiment == s) & (edges_df.relationship == rel)]
        xs, ys, texts = [], [], []
        for u, v in zip(subset['source'], subset['target']):
            if u not in pos or v not in pos:
                continue
            x0, y0 = pos[u]
            x1, y1 = pos[v]
            xs += [x0, x1, None]
            ys += [y0, y1, None]
            # Hover text is matched to points by position. Each edge contributes
            # three entries to xs/ys, so it must contribute three to texts too.
            label = f"{u} {rel} {v}<br>Sentiment: {s}"
            texts += [label, label, None]
        edge_traces.append(go.Scatter(
            x=xs, y=ys, mode='lines', line=dict(width=1, color='gray'),
            hoverinfo='text', text=texts,
            name=f"{s} / {rel}", visible=(is_neutral and rel == relationships[0])
        ))

# Node traces: one "All" trace plus one per entity type
color_map = {'people':'gold', 'institutions':'orange', 'locations':'orchid',
             'political_events':'gray', 'political_parties':'lightgray'}

# First-listed entity type per entity name, built once instead of filtering
# ents_long for every node in every trace.
entity_type_of = (
    ents_long.drop_duplicates('entity').set_index('entity')['entity_type'].to_dict()
)

node_traces = []
for t in ["All"] + types:
    xs, ys, txt, col, size, custom = [], [], [], [], [], []
    for n in G.nodes():
        et = entity_type_of.get(n, 'unknown')
        if t != 'All' and et != t:
            continue
        x, y = pos[n]
        xs.append(x)
        ys.append(y)
        txt.append(n)
        col.append(color_map.get(et, 'lightgray'))
        size.append(node_sizes.get(n, 10))
        custom.append(f"{n} ({et})")
    node_traces.append(go.Scatter(
        x=xs, y=ys, mode='markers+text', textposition='top center',
        text=txt, marker=dict(size=size, color=col, line=dict(width=0.5, color='black')),
        hovertext=custom, hoverinfo='text', visible=(t=='All'), name=t
    ))

# --- BUILD FIGURE ---
fig = go.Figure(data=edge_traces + node_traces)

# Traces are laid out as [one edge trace per (sentiment, relationship)...,
# node "All", node per type...]. Each dropdown toggles ONLY its own group, by
# passing that group's trace indices as the third "update" argument. Picking an
# edge filter therefore no longer switches on every node trace (which drew each
# node twice), and picking a node type no longer switches on every edge trace.
n_edge, n_node = len(edge_traces), len(node_traces)
edge_idx = list(range(n_edge))
node_idx = list(range(n_edge, n_edge + n_node))

# Same (sentiment, relationship) order in which the edge traces were created.
edge_combos = [(s, r) for s in sentiments for r in relationships]

# Sentiment / relationship dropdown
edge_buttons = [
    dict(label='All', method='update', args=[
        {'visible': [True] * n_edge},
        {'title.text': 'All relationships'},
        edge_idx])
] + [
    dict(label=f"{s} / {r}", method='update', args=[
        {'visible': [i == idx for i in range(n_edge)]},
        {'title.text': f"{s} / {r}"},
        edge_idx])
    for idx, (s, r) in enumerate(edge_combos)
]

# Entity-type dropdown: position 0 is the "All" node trace
type_buttons = [
    dict(label='All Types', method='update', args=[
        {'visible': [i == 0 for i in range(n_node)]},
        {'title.text': 'All Entity Types'},
        node_idx])
] + [
    dict(label=t, method='update', args=[
        {'visible': [i == (idx + 1) for i in range(n_node)]},
        {'title.text': f"Type: {t}"},
        node_idx])
    for idx, t in enumerate(types)
]

fig.update_layout(updatemenus=[
    dict(buttons=edge_buttons, x=0, y=1.15),
    dict(buttons=type_buttons, x=0.3, y=1.15)
])

fig.update_layout(
    title="📊 Filterable Political Knowledge Graph",
    margin=dict(l=20,r=20,b=20,t=50),
    hovermode='closest',
    showlegend=True
)

fig.show()

In [0]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

# --- PREP WORK ---
# Collapse repeated (source, target) rows into one weighted edge per pair.
ew = edges_df.groupby(['source','target']).size().reset_index(name='weight')
G = nx.Graph()
# Bulk-insert the (source, target, weight) triples instead of looping with
# iterrows; the weight is stored under the default 'weight' attribute.
G.add_weighted_edges_from(
    ew[['source', 'target', 'weight']].itertuples(index=False, name=None)
)

deg_cent = nx.degree_centrality(G)
# `pr` is not read anywhere in this cell; it is kept in case other cells use it.
pr = nx.pagerank(G)
# Fixed seed so the node positions (and therefore the figure) are the same on
# every Restart & Run All.
pos = nx.spring_layout(G, k=0.15, seed=42)

# Rescale degree centrality into a marker-size range of 10..30.
scaler = MinMaxScaler(feature_range=(10, 30))
deg_values = scaler.fit_transform(pd.DataFrame(list(deg_cent.values())))
node_sizes = {n: deg_values[i][0] for i, n in enumerate(deg_cent)}

# Filter vocabularies used by the dropdown menus.
sentiments = sorted(edges_df['sentiment'].dropna().unique())
relationships = sorted(edges_df['relationship'].dropna().unique())
types = ents_long['entity_type'].dropna().unique().tolist()

# One line trace per (sentiment, relationship) pair, appended in
# sentiment-major order. Each edge contributes three points to the trace
# (start, end, None as a line break), so the hover text must also contribute
# three entries per edge; with a single entry per edge the labels drift onto
# the wrong segments after the first edge.
edge_traces = []
for s in sentiments:
    sentiment_rows = edges_df[edges_df['sentiment'] == s]
    for rel in relationships:
        pair_rows = sentiment_rows[sentiment_rows['relationship'] == rel]
        xs, ys, texts = [], [], []
        for u, v in zip(pair_rows['source'], pair_rows['target']):
            # Skip edges whose endpoints were not placed by the layout.
            if u not in pos or v not in pos:
                continue
            x0, y0 = pos[u]
            x1, y1 = pos[v]
            hover = f"{u} {rel} {v}<br>Sentiment: {s}"
            xs += [x0, x1, None]
            ys += [y0, y1, None]
            texts += [hover, hover, ""]  # keep text aligned with the x/y points
        edge_traces.append(go.Scatter(
            x=xs, y=ys, mode='lines', line=dict(width=1, color='gray'),
            hoverinfo='text', text=texts,
            name=f"{s} / {rel}", visible=(s=='NEUTRAL' and rel==relationships[0])
        ))

# Marker colour per entity type; types missing from the map render light gray.
color_map = {'people':'gold', 'institutions':'orange', 'locations':'orchid',
             'political_events':'gray', 'political_parties':'lightgray'}

# Resolve each entity's type once (first occurrence wins, which is what the
# previous per-node `.iloc[0]` lookup returned) instead of re-scanning
# `ents_long` for every node inside every type pass.
node_type_lookup = (
    ents_long.drop_duplicates('entity')
    .set_index('entity')['entity_type']
    .to_dict()
)

# node_traces[0] is the "All" trace with every node; node_traces[1:] hold one
# trace per entry of `types`. Only the "All" trace is visible initially.
node_traces = []
for t in ["All"] + types:
    xs, ys, txt, col, size, custom = [], [], [], [], [], []
    for n in G.nodes():
        # Entities that never appear in `ents_long` fall back to 'unknown'.
        et = node_type_lookup.get(n, 'unknown')
        if t != 'All' and et != t:
            continue
        x, y = pos[n]
        xs.append(x)
        ys.append(y)
        txt.append(n)
        col.append(color_map.get(et, 'lightgray'))
        size.append(node_sizes.get(n, 10))
        custom.append(f"{n} ({et})")
    node_traces.append(go.Scatter(
        x=xs, y=ys, mode='markers+text', textposition='top center',
        text=txt, marker=dict(size=size, color=col, line=dict(width=0.5, color='black')),
        hovertext=custom, hoverinfo='text', visible=(t=='All'), name=t
    ))

fig = go.Figure(data=edge_traces + node_traces)

edges_all_on = [True] * len(edge_traces)
# node_traces[0] is the "All" trace and already contains every node. The edge
# filters keep only that trace on; switching every node trace on drew each
# typed node twice and stacked duplicate labels.
nodes_all_only = [i == 0 for i in range(len(node_traces))]


def _edge_mask(wanted_names):
    """Return a visibility flag per edge trace: True when its name is in `wanted_names`."""
    return [trace.name in wanted_names for trace in edge_traces]


# Edge traces are named f"{sentiment} / {relationship}". Matching on the exact
# name avoids the false positives of a substring test (`s in trace.name`),
# which also selects traces whenever one label is contained in another label.
sentiment_buttons = [dict(label='All Sentiments', method='update', args=[
    {'visible': edges_all_on + nodes_all_only},
    {'title': 'All Sentiments'}
])] + [
    dict(label=s, method='update', args=[
        {'visible': _edge_mask({f"{s} / {r}" for r in relationships}) + nodes_all_only},
        {'title': f"Sentiment: {s}"}
    ]) for s in sentiments
]

relationship_buttons = [dict(label='All Relationships', method='update', args=[
    {'visible': edges_all_on + nodes_all_only},
    {'title': 'All Relationships'}
])] + [
    dict(label=r, method='update', args=[
        {'visible': _edge_mask({f"{s} / {r}" for s in sentiments}) + nodes_all_only},
        {'title': f"Relationship: {r}"}
    ]) for r in relationships
]

# Entity-type menu: all edges stay on and exactly one node trace is shown
# (index 0 is "All", index idx+1 is types[idx]).
type_buttons = [
    dict(label='All Types', method='update', args=[
        {'visible': edges_all_on + nodes_all_only},
        {'title': 'All Entity Types'}]),
    *[dict(label=t, method='update', args=[
        {'visible': edges_all_on + [i == (idx + 1) for i in range(len(node_traces))]},
        {'title': f"Type: {t}"}]) for idx, t in enumerate(types)]
]

# Separate sentiment, relationship and entity-type dropdowns
fig.update_layout(updatemenus=[
    dict(buttons=sentiment_buttons, direction='down', x=0, y=1.15),
    dict(buttons=relationship_buttons, direction='down', x=0.25, y=1.15),
    dict(buttons=type_buttons, direction='down', x=0.5, y=1.15),
])

# Search box placeholder
fig.update_layout(
    title="\U0001F4CA Interactive Political Knowledge Graph",
    margin=dict(l=20,r=20,b=20,t=60),
    hovermode='closest',
    showlegend=True,
    annotations=[
        dict(text="Search functionality for a person/node can be implemented in Dash for full interactivity.",
             showarrow=False, x=0.5, y=-0.15, xref='paper', yref='paper', align='center')
    ]
)

fig.show()


In [0]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import numpy as np

# Assume edges_df and ents_long already exist
# edges_df columns: source, target, relationship, sentiment, date, url
# ents_long columns: url, date, entity, entity_type

# ─── Graph Construction ─────────────────────────────────────────
# Undirected simple graph: when the same pair of entities appears in several
# rows, the attributes of the last row overwrite the earlier ones.
G = nx.Graph()
edge_columns = zip(
    edges_df['source'], edges_df['target'],
    edges_df['relationship'], edges_df['sentiment'], edges_df['date'],
)
for src, dst, rel, sent, when in edge_columns:
    G.add_edge(src, dst, relationship=rel, sentiment=sent, date=when)

# ─── Centralities and Layout ───────────────────────────────────
degree_centrality = nx.degree_centrality(G)
# Fixed seed so node positions are reproducible across kernel restarts.
pos = nx.spring_layout(G, k=0.15, iterations=50, seed=42)

# ─── Node Trace Construction ───────────────────────────────────
# Entity type color map
entity_colors = {
    "people": "yellow",
    "institutions": "orange",
    "locations": "violet",
    "political_events": "gray",
    "political_parties": "lightblue",
    "unknown": "lightgray"
}

# First-seen entity type per entity, resolved once rather than scanning
# `ents_long` for every node.
node_type_lookup = (
    ents_long.drop_duplicates("entity")
    .set_index("entity")["entity_type"]
    .to_dict()
)

# Collect per-node attributes in plain lists (in G.nodes() order) and build the
# trace once, instead of growing the trace's tuples one element at a time.
node_x, node_y = [], []
node_labels, node_hover = [], []
node_colors, node_marker_sizes = [], []
for node in G.nodes():
    x, y = pos[node]
    deg = degree_centrality.get(node, 0)
    node_x.append(x)
    node_y.append(y)
    # Only high-centrality nodes get a permanent on-canvas label; every node
    # still shows its name on hover. Previously this label was computed but
    # never used, so no node was ever labelled.
    node_labels.append(node if deg >= 0.01 else "")
    node_hover.append(node)
    # Unknown entities and unmapped types both fall back to light gray.
    node_colors.append(entity_colors.get(node_type_lookup.get(node), "lightgray"))
    node_marker_sizes.append(10 + 30 * deg)

node_trace = go.Scatter(
    x=node_x, y=node_y,
    text=node_labels,
    hovertext=node_hover,
    textposition="top center",
    mode='markers+text',
    hoverinfo='text',
    # NOTE(review): with sizemode='diameter' plotly scales the rendered size by
    # sizeref; confirm sizeref=0.05 gives the intended marker size.
    marker=dict(size=node_marker_sizes, color=node_colors,
                sizemode='diameter', sizeref=0.05, showscale=False),
)

# ─── Edge Traces by Sentiment ───────────────────────────────────
# One line trace per sentiment value, appended in the sorted order of
# `sentiments`. Edges whose sentiment matches none of the values are not drawn.
sentiments = sorted(edges_df['sentiment'].dropna().unique().tolist())
sentiment_colors = {
    "FRIENDLY": "green",
    "HOSTILE": "red",
    "NEUTRAL": "gray"
}

edge_traces = []
for sentiment in sentiments:
    # Endpoints of every graph edge carrying this sentiment.
    endpoints = [
        (a, b) for a, b, attrs in G.edges(data=True)
        if attrs.get("sentiment") == sentiment
    ]
    edge_x, edge_y = [], []
    for a, b in endpoints:
        (x0, y0), (x1, y1) = pos[a], pos[b]
        # The trailing None breaks the line so separate edges are not joined.
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))
    line_style = dict(width=1, color=sentiment_colors.get(sentiment, "#888"))
    trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=line_style,
        hoverinfo='none',
        mode='lines',
        name=sentiment
    )
    edge_traces.append(trace)

# ─── Dropdown Filters ───────────────────────────────────────────
# Unique relationships / entity types offered by the menus
relationships = sorted(edges_df['relationship'].dropna().unique().tolist())
entity_types = sorted(ents_long['entity_type'].dropna().unique().tolist())

# Figure trace layout: edge_traces[j] draws sentiments[j]; the single node
# trace is appended after all edge traces.
edge_trace_ids = list(range(len(edge_traces)))
node_trace_id = len(edge_traces)

# The three menus act on different trace properties so they can be combined:
#   sentiment    -> trace visibility
#   relationship -> edge coordinates (x / y) of every sentiment trace
#   entity type  -> per-node marker opacity of the node trace
# Previously every relationship and entity-type button applied the same
# all-True visibility, so those two menus changed nothing but the title.

# Sentiment menu: show one sentiment trace (the node trace always stays on).
sentiment_buttons = [
    dict(label="All Sentiments", method="update",
         args=[{"visible": [True]*len(edge_traces) + [True]},
               {"title": "All Sentiments"}])
] + [
    dict(label=s, method="update",
         args=[{"visible": [i==j for i in range(len(edge_traces))] + [True]},
               {"title": f"Sentiment: {s}"}])
    for j, s in enumerate(sentiments)
]

# Pre-compute the line segments of every sentiment trace, both in full and
# restricted to a single relationship.
all_xy = {s: ([], []) for s in sentiments}
rel_xy = {(s, r): ([], []) for s in sentiments for r in relationships}
for u, v, d in G.edges(data=True):
    edge_sentiment, edge_relationship = d.get("sentiment"), d.get("relationship")
    if edge_sentiment not in all_xy:
        continue  # edges without a listed sentiment are drawn by no trace
    # Plain floats keep the button payload JSON-friendly; None breaks the line.
    seg_x = [float(pos[u][0]), float(pos[v][0]), None]
    seg_y = [float(pos[u][1]), float(pos[v][1]), None]
    all_xy[edge_sentiment][0].extend(seg_x)
    all_xy[edge_sentiment][1].extend(seg_y)
    if (edge_sentiment, edge_relationship) in rel_xy:
        rel_xy[(edge_sentiment, edge_relationship)][0].extend(seg_x)
        rel_xy[(edge_sentiment, edge_relationship)][1].extend(seg_y)

# Relationship menu: the third `args` entry restricts the data update to the
# edge traces; each inner list is the new x (or y) array of one edge trace.
relationship_buttons = [
    dict(label="All Relationships", method="update",
         args=[{"x": [all_xy[s][0] for s in sentiments],
                "y": [all_xy[s][1] for s in sentiments]},
               {"title": "All Relationships"},
               edge_trace_ids])
] + [
    dict(label=r, method="update",
         args=[{"x": [rel_xy[(s, r)][0] for s in sentiments],
                "y": [rel_xy[(s, r)][1] for s in sentiments]},
               {"title": f"Relationship: {r}"},
               edge_trace_ids])
    for r in relationships
]

# Entity-type menu: dim every node that is not of the selected type. The node
# trace was filled by iterating G.nodes(), so the opacity list uses that order.
node_type_lookup = (
    ents_long.drop_duplicates("entity")
    .set_index("entity")["entity_type"]
    .to_dict()
)
node_types = [node_type_lookup.get(n) for n in G.nodes()]
DIMMED_OPACITY = 0.1
type_buttons = [
    dict(label="All Types", method="update",
         args=[{"marker.opacity": [[1.0] * len(node_types)]},
               {"title": "All Types"},
               [node_trace_id]])
] + [
    dict(label=et, method="update",
         args=[{"marker.opacity": [[1.0 if nt == et else DIMMED_OPACITY
                                    for nt in node_types]]},
               {"title": f"Entity Type: {et}"},
               [node_trace_id]])
    for et in entity_types
]

updatemenus = [
    dict(buttons=sentiment_buttons, direction="down", showactive=True, x=0.1, y=1.1),
    dict(buttons=relationship_buttons, direction="down", showactive=True, x=0.3, y=1.1),
    dict(buttons=type_buttons, direction="down", showactive=True, x=0.5, y=1.1),
]

# ─── Final Graph ────────────────────────────────────────────────
fig = go.Figure(data=edge_traces + [node_trace],
                layout=go.Layout(
                    title="Interactive Knowledge Graph",
                    hovermode='closest',
                    updatemenus=updatemenus,
                    showlegend=True,
                    margin=dict(l=20, r=20, t=40, b=20)
                ))

fig.show()

# Next: using PyVis, edge bundling, and other techniques

In [0]:
from pyvis.network import Network
import networkx as nx
import pandas as pd

# Undirected graph of entity pairs; each edge carries the sentiment,
# relationship, source URL and (stringified) date of its row. A pair that
# occurs in several rows keeps the attributes of the last one.
G = nx.Graph()
G.add_edges_from(
    (
        record['source'],
        record['target'],
        {
            "sentiment": record['sentiment'],
            "relationship": record['relationship'],
            "url": record['url'],
            "date": str(record['date']),  # stored as text
        },
    )
    for _, record in edges_df.iterrows()
)

# Degree centrality drives the node size further down.
deg_cent = nx.degree_centrality(G)

# entity -> entity_type, taking the first row seen for each entity.
first_seen_entities = ents_long.drop_duplicates('entity')
entity_type_map = dict(
    zip(first_seen_entities['entity'], first_seen_entities['entity_type'])
)

# Node colour for each entity type.
type_colors = dict(
    people="#ffd700",             # yellow
    organizations="#ff7f0e",      # orange
    locations="#9467bd",          # purple
    political_events="#7f7f7f",   # gray
    political_parties="#2ca02c",  # green
    institutions="#1f77b4",       # blue
    unknown="#d62728",            # red
)

# Dark-themed PyVis canvas configured for notebook output, with the
# Barnes-Hut physics layout enabled.
net = Network(
    height="750px",
    width="100%",
    bgcolor="#222222",
    font_color="white",
    notebook=True,
)
net.barnes_hut()

# Nodes: colour by entity type, size by degree centrality.
for node in G:
    entity_type = entity_type_map.get(node, "unknown")
    size = 10 + deg_cent.get(node, 0) * 100  # central nodes are drawn larger
    # Small nodes get an empty label; the tooltip always carries the name.
    if size > 20:
        label = node
    else:
        label = ""
    net.add_node(
        n_id=node,
        label=label,
        title=f"{node} ({entity_type})",
        color=type_colors.get(entity_type, "#d3d3d3"),
        size=size,
    )

# Edge colour for each sentiment value.
sentiment_colors = dict(FRIENDLY="green", HOSTILE="red", NEUTRAL="gray")

# Edges: colour by sentiment, tooltip with relationship / sentiment / date.
for src, dst, attrs in G.edges(data=True):
    relation = attrs.get('relationship', '')
    tooltip = f"{src} {relation} {dst}<br>Sentiment: {attrs.get('sentiment')}<br>Date: {attrs.get('date')}"
    edge_color = sentiment_colors.get(attrs.get('sentiment', "NEUTRAL"), "gray")
    net.add_edge(src, dst, color=edge_color, title=tooltip, value=1)

# Write the HTML page and show it
net.show("knowledge_graph.html")

In [0]:
from pyvis.network import Network
import networkx as nx
import pandas as pd
from IPython.display import IFrame
import ipywidgets as widgets
from IPython.display import display
import os 

# Make sure the target directory exists
os.makedirs('/dbfs/tmp', exist_ok=True)

# Base variables: one dropdown per categorical filter, each with an "All" entry.
selected_sentiment = widgets.Dropdown(
    options=["All"] + sorted(edges_df["sentiment"].dropna().unique().tolist()),
    description='Sentiment:'
)

selected_relationship = widgets.Dropdown(
    options=["All"] + sorted(edges_df["relationship"].dropna().unique().tolist()),
    description='Relation:'
)

# Build the option list once and derive the initial (first, last) selection
# from that same list. The upper index used to come from
# `edges_df["date"].unique()`, which also counts missing dates and is taken
# before the string cast, so it could point past the end of `options`.
date_options = sorted(edges_df["date"].dropna().astype(str).unique().tolist())

date_range = widgets.SelectionRangeSlider(
    options=date_options,
    index=(0, len(date_options) - 1),
    description='Date:',
    orientation='horizontal',
    layout={'width': '800px'}
)

# Callback

def generate_filtered_graph(sentiment, relationship, date_range_values):
    """Render a PyVis graph of the edges in `edges_df` that pass the filters.

    Args:
        sentiment: A sentiment value to keep, or "All" for no sentiment filter.
        relationship: A relationship value to keep, or "All" for no filter.
        date_range_values: ``(start, end)`` pair of date strings as produced by
            the date slider; both bounds are inclusive.

    Returns:
        None. The graph is written to ``filtered_kg.html`` and displayed in an
        IFrame; a message is printed instead when no edge matches.
    """
    start, end = date_range_values

    # The slider options are the string form of the dates, so compare against
    # the string form too (computed on the full column so the formatting is the
    # same one the slider saw). Comparing the raw column with strings breaks
    # for non-string dates, and missing dates are excluded explicitly.
    date_text = edges_df["date"].astype(str)
    keep = edges_df["date"].notna() & date_text.between(start, end)
    if sentiment != "All":
        keep &= edges_df["sentiment"] == sentiment
    if relationship != "All":
        keep &= edges_df["relationship"] == relationship
    filtered_edges = edges_df[keep]

    # Simple undirected graph: a repeated pair keeps the last row's attributes.
    G = nx.Graph()
    for _, row in filtered_edges.iterrows():
        G.add_edge(row['source'], row['target'],
                   sentiment=row['sentiment'],
                   relationship=row['relationship'],
                   url=row['url'],
                   date=str(row['date']))

    # Nothing to draw: say so instead of embedding an empty canvas.
    if G.number_of_nodes() == 0:
        print("⚠️ No data matches selected filters.")
        return

    deg_cent = nx.degree_centrality(G)
    entity_type_map = ents_long.drop_duplicates('entity').set_index('entity')['entity_type'].to_dict()
    type_colors = {
        "people": "#ffd700",
        "organizations": "#ff7f0e",
        "locations": "#9467bd",
        "political_events": "#7f7f7f",
        "political_parties": "#2ca02c",
        "institutions": "#1f77b4",
        "unknown": "#d62728",
    }
    sentiment_colors = {
        "FRIENDLY": "green",
        "HOSTILE": "red",
        "NEUTRAL": "gray"
    }

    net = Network(
        height="750px",
        width="100%",
        bgcolor="#222222",
        font_color="white",
        notebook=True,
        cdn_resources="in_line"  # or try "remote" if this doesn't work
    )
    net.barnes_hut()

    # Nodes: colour by entity type, size by degree centrality; only the larger
    # nodes get a visible label.
    for node in G.nodes():
        entity_type = entity_type_map.get(node, "unknown")
        color = type_colors.get(entity_type, "#d3d3d3")
        size = 10 + deg_cent.get(node, 0) * 100
        label = node if size > 20 else ""
        net.add_node(
            n_id=node, label=label,
            title=f"{node} ({entity_type})",
            color=color, size=size
        )

    # Edges: colour by sentiment, tooltip with relationship / sentiment / date.
    for u, v, d in G.edges(data=True):
        color = sentiment_colors.get(d.get('sentiment', "NEUTRAL"), "gray")
        label = d.get('relationship', '')
        title = f"{u} {label} {v}<br>Sentiment: {d.get('sentiment')}<br>Date: {d.get('date')}"
        net.add_edge(u, v, color=color, title=title, value=1)

    net.save_graph('filtered_kg.html')
    display(IFrame(src='filtered_kg.html', width='100%', height='800px'))

# Link interactivity
# `interactive_output` wires the widgets to the callback without rendering
# them, so the controls are displayed exactly once, in the layout chosen here.
# The previous `interact(...)` call already rendered the widgets it was given,
# and the following `display(ui)` then rendered the same controls again.
ui = widgets.VBox([selected_sentiment, selected_relationship, date_range])
graph_output = widgets.interactive_output(
    generate_filtered_graph,
    {
        "sentiment": selected_sentiment,
        "relationship": selected_relationship,
        "date_range_values": date_range,
    },
)
display(ui, graph_output)


In [0]:
from IPython.display import IFrame
# Write the current module-level `net` to an HTML page, then embed that page.
kg_page = "filtered_kg.html"
net.show(kg_page)
display(IFrame(src=kg_page, width="100%", height="800px"))

In [0]:
import networkx as nx
from pyvis.network import Network
from IPython.display import IFrame, display
import ipywidgets as widgets
import os
import pandas as pd

# Output location for generated HTML; created up front (no error if present).
output_dir = "/dbfs/tmp"
os.makedirs(output_dir, exist_ok=True)

# Node colour for each entity type.
type_colors = dict(
    people="#ffd700",  # yellow
    organizations="#ff7f0e",
    locations="#9467bd",
    political_events="#7f7f7f",
    political_parties="#2ca02c",
    institutions="#1f77b4",
    unknown="#d62728",
)

# Edge colour for each sentiment value.
sentiment_colors = dict(FRIENDLY="green", HOSTILE="red", NEUTRAL="gray")

# entity -> entity_type, taking the first row seen for each entity.
first_seen_entities = ents_long.drop_duplicates('entity')
entity_type_map = dict(
    zip(first_seen_entities['entity'], first_seen_entities['entity_type'])
)

def generate_filtered_graph(sentiment, relationship, date_range_values):
    """Draw the subset of `edges_df` selected by the three filters with PyVis.

    Args:
        sentiment: Sentiment value to keep, or "All" to skip this filter.
        relationship: Relationship value to keep, or "All" to skip this filter.
        date_range_values: ``(start, end)`` bounds, both inclusive; a falsy
            value skips the date filter.

    Returns:
        None. Prints a warning when nothing matches; otherwise writes the graph
        to ``/dbfs/tmp/filtered_kg.html`` and displays it in an IFrame.
    """
    subset = edges_df.copy()

    # Apply each active filter in turn.
    if sentiment != "All":
        subset = subset[subset['sentiment'] == sentiment]
    if relationship != "All":
        subset = subset[subset['relationship'] == relationship]
    if date_range_values:
        first_day, last_day = date_range_values
        not_before = subset['date'] >= first_day
        not_after = subset['date'] <= last_day
        subset = subset[not_before & not_after]

    # Undirected graph; a repeated pair keeps the attributes of its last row.
    G = nx.Graph()
    G.add_edges_from(
        (
            record['source'],
            record['target'],
            {
                "sentiment": record['sentiment'],
                "relationship": record['relationship'],
                "date": str(record['date']),
                "url": record['url'],
            },
        )
        for _, record in subset.iterrows()
    )

    if G.number_of_nodes() == 0:
        print("⚠️ No data matches selected filters.")
        return

    deg_cent = nx.degree_centrality(G)
    net = Network(
        height="750px",
        width="100%",
        bgcolor="#222222",
        font_color="white",
        notebook=True,
        cdn_resources='in_line',
    )
    net.barnes_hut()

    # Nodes: size from degree centrality, colour from entity type; only the
    # larger nodes carry a visible label.
    for node in G:
        size = 10 + deg_cent.get(node, 0) * 100
        entity_type = entity_type_map.get(node, "unknown")
        net.add_node(
            node,
            label=(node if size > 20 else ""),
            title=f"{node} ({entity_type})",
            color=type_colors.get(entity_type, "#ccc"),
            size=size,
        )

    # Edges: colour from sentiment, tooltip with relationship / sentiment / date.
    for src, dst, attrs in G.edges(data=True):
        tooltip = f"{src} {attrs.get('relationship', '')} {dst}<br>Sentiment: {attrs.get('sentiment')}<br>Date: {attrs.get('date')}"
        edge_color = sentiment_colors.get(attrs.get("sentiment", "NEUTRAL"), "gray")
        net.add_edge(src, dst, color=edge_color, title=tooltip)

    # Save and display
    # NOTE(review): the page is written under /dbfs/tmp but embedded from
    # /files/tmp - confirm the hosting environment actually serves that
    # directory at that URL.
    html_file = "/dbfs/tmp/filtered_kg.html"
    net.write_html(html_file)

    frame = IFrame(src="/files/tmp/filtered_kg.html", width="100%", height="800px")
    display(frame)

# --- FILTER WIDGETS ---
# Option lists come from the non-null values of each column; "All" disables
# the corresponding filter in generate_filtered_graph.
sentiment_options = ["All"] + sorted(edges_df["sentiment"].dropna().unique())
relationship_options = ["All"] + sorted(edges_df["relationship"].dropna().unique())
date_values = sorted(edges_df["date"].dropna().unique())

sentiment_dropdown = widgets.Dropdown(options=sentiment_options, description="Sentiment:")
relationship_dropdown = widgets.Dropdown(options=relationship_options, description="Relationship:")
# The slider starts with the full date span selected.
date_slider = widgets.SelectionRangeSlider(
    options=date_values,
    index=(0, len(date_values) - 1),
    description="Date Range",
    layout={"width": "800px"}
)

# INTERACTIVE DISPLAY
# `interactive_output` only connects the widgets to the callback and returns
# the output area; it does not render the controls. Display the controls
# explicitly, otherwise the graph appears with nothing to filter it by.
graph_output = widgets.interactive_output(
    generate_filtered_graph,
    {
        "sentiment": sentiment_dropdown,
        "relationship": relationship_dropdown,
        "date_range_values": date_slider
    }
)
controls = widgets.VBox([sentiment_dropdown, relationship_dropdown, date_slider])
display(controls, graph_output)