<a href="https://colab.research.google.com/github/dadashzadeh/Keyword-Suggestion-Tool/blob/main/wikipedia/wikipedia_graph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!!pip install httpx
!!pip install pyvis
!!pip install networkx



In [3]:
import os
from datetime import datetime
from urllib.parse import quote

import httpx
import networkx as nx
import pandas as pd
from google.colab import files
from pyvis.network import Network

In [4]:

async def fetch_search_results(query: str) -> list:
    """Search fa.wikipedia.org via the opensearch API and return page titles.

    Args:
        query: Free-text search term. It is passed through ``params`` so that
            httpx percent-encodes it; characters such as ``&``, ``#`` or ``+``
            can no longer corrupt the query string.

    Returns:
        The list of matching titles (second element of the opensearch
        response, at most 10 entries from namespace 0). An empty list is
        returned, after printing a message, when the request fails, the
        status code is not 200, or the body does not have the expected shape.
    """
    url = "https://fa.wikipedia.org/w/api.php"
    params = {
        "action": "opensearch",
        "format": "json",
        "formatversion": "2",
        "search": query,
        "namespace": "0",
        "limit": "10",
        "origin": "*",
    }

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(url, params=params, follow_redirects=True)
    except httpx.HTTPError as e:
        # Timeouts / connection failures follow the same "report and return
        # nothing" convention as a non-200 status.
        print(f"Error fetching search results: {e}")
        return []

    if response.status_code != 200:
        print(f"Error fetching search results: {response.status_code}")
        return []

    try:
        # opensearch answers with [query, [titles], [descriptions], [urls]].
        return response.json()[1]
    except (ValueError, LookupError, TypeError) as e:
        # Invalid JSON, or a payload that is not the expected array
        # (for instance an API error object).
        print(f"Error parsing search results: {e}")
        return []


async def fetch_related_pages(title: str) -> list:
    """Fetch the titles of pages related to ``title`` from the fa.wikipedia REST API.

    Args:
        title: Page title. It is percent-encoded before being placed in the
            URL path, so titles containing ``/``, ``?`` or ``#`` address the
            intended page instead of altering the path.

    Returns:
        The ``title`` of every entry under the response's ``pages`` key. An
        empty list is returned, after printing a message, when the request
        fails, the status code is not 200, or the body cannot be parsed into
        the expected structure.
    """
    # safe="" so that "/" inside a title is encoded as well rather than being
    # treated as a path separator.
    encoded_title = quote(title, safe="")
    url = f"https://fa.wikipedia.org/api/rest_v1/page/related/{encoded_title}"

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(url, follow_redirects=True)
    except httpx.HTTPError as e:
        # A single failed request should not abort a crawl that issues many.
        print(f"Error fetching related pages for {title}: {e}")
        return []

    if response.status_code != 200:
        print(f"Error fetching related pages for {title}: {response.status_code}")
        return []

    try:
        related_pages = response.json().get("pages", [])
        return [page["title"] for page in related_pages]
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        # ValueError: body is not JSON; the others: JSON of an unexpected shape
        # (not an object, or entries without a "title").
        print(f"Error parsing JSON for title {title}: {e}")
        return []


async def build_graph(query: str, depth: int = 2) -> nx.Graph:
    """Build an undirected graph of Wikipedia pages related to a search query.

    The search results for ``query`` form level 1. Every page on levels
    ``1..depth`` is expanded once: it is added as a node and linked to each of
    its related pages. Related pages of the last level therefore appear as
    nodes but are not expanded themselves.

    The crawl is breadth-first, so a page is always expanded at the shallowest
    level at which it is reachable. (A depth-first walk sharing one visited
    set could first meet a page at the deepest level and then skip it when it
    turned up again closer to the root, losing its neighbourhood.)

    Args:
        query: Search term handed to ``fetch_search_results``.
        depth: Number of levels to expand; values below 1 yield an empty graph.

    Returns:
        The populated ``nx.Graph``. Node labels are page titles with
        underscores replaced by spaces.
    """
    graph = nx.Graph()
    # Keyed on the normalised label so "A_B" and "A B" count as the same page
    # and are fetched only once.
    visited = set()

    def to_label(title: str) -> str:
        """Return the node label for a page title (underscores -> spaces)."""
        return title.replace("_", " ")

    frontier = await fetch_search_results(query)
    for _ in range(depth):
        if not frontier:
            break

        next_frontier = []
        for page in frontier:
            label = to_label(page)
            if label in visited:
                continue

            visited.add(label)
            graph.add_node(label)

            # Requests are awaited one at a time, as in a plain sequential crawl.
            for related_page in await fetch_related_pages(page):
                graph.add_edge(label, to_label(related_page))
                next_frontier.append(related_page)

        frontier = next_frontier

    return graph


async def generate_interactive_graph(query: str, depth: int = 2):
    """Build the related-pages graph for ``query`` and export it as HTML and Excel.

    Steps:
        1. Build the graph with ``build_graph(query, depth)``.
        2. Render it with pyvis into ``wikipedia_graph.html``.
        3. Write the node list and edge list to separate sheets of a
           timestamped workbook under ``export/``.
        4. Trigger a Colab download of both files.

    Args:
        query: Search term; also used (after sanitising) in the workbook name.
        depth: Crawl depth forwarded to ``build_graph``.

    Returns:
        None. The results are the files written and the downloads triggered.
    """
    graph = await build_graph(query, depth)

    # Generate interactive graph
    html_path = "wikipedia_graph.html"
    # NOTE(review): with cdn_resources="local" the HTML presumably references
    # JS/CSS assets stored next to it, so the downloaded file may not render
    # on its own — confirm, and consider "in_line" if a standalone file is needed.
    net = Network(height='750px', width='100%', bgcolor='#ffffff', font_color='black', notebook=False, cdn_resources="local")
    net.from_nx(graph)
    net.write_html(html_path)
    print(f"Interactive graph saved to '{html_path}'.")

    # Prepare data for Excel export
    edges_df = pd.DataFrame(list(graph.edges()), columns=["Source", "Target"])
    nodes_df = pd.DataFrame({"Node": list(graph.nodes())})

    # The query is free text: replace path separators, characters that are
    # reserved in file names, and control characters so it cannot point
    # outside 'export/' or produce an invalid name.
    safe_query = "".join(
        "_" if ch in '\\/:*?"<>|' or ord(ch) < 32 else ch
        for ch in query
    )

    # Generate a unique file path for saving
    timestamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    os.makedirs('export', exist_ok=True)
    file_path = f'export/{safe_query}-wikipedia_graph_{timestamp}.xlsx'

    # Save data to Excel with separate sheets for nodes and edges
    with pd.ExcelWriter(file_path) as writer:
        nodes_df.to_excel(writer, sheet_name="Nodes", index=False)
        edges_df.to_excel(writer, sheet_name="Edges", index=False)

    print(f"Graph data saved to '{file_path}'.")
    files.download(file_path)
    files.download(html_path)

# Example usage:
query = "بهینه‌سازی موتور جستجو"
depth = 2  # Search depth
await generate_interactive_graph(query, depth)


Interactive graph saved to 'wikipedia_graph.html'.
Graph data saved to 'export/بهینه‌سازی موتور جستجو-wikipedia_graph_2024-11-08_16-30-40.xlsx'.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>