<a href="https://colab.research.google.com/github/ak2742/mlplay/blob/Fine-Tuning/13)_ScrapeGraph_Web_Scraper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Install package and load the extension
!pip install colab-xterm
%load_ext colabxterm
%xterm

# #- Install/run Ollama in terminal

# !curl -fsSL https://ollama.com/install.sh | sh
# !ollama serve & ollama pull nomic-embed-text

In [None]:
#@title Install libraries

!pip install -q scrapegraphai
!apt install -q chromium-chromedriver
!pip install -q nest_asyncio
!pip install -q playwright
!pip install -q gradio
!playwright install


import nest_asyncio
nest_asyncio.apply()

In [None]:
#@title Create Graph

from scrapegraphai.graphs import SmartScraperGraph, SearchGraph, SpeechGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json

def run_graph(prompt, config, graph_type, source=None):
    """Build the requested ScrapeGraphAI graph, run it and export the result.

    Args:
        prompt: Natural-language instruction handed to the graph.
        config: Graph configuration dict.
        graph_type: One of "smart_scraper", "search" or "speech".
        source: Page/content to work on. Required for "smart_scraper" and
            "speech"; not used by "search".

    Returns:
        The value returned by ``graph.run()``.

    Raises:
        ValueError: If ``graph_type`` is not supported, or if ``source`` is
            missing for a graph type that needs one.
    """
    supported_types = ("smart_scraper", "search", "speech")
    needs_source = ("smart_scraper", "speech")

    # Validate before touching any graph class so a typo gives a clear error
    # instead of an UnboundLocalError on `graph` further down.
    if graph_type not in supported_types:
        raise ValueError(
            f"Unsupported graph_type {graph_type!r}; expected one of {supported_types}"
        )
    if graph_type in needs_source and source is None:
        raise ValueError(f"graph_type {graph_type!r} requires a source")

    if graph_type == "smart_scraper":
        graph = SmartScraperGraph(
            prompt=prompt,
            source=source,
            config=config
        )
    elif graph_type == "search":
        graph = SearchGraph(
            prompt=prompt,
            config=config
        )
    else:  # "speech"
        # SpeechGraph scrapes a source just like SmartScraperGraph does, so it
        # must receive it as well.
        graph = SpeechGraph(
            prompt=prompt,
            source=source,
            config=config
        )

    # Run the graph
    result = graph.run()

    result_format(result)

    return result

def result_format(result):
    """Pass ``result`` to the CSV and JSON exporters under the name "result"."""
    # CSV first, then JSON, both with the same target name.
    for export in (convert_to_csv, convert_to_json):
        export(result, "result")

In [None]:
#@title Get Config

from google.colab import userdata

# Read the Gemini key from Colab's secret store. A missing or inaccessible
# secret must not stop the notebook for users who only run Ollama or OpenAI,
# so fall back to None instead of letting the cell fail.
try:
    GOOGLE_API_KEY = userdata.get('GEMINI_KEY')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    GOOGLE_API_KEY = None
    print("GEMINI_KEY secret is not available; the gemini-pro model cannot be used.")

def get_openai_config(model_type, openai_key):
    """Assemble the graph configuration for an OpenAI-backed run.

    Args:
        model_type: Model name placed under ``llm.model``.
        openai_key: API key used for both the LLM and the text-to-speech model.

    Returns:
        A dict with ``llm``, ``tts_model`` and ``output_path`` entries.
    """
    llm_settings = dict(api_key=openai_key, model=model_type)
    tts_settings = dict(api_key=openai_key, model="tts-1", voice="alloy")
    return dict(
        llm=llm_settings,
        tts_model=tts_settings,
        output_path="audio_summary.mp3",
    )

def get_ollama_config(model_type, graph_type):
    """Assemble the graph configuration for a local Ollama run.

    Args:
        model_type: Ollama model name; it is prefixed with ``ollama/``.
        graph_type: Graph kind; ``"search"`` additionally sets ``max_results``.

    Returns:
        A dict with ``llm``, ``embeddings`` and ``verbose`` entries, plus
        ``max_results`` for search graphs.
    """
    ollama_url = "http://localhost:11434"  # set Ollama URL

    llm_settings = {
        "model": f"ollama/{model_type}",
        "temperature": 0.1,
        "format": "json",  # Ollama needs the format to be specified explicitly
        "base_url": ollama_url,
    }
    embedding_settings = {
        "model": "ollama/nomic-embed-text",
        "base_url": ollama_url,
    }

    # Only search graphs get a result limit.
    search_extras = {"max_results": 5} if graph_type == "search" else {}

    return {
        "llm": llm_settings,
        "embeddings": embedding_settings,
        "verbose": True,
        **search_extras,
    }

def get_gemini_pro_config(graph_type, gemini_pro_key):
    """Assemble the graph configuration for a Gemini Pro run.

    Args:
        graph_type: Graph kind. ``"smart_scraper"`` adds Ollama embeddings,
            ``"search"`` adds search/runtime options; anything else gets only
            the ``llm`` section.
        gemini_pro_key: API key placed under ``llm.api_key``.

    Returns:
        The configuration dict.
    """
    config = {"llm": {"api_key": gemini_pro_key, "model": "gemini-pro"}}

    if graph_type == "search":
        # NOTE(review): temperature and streaming are set at the top level of
        # the config here, not inside "llm" - confirm this is intended.
        config["temperature"] = 0.1
        config["streaming"] = True
        config["max_results"] = 5
        config["verbose"] = True
        return config

    if graph_type == "smart_scraper":
        config["embeddings"] = {
            "model": "ollama/nomic-embed-text",
            "base_url": "http://localhost:11434",  # set Ollama URL
        }

    return config


In [None]:

def _redact_api_keys(config):
    """Return a copy of ``config`` with every non-empty "api_key" value masked."""
    if not isinstance(config, dict):
        return config
    return {
        key: "***" if key == "api_key" and value else _redact_api_keys(value)
        for key, value in config.items()
    }


def main(model, model_type, graph_type, question, source=None, openai_key=None):
    """Build the configuration for the chosen provider and run the graph.

    Args:
        model: Provider name: "ollama", "OpenAI" or "gemini-pro".
        model_type: Model name; used by the Ollama and OpenAI configurations.
        graph_type: Graph kind forwarded to the config helpers and ``run_graph``.
        question: Prompt for the graph.
        source: Optional source forwarded to ``run_graph``.
        openai_key: OpenAI API key. When omitted, the ``OPENAI_API_KEY``
            environment variable is used.

    Returns:
        The value returned by ``run_graph``.

    Raises:
        ValueError: If ``model`` is unknown or the key needed by the chosen
            provider is not available.
    """
    import os

    print(f"Model: {model}, Model Type: {model_type}, Type: {graph_type}, Question: {question}, Source: {source}")

    if model == "ollama":
        graph_config = get_ollama_config(model_type, graph_type)
    elif model == "OpenAI":
        # No OpenAI key is defined anywhere in the notebook, so take it from
        # the caller or the environment instead of an undefined global.
        openai_key = openai_key or os.environ.get("OPENAI_API_KEY")
        if not openai_key:
            raise ValueError(
                "An OpenAI API key is required: pass openai_key or set OPENAI_API_KEY"
            )
        graph_config = get_openai_config(model_type, openai_key)
    elif model == "gemini-pro":
        if not GOOGLE_API_KEY:
            raise ValueError("GOOGLE_API_KEY is not set; the gemini-pro model cannot be used")
        graph_config = get_gemini_pro_config(graph_type, GOOGLE_API_KEY)
    else:
        # Fail here rather than handing a None config to the graph.
        raise ValueError(
            f"Unknown model {model!r}; expected 'ollama', 'OpenAI' or 'gemini-pro'"
        )

    # Mask API keys so they do not end up in the saved notebook output.
    print(f"Graph Config: {_redact_api_keys(graph_config)}")

    return run_graph(question, graph_config, graph_type, source)


In [None]:
# Example run: web search answered by the local Ollama "llama3" model.
main(
    model="ollama",
    model_type="llama3",
    graph_type="search",
    question="Who is the president of the United States?",
)