In [12]:
import requests
import json

def generate_questions(
    chunk,
    model_name="llama3.2",
    num_questions=3,
    api_url="http://localhost:11434/api/generate",
    timeout=300,
):
    """
    Generate questions from a given text chunk using an Ollama model.

    The chunk is embedded in a fixed instruction prompt and sent as a single
    non-streaming POST request to the Ollama ``/api/generate`` endpoint.

    Args:
        chunk (str): The text chunk to generate questions from. Must be a
            non-blank string; anything else (``None``, a pandas ``NaN`` from
            an empty CSV field, whitespace only) is rejected without
            contacting the model.
        model_name (str): The name of the Ollama model to use (default: "llama3.2")
        num_questions (int): Number of questions to generate (default: 3)
        api_url (str): URL of the Ollama generate endpoint (default is
            localhost on port 11434).
        timeout (float | tuple | None): Timeout in seconds forwarded to
            ``requests.post``. ``None`` waits forever, which is what the
            request did before this parameter existed.

    Returns:
        str | None: The raw text of the model's ``"response"`` field (the
        prompt asks for one question per line), or ``None`` if the input was
        rejected, the HTTP request failed, or the reply could not be parsed.
        Failures are reported with ``print`` rather than raised.
    """
    # Reject unusable input up front: formatting a non-string into the prompt
    # (e.g. float NaN becomes "nan") would make the model invent questions.
    if not isinstance(chunk, str) or not chunk.strip():
        print("Error: chunk must be a non-empty string")
        return None

    # Create the prompt for question generation
    prompt = f"""
    You are a helpful assistant that generates questions from text.
    
    Generate exactly {num_questions} relevant questions from the following text:
    
    {chunk}
    
    Format requirements:
    1. Output exactly one question per line
    2. Include ONLY the questions themselves
    3. Do not include ANY numbering, bullets, prefixes, or explanatory text
    4. Do not include phrases like "Question:" or "Here are the questions:"
    5. Each line should be a complete, standalone question
    """

    # Prepare the request payload; "stream": False asks for one JSON document
    # instead of a stream of partial responses.
    payload = {
        "model": model_name,
        "prompt": prompt,
        "stream": False
    }

    # Transport and HTTP-status failures are handled separately from parsing
    # failures so each is reported with the matching message.
    try:
        # Without a timeout a stalled server would block the kernel forever.
        response = requests.post(api_url, json=payload, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error communicating with Ollama: {e}")
        return None

    try:
        # ValueError covers every JSON decode error flavour (json.JSONDecodeError
        # subclasses it); TypeError covers a body that is valid JSON but not an
        # object, e.g. a list or null.
        return response.json()["response"]
    except (KeyError, TypeError, ValueError) as e:
        print(f"Error processing Ollama response: {e}")
        return None


In [None]:
# chunk = "The Los Angeles Dodgers won the World Series in 2020. The games were played in Arlington, Texas, at Globe Life Field due to the COVID-19 pandemic."

# questions = generate_questions(chunk, model_name="llama3.2", num_questions=1)
# print(questions)

Where were the Los Angeles Dodgers' World Series games played in 2020?


In [25]:
# questions.strip().split("\n")

["Where were the Los Angeles Dodgers' World Series games played in 2020?"]

In [None]:
## Read the CSV file and smoke-test question generation on its first row(s)
import pandas as pd

# The explicit lineterminator is kept from the original call.
df = pd.read_csv("general_info.csv", lineterminator='\n')

# How many leading rows of the "text" column to send to the model.
# Kept at 1 so this cell stays a quick check rather than a full run.
PREVIEW_ROWS = 1

# .head() selects by position, so this works whatever the frame's index
# labels are, and yields nothing for an empty frame.
for text in df["text"].head(PREVIEW_ROWS):
    question = generate_questions(text, model_name="llama3.2", num_questions=1)
    print(question)
    print("-------- ---------")


What is the name of Pittsburgh's principal city in the greater Pittsburgh–Weirton–Steubenville combined statistical area? 
Why did Pittsburgh develop as a vital link between the Atlantic coast and Midwest?
How many bridges does the city of Pittsburgh have?
-------- ---------
