In [24]:
import requests
import json

def generate_question(chunk, model_name="llama3.2"):
    """
    Generate a single question-answer pair from a given text chunk using an Ollama model.
    
    Args:
        chunk (str): The text chunk to generate a QA pair from
        model_name (str): The name of the Ollama model to use (default: "llama3.2")
        
    Returns:
        str: Generated question-answer pair in tuple format
    """
    # Ollama API endpoint (default is localhost on port 11434)
    api_url = "http://localhost:11434/api/generate"
    
    # Create the prompt for question and answer generation
    prompt = f"""
    You are an expert question-answer pair generator. Your task is to create exactly ONE high-quality question and its corresponding answer based on the provided text chunk.

    TEXT CHUNK:
    {chunk}

    INSTRUCTIONS:
    1. Generate ONE question-answer pair that captures key information from the text chunk.
    2. The answer must be comprehensive, accurate, and fully supported by the text.
    3. The answer should be 1 sentence long - informative but concise.
    4. The question should belong to one of these categories:
       - Basic factual questions (e.g., "When was Carnegie Mellon University founded?")
       - Document-enhanced questions that benefit from specific textual context (e.g., "What is the name of the annual pickle festival held in Pittsburgh?")
       - Specific information questions that require exact details from the text (e.g., "When was the Pittsburgh Soul Food Festival established?")
       - Time-sensitive questions related to events, schedules, or dates (e.g., "Who is performing at City Theatre on May 15th?")
    5. Vary the question types across different chunks to ensure diversity.
    
    OUTPUT FORMAT:
    Return ONLY a Python tuple with proper formatting: ("Question text", "Answer text")
    - Include proper quotes and parentheses
    - No explanations, comments, or additional text
    - No numbering or prefixes
    
    Generate a question-answer pair now:
    """
    
    # Prepare the request payload
    payload = {
        "model": model_name,
        "prompt": prompt,
        "stream": False
    }
    
    try:
        # Make the API request to Ollama
        response = requests.post(api_url, json=payload)
        response.raise_for_status()  # Raise exception for HTTP errors
        
        # Parse the response
        result = response.json()
        return result["response"].strip()
    
    except requests.exceptions.RequestException as e:
        print(f"Error communicating with Ollama: {e}")
        return None
    except (KeyError, json.JSONDecodeError) as e:
        print(f"Error processing Ollama response: {e}")
        return None

In [25]:
chunk = "The Los Angeles Dodgers won the World Series in 2020. The games were played in Arlington, Texas, at Globe Life Field due to the COVID-19 pandemic."

questions = generate_question(chunk, model_name="gemma3:12b")
print(questions)

("Where did the Los Angeles Dodgers play the World Series in 2020?", "The Los Angeles Dodgers played the World Series in 2020 in Arlington, Texas, at Globe Life Field due to the COVID-19 pandemic.")


In [14]:
import re
tuple_match = re.search(r'\(\".*?\", \".*?\"\)', questions)
tuple_str = tuple_match.group(0)
tuple_str
eval(tuple_str)

('What year did the Los Angeles Dodgers win the World Series?',
 'The Los Angeles Dodgers won the World Series in 2020.')

In [68]:
# questions.strip().split("\n")

In [21]:
## read csv file
import pandas as pd
from tqdm import tqdm
df = pd.read_csv("all_combined.csv", lineterminator='\n')
print(len(df["text"]))



12370


In [30]:
def generate_questions_for_dataset(df, model_name="llama3.3:latest", max_attempts=5, save_path="all_combined.csv"):
    """
    Generate questions for each text in the dataframe using the specified model.
    
    Args:
        df: DataFrame containing a 'text' column
        model_name: Name of the model to use for question generation
        max_attempts: Maximum number of attempts for each text
        save_path: Path to save the updated dataframe
        
    Returns:
        Updated DataFrame with a new column containing generated questions
    """

    
    # Create a list to store all questions
    all_questions = []
    all_answers = []
    # Create a file to log generation results
    log_file = open(f"{model_name}_question_generation_log.txt", "w")

    # Add tqdm progress bar
    for i in tqdm(range(len(df)), desc=f"Generating questions with {model_name}"):
        # if i % 100 == 0:
        #     print("current row index: ", i)
        attempts = 0
        question = None
        
        while attempts < max_attempts:
            try:
                response = generate_question(df["text"][i], model_name=model_name)
                # print("The text is: ", df["text"][i])
                if model_name == "deepseek-r1":
                    tuple_match = re.search(r'\(\".*?\", \".*?\"\)', response)
                    tuple_str = tuple_match.group(0)
                    question, answer = eval(tuple_str)
                else:
                    question, answer = eval(response)
                if question and answer:  # If we got a valid response
                    # print("-------- ---------")
                    # print(question)
                    # print(answer)
                    # print("-------- ---------")
                    break
                raise Exception("Empty response received")
                
            except Exception as e:
                attempts += 1
                print(f"Attempt {attempts} failed: {str(e)}")
                if attempts == max_attempts:
                    print(f"Failed to generate question after {max_attempts} attempts")
        
        # Add the question to our list (empty string if generation failed)
        if question and answer:
            all_questions.append(question)
            all_answers.append(answer)
            log_file.write(f"{i}\t{question}\t{answer}\n")
        else:
            all_questions.append("")
            all_answers.append("")
            log_file.write(f"{i}\t\n")
    # Close the log file
    log_file.close()

    # After collecting all questions, add them as a new column
    column_name = f"{model_name}_question"
    df[column_name] = all_questions
    column_name = f"{model_name}_answer"
    df[column_name] = all_answers

    # Save the final dataframe once at the end
    df.to_csv(save_path, index=False)
    
    return df

In [31]:
generate_questions_for_dataset(df, model_name="gemma3:12b", save_path="all_combined_gemma3.csv")

Generating questions with gemma3:12b:   0%|          | 57/12370 [00:50<2:41:35,  1.27it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:   0%|          | 58/12370 [00:52<3:34:46,  1.05s/it]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:   0%|          | 59/12370 [00:58<8:23:53,  2.46s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:   1%|          | 115/12370 [01:49<2:56:57,  1.15it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:   2%|▏         | 268/12370 [04:14<3:08:27,  1.07it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:   2%|▏         | 285/12370 [04:32<3:28:48,  1.04s/it]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:   2%|▏         | 286/12370 [04:36<6:32:49,  1.95s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:   3%|▎         | 349/12370 [05:31<2:36:45,  1.28it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:   3%|▎         | 382/12370 [06:00<2:33:02,  1.31it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:   3%|▎         | 383/12370 [06:03<5:36:45,  1.69s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:   4%|▍         | 474/12370 [07:17<2:36:45,  1.26it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:   4%|▍         | 479/12370 [07:28<4:30:48,  1.37s/it]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:   5%|▌         | 624/12370 [09:29<3:12:29,  1.02it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:   5%|▌         | 654/12370 [09:57<3:06:56,  1.04it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:   5%|▌         | 664/12370 [10:08<3:03:37,  1.06it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:   7%|▋         | 888/12370 [13:04<2:05:20,  1.53it/s] 

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:   9%|▉         | 1131/12370 [16:26<2:44:21,  1.14it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  11%|█         | 1345/12370 [19:14<2:30:18,  1.22it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  12%|█▏        | 1469/12370 [20:50<2:21:26,  1.28it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  12%|█▏        | 1488/12370 [21:08<2:20:30,  1.29it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  13%|█▎        | 1644/12370 [23:11<2:05:16,  1.43it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  13%|█▎        | 1656/12370 [23:29<2:25:06,  1.23it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  14%|█▎        | 1699/12370 [24:11<2:17:25,  1.29it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  18%|█▊        | 2166/12370 [29:34<2:26:58,  1.16it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  18%|█▊        | 2260/12370 [30:55<2:14:09,  1.26it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  19%|█▉        | 2335/12370 [32:01<2:23:31,  1.17it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  20%|█▉        | 2437/12370 [33:30<2:04:27,  1.33it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  20%|██        | 2516/12370 [34:39<2:13:52,  1.23it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  23%|██▎       | 2815/12370 [38:32<1:49:19,  1.46it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  25%|██▌       | 3148/12370 [42:56<2:01:14,  1.27it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  26%|██▌       | 3156/12370 [43:09<2:17:27,  1.12it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  26%|██▋       | 3252/12370 [44:30<1:39:15,  1.53it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  28%|██▊       | 3454/12370 [47:14<1:56:28,  1.28it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  29%|██▉       | 3623/12370 [49:34<1:44:52,  1.39it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  31%|███       | 3789/12370 [51:56<1:37:55,  1.46it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  31%|███       | 3790/12370 [51:59<3:20:02,  1.40s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  31%|███       | 3802/12370 [52:09<1:47:39,  1.33it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  31%|███       | 3822/12370 [52:28<2:12:42,  1.07it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  31%|███       | 3858/12370 [53:02<1:51:25,  1.27it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  31%|███       | 3863/12370 [53:07<2:07:22,  1.11it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object




Attempt 1 failed: 'str' object is not callable


Generating questions with gemma3:12b:  34%|███▎      | 4161/12370 [56:46<1:36:34,  1.42it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  34%|███▎      | 4162/12370 [56:51<4:20:19,  1.90s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  36%|███▋      | 4490/12370 [1:00:36<1:32:53,  1.41it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  38%|███▊      | 4684/12370 [1:02:59<1:52:53,  1.13it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  38%|███▊      | 4685/12370 [1:03:05<5:01:06,  2.35s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  38%|███▊      | 4695/12370 [1:03:12<1:25:50,  1.49it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  46%|████▌     | 5715/12370 [1:16:01<1:24:36,  1.31it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  50%|█████     | 6210/12370 [1:22:05<1:10:12,  1.46it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  50%|█████     | 6211/12370 [1:22:08<2:43:38,  1.59s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  52%|█████▏    | 6473/12370 [1:25:17<1:11:55,  1.37it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 2 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 3 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 4 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:  52%|█████▏    | 6474/12370 [1:25:22<2:59:59,  1.83s/it]

Attempt 5 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  52%|█████▏    | 6486/12370 [1:25:32<1:16:57,  1.27it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 2 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 3 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 4 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:  52%|█████▏    | 6487/12370 [1:25:36<2:49:30,  1.73s/it]

Attempt 5 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  54%|█████▎    | 6621/12370 [1:27:40<1:13:53,  1.30it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 2 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 3 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 4 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:  54%|█████▎    | 6622/12370 [1:27:44<2:48:09,  1.76s/it]

Attempt 5 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  54%|█████▎    | 6634/12370 [1:27:53<1:15:38,  1.26it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 2 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 3 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 4 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:  54%|█████▎    | 6635/12370 [1:27:58<3:10:23,  1.99s/it]

Attempt 5 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  54%|█████▍    | 6660/12370 [1:28:18<1:18:45,  1.21it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:  54%|█████▍    | 6692/12370 [1:28:46<1:10:48,  1.34it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:  54%|█████▍    | 6719/12370 [1:29:08<1:14:07,  1.27it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  56%|█████▋    | 6964/12370 [1:32:38<1:10:58,  1.27it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  57%|█████▋    | 6997/12370 [1:33:14<1:20:48,  1.11it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 2 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 3 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 4 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:  57%|█████▋    | 6998/12370 [1:33:18<2:43:58,  1.83s/it]

Attempt 5 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  57%|█████▋    | 7020/12370 [1:33:37<1:17:15,  1.15it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  59%|█████▉    | 7289/12370 [1:37:41<1:15:07,  1.13it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  64%|██████▎   | 7861/12370 [1:44:43<57:47,  1.30it/s]  

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  64%|██████▎   | 7862/12370 [1:44:50<3:14:18,  2.59s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  69%|██████▊   | 8498/12370 [1:52:25<44:51,  1.44it/s]  

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: unterminated string literal (detected at line 1) (<string>, line 1)




Attempt 1 failed: 'str' object is not callable




Attempt 1 failed: 'str' object is not callable


Generating questions with gemma3:12b:  72%|███████▏  | 8884/12370 [1:57:15<50:54,  1.14it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  73%|███████▎  | 9000/12370 [1:58:42<36:19,  1.55it/s]  

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  75%|███████▍  | 9222/12370 [2:01:29<38:37,  1.36it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  75%|███████▍  | 9225/12370 [2:01:31<42:11,  1.24it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  75%|███████▌  | 9279/12370 [2:02:16<41:06,  1.25it/s]  

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  76%|███████▌  | 9409/12370 [2:03:57<27:49,  1.77it/s]  

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  77%|███████▋  | 9545/12370 [2:05:43<32:16,  1.46it/s]  

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  80%|███████▉  | 9840/12370 [2:09:26<31:36,  1.33it/s]  

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  80%|███████▉  | 9841/12370 [2:09:30<1:08:52,  1.63s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  82%|████████▏ | 10138/12370 [2:13:12<30:04,  1.24it/s] 

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  82%|████████▏ | 10139/12370 [2:13:17<1:09:29,  1.87s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  83%|████████▎ | 10281/12370 [2:15:00<26:41,  1.30it/s]  

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  83%|████████▎ | 10294/12370 [2:15:10<25:01,  1.38it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  83%|████████▎ | 10295/12370 [2:15:15<1:06:47,  1.93s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  84%|████████▎ | 10346/12370 [2:15:51<25:35,  1.32it/s]  

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  84%|████████▍ | 10405/12370 [2:16:40<21:32,  1.52it/s]  

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  87%|████████▋ | 10823/12370 [2:21:57<22:28,  1.15it/s]  

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  88%|████████▊ | 10869/12370 [2:22:39<17:34,  1.42it/s]  

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)




Attempt 1 failed: 'str' object is not callable




Attempt 2 failed: 'str' object is not callable




Attempt 3 failed: 'str' object is not callable




Attempt 4 failed: 'str' object is not callable


Generating questions with gemma3:12b:  93%|█████████▎| 11487/12370 [2:30:26<23:31,  1.60s/it]

Attempt 5 failed: 'str' object is not callable
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  93%|█████████▎| 11488/12370 [2:30:26<20:38,  1.40s/it]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b:  93%|█████████▎| 11509/12370 [2:30:49<11:07,  1.29it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  94%|█████████▎| 11586/12370 [2:31:50<10:45,  1.21it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  94%|█████████▎| 11587/12370 [2:31:55<27:23,  2.10s/it]

Attempt 5 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Failed to generate question after 5 attempts


Generating questions with gemma3:12b:  94%|█████████▍| 11615/12370 [2:32:16<09:39,  1.30it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  94%|█████████▍| 11627/12370 [2:32:26<08:36,  1.44it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b:  96%|█████████▌| 11863/12370 [2:35:37<06:30,  1.30it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object




Attempt 1 failed: 'str' object is not callable


Generating questions with gemma3:12b:  98%|█████████▊| 12107/12370 [2:39:25<02:28,  1.77it/s]

Attempt 1 failed: unterminated string literal (detected at line 1) (<string>, line 1)
Attempt 2 failed: unterminated string literal (detected at line 1) (<string>, line 1)


Generating questions with gemma3:12b:  99%|█████████▉| 12232/12370 [2:41:06<01:45,  1.30it/s]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b: 100%|█████████▉| 12323/12370 [2:42:31<00:40,  1.17it/s]

Attempt 1 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 2 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 3 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)
Attempt 4 failed: invalid syntax. Perhaps you forgot a comma? (<string>, line 1)


Generating questions with gemma3:12b: 100%|█████████▉| 12328/12370 [2:42:40<00:50,  1.21s/it]

Error communicating with Ollama: 500 Server Error: Internal Server Error for url: http://localhost:11434/api/generate
Attempt 1 failed: eval() arg 1 must be a string, bytes or code object


Generating questions with gemma3:12b: 100%|██████████| 12370/12370 [2:43:25<00:00,  1.26it/s]


Unnamed: 0,source,text,llama3.2_question_text,llama3.2_answer,gemma3:12b_question,gemma3:12b_answer
0,https://www.heinzhistorycenter.org/event/irish...,Skip Main NavigationSkip to site alert\nHeinz ...,What is the date of the Irish Genealogy Workshop?,"Tuesday, Mar. 4, 2025",What dates will the one-on-one consultation se...,"The individual, one-on-one consultation sessio..."
1,https://www.heinzhistorycenter.org/event/one-o...,Skip Main NavigationSkip to site alert\nHeinz ...,What is a personal consultation with the Ulste...,A 30-minute consultation session where you wil...,What is the cost of a single 30-minute consult...,Each 30-minute consultation session is priced ...
2,https://www.heinzhistorycenter.org/event/histo...,Skip Main NavigationSkip to site alert\nHeinz ...,When does History Uncorked: Mirror Ball take p...,"Friday, Mar. 7, 2025",What is the date and time of the History Uncor...,History Uncorked: Mirror Ball will be held on ...
3,https://www.heinzhistorycenter.org/event/speak...,Skip Main NavigationSkip to site alert\nHeinz ...,What is the title of Jason A. Cherry,Factor of Ambition,When will Jason A. Cherry's lecture on 'Willia...,Jason A. Cherry’s lecture will take place on S...
4,https://www.heinzhistorycenter.org/event/gut-y...,Skip Main NavigationSkip to site alert\nHeinz ...,What does Gut Yontif mean?,Have a good holiday!,What does the Yiddish greeting 'Gut Yontif' me...,"According to the text, 'Gut Yontif' is a Yiddi..."
...,...,...,...,...,...,...
12365,2024_Operating_Budget.pdf,Anticipated 2028 Issuance ($49 million plus 1%...,('What is the City of Pittsburgh Operating Bud...,,What is the amount of debt service for the Cit...,The City of Pittsburgh Operating Budget for Fi...
12366,2024_Operating_Budget.pdf,Pension Funding 318,"(""What is Pension Funding 318?"", ""A report on ...",,What is the title of the text chunk provided?,The text chunk is titled 'Pension Funding 318'.
12367,2024_Operating_Budget.pdf,City of Pittsburgh Operating Budget Fiscal Ye...,"(""What is the Pension Funding for City of Pitt...",,What is the subject of document 319 within the...,Document 319 within the City of Pittsburgh Ope...
12368,2024_Operating_Budget.pdf,City of Pittsburgh Operating Budget Fiscal Ye...,"(""Pension Funding in City of Pittsburgh Operat...",,What is the subject of the City of Pittsburgh'...,The City of Pittsburgh Operating Budget for Fi...


In [34]:
# generate_questions_for_dataset(df, model_name="deepseek-r1", save_path="all_combined_R1.csv")

In [32]:
df = pd.read_csv("all_combined_gemma3.csv", lineterminator='\n')
df.head()

Unnamed: 0,source,text,llama3.2_question_text,llama3.2_answer,gemma3:12b_question,gemma3:12b_answer
0,https://www.heinzhistorycenter.org/event/irish...,Skip Main NavigationSkip to site alert\nHeinz ...,What is the date of the Irish Genealogy Workshop?,"Tuesday, Mar. 4, 2025",What dates will the one-on-one consultation se...,"The individual, one-on-one consultation sessio..."
1,https://www.heinzhistorycenter.org/event/one-o...,Skip Main NavigationSkip to site alert\nHeinz ...,What is a personal consultation with the Ulste...,A 30-minute consultation session where you wil...,What is the cost of a single 30-minute consult...,Each 30-minute consultation session is priced ...
2,https://www.heinzhistorycenter.org/event/histo...,Skip Main NavigationSkip to site alert\nHeinz ...,When does History Uncorked: Mirror Ball take p...,"Friday, Mar. 7, 2025",What is the date and time of the History Uncor...,History Uncorked: Mirror Ball will be held on ...
3,https://www.heinzhistorycenter.org/event/speak...,Skip Main NavigationSkip to site alert\nHeinz ...,What is the title of Jason A. Cherry,Factor of Ambition,When will Jason A. Cherry's lecture on 'Willia...,Jason A. Cherry’s lecture will take place on S...
4,https://www.heinzhistorycenter.org/event/gut-y...,Skip Main NavigationSkip to site alert\nHeinz ...,What does Gut Yontif mean?,Have a good holiday!,What does the Yiddish greeting 'Gut Yontif' me...,"According to the text, 'Gut Yontif' is a Yiddi..."


In [33]:
# Filter out rows with NaN values in gemma3:12b_question or gemma3:12b_answer columns
df_filtered = df.dropna(subset=['gemma3:12b_question', 'gemma3:12b_answer'])

# Display the filtered dataframe
print(f"Original dataframe shape: {df.shape}")
print(f"Filtered dataframe shape: {df_filtered.shape}")
df_filtered.head()


Original dataframe shape: (12370, 6)
Filtered dataframe shape: (12352, 6)


Unnamed: 0,source,text,llama3.2_question_text,llama3.2_answer,gemma3:12b_question,gemma3:12b_answer
0,https://www.heinzhistorycenter.org/event/irish...,Skip Main NavigationSkip to site alert\nHeinz ...,What is the date of the Irish Genealogy Workshop?,"Tuesday, Mar. 4, 2025",What dates will the one-on-one consultation se...,"The individual, one-on-one consultation sessio..."
1,https://www.heinzhistorycenter.org/event/one-o...,Skip Main NavigationSkip to site alert\nHeinz ...,What is a personal consultation with the Ulste...,A 30-minute consultation session where you wil...,What is the cost of a single 30-minute consult...,Each 30-minute consultation session is priced ...
2,https://www.heinzhistorycenter.org/event/histo...,Skip Main NavigationSkip to site alert\nHeinz ...,When does History Uncorked: Mirror Ball take p...,"Friday, Mar. 7, 2025",What is the date and time of the History Uncor...,History Uncorked: Mirror Ball will be held on ...
3,https://www.heinzhistorycenter.org/event/speak...,Skip Main NavigationSkip to site alert\nHeinz ...,What is the title of Jason A. Cherry,Factor of Ambition,When will Jason A. Cherry's lecture on 'Willia...,Jason A. Cherry’s lecture will take place on S...
4,https://www.heinzhistorycenter.org/event/gut-y...,Skip Main NavigationSkip to site alert\nHeinz ...,What does Gut Yontif mean?,Have a good holiday!,What does the Yiddish greeting 'Gut Yontif' me...,"According to the text, 'Gut Yontif' is a Yiddi..."
