In [19]:
import csv
import json
import requests
import re
from pathlib import Path

In [20]:
# Run configuration: which Ollama model to query, where the generate
# endpoint lives, and which prompt CSVs to score.
model_name = "deepseek-r1:70b" # model name here
ollama_url = "http://localhost:11434/api/generate"

# One prompt file per template variant, numbered 1 through 5.
input_files = [
    f"relevance_210725_prompts_templ-{template_no}.csv"
    for template_no in range(1, 6)
]

In [None]:
#new version to (try to) handle concurrent requests

import asyncio
import aiohttp
import csv
import json
import re
from typing import List, Dict, Tuple
import time
from datetime import datetime, timedelta

# Fix for Jupyter notebooks: the functions below call asyncio.run(), so
# nest_asyncio is applied before anything else in this cell runs
# (presumably to let asyncio.run() work while the notebook's own event loop
# is active — see the nest_asyncio project documentation).
try:
    import nest_asyncio
    nest_asyncio.apply()
except ImportError:
    # nest_asyncio is not installed: install it on the fly, then retry.
    # NOTE(review): this performs an unpinned `pip install` as a side effect
    # of running the cell; consider a dedicated, pinned `%pip install` cell.
    print("Installing nest_asyncio for Jupyter compatibility...")
    import subprocess
    import sys
    # Invoke pip through sys.executable so the package is installed for the
    # interpreter that is running this code.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "nest_asyncio"])
    import nest_asyncio
    nest_asyncio.apply()

def _strip_leading_reasoning(text: str) -> str:
    """Remove one leading ``<think>...</think>`` block and surrounding whitespace."""
    return re.sub(r"\A\s*<think>.*?</think>", "", text, count=1,
                  flags=re.DOTALL | re.IGNORECASE).strip()


def _normalize_yes_no(text: str) -> str:
    """Collapse a model reply to "Yes"/"No" when it starts with that word.

    A leading ``<think>...</think>`` block is ignored when looking for the
    verdict (reasoning models such as deepseek-r1 may emit one before the
    answer). The match requires a word boundary, so "None" or "Yesterday"
    are not mistaken for a verdict. When no verdict is found, the stripped
    original text (reasoning included) is returned unchanged.
    """
    stripped = text.strip()
    verdict = re.match(r"(yes|no)\b", _strip_leading_reasoning(stripped),
                       flags=re.IGNORECASE)
    if verdict is None:
        return stripped
    return verdict.group(1).capitalize()


def _extract_response_text(result) -> str:
    """Return the first non-empty text field of a decoded JSON reply.

    Keys are tried in the order ``response``, ``output``, ``content``,
    ``message.content``; if none holds a truthy value (or the payload is not
    a JSON object) the whole payload is stringified as a last resort.
    """
    if not isinstance(result, dict):
        return str(result)
    message = result.get("message")
    nested = message.get("content") if isinstance(message, dict) else None
    for candidate in (result.get("response"), result.get("output"),
                      result.get("content"), nested):
        if candidate:
            return str(candidate)
    return str(result)


async def process_prompt(session: aiohttp.ClientSession, ollama_url: str, 
                        model_name: str, prompt: str, row: List[str], 
                        semaphore: asyncio.Semaphore,
                        timeout_seconds: float = 30) -> List[str]:
    """Send one prompt to the Ollama endpoint and return the row with its verdict.

    Args:
        session: Shared aiohttp session used for the POST request.
        ollama_url: URL of the generate endpoint.
        model_name: Model identifier placed in the payload and in the output row.
        prompt: Prompt text to send.
        row: Original CSV row; it is not mutated.
        semaphore: Limits how many requests run at the same time.
        timeout_seconds: Total per-request timeout. The clock starts after the
            semaphore is acquired, so time spent queueing is not counted.

    Returns:
        ``row + [eval_completion, model_name]`` where ``eval_completion`` is
        "Yes", "No", the raw reply text, or an ``"Error: ..."`` marker. Errors
        are recorded in the row rather than raised.
    """
    async with semaphore:  # Limit concurrent requests
        payload = {
            "model": model_name,
            "prompt": prompt,
            "stream": False
        }

        try:
            async with session.post(
                ollama_url,
                headers={"Content-Type": "application/json"},
                json=payload,
                timeout=aiohttp.ClientTimeout(total=timeout_seconds)
            ) as response:
                if response.status != 200:
                    eval_completion = f"Error: HTTP {response.status}"
                else:
                    try:
                        # content_type=None disables aiohttp's mimetype check;
                        # otherwise a non-JSON content type raises
                        # ContentTypeError (not a ValueError) and the raw-text
                        # fallback below would never run.
                        result = await response.json(content_type=None)
                    except ValueError:
                        # Body is not valid JSON: fall back to the raw text.
                        result_text = await response.text()
                    else:
                        result_text = _extract_response_text(result)
                    eval_completion = _normalize_yes_no(result_text)

        except asyncio.TimeoutError:
            eval_completion = "Error: Timeout"
        except Exception as e:
            eval_completion = f"Error: {str(e)}"

        return row + [eval_completion, model_name]

async def process_file_async(input_path: str, output_path: str, model_name: str, 
                           ollama_url: str, max_concurrent: int = 10) -> None:
    """Score every row of one prompt CSV and write the augmented CSV.

    The last column of each non-empty input row is sent as the prompt. The
    output has the input header plus ``eval_completion`` and ``model``.
    Requests run concurrently (at most ``max_concurrent`` at a time) in
    batches of 100 rows, and each finished batch is written and flushed
    immediately so an interrupted run keeps the rows completed so far.

    Args:
        input_path: CSV to read; the first row is treated as the header.
        output_path: CSV to create or overwrite.
        model_name: Model identifier forwarded to ``process_prompt``.
        ollama_url: Endpoint URL forwarded to ``process_prompt``.
        max_concurrent: Semaphore size; the connection pool is twice this.
    """
    with open(input_path, newline='', encoding='utf-8') as infile:
        reader = csv.reader(infile)
        # A completely empty file has no header row; default to [] instead of
        # letting StopIteration escape from inside the coroutine.
        header = next(reader, [])
        rows_to_process = [row for row in reader if row]  # Skip empty rows
    new_header = header + ["eval_completion", "model"]

    with open(output_path, 'w', newline='', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(new_header)
        if not rows_to_process:
            # Header-only output when there is nothing to score
            return
        outfile.flush()

        semaphore = asyncio.Semaphore(max_concurrent)
        connector = aiohttp.TCPConnector(limit=max_concurrent * 2)  # Connection pooling

        batch_size = 100
        total_batches = (len(rows_to_process) - 1) // batch_size + 1
        start_time = time.time()

        async with aiohttp.ClientSession(connector=connector) as session:
            for current_batch, offset in enumerate(
                    range(0, len(rows_to_process), batch_size), start=1):
                batch_start = time.time()
                batch_rows = rows_to_process[offset:offset + batch_size]

                # Coroutines are created per batch, so only one batch of
                # pending work exists at any time.
                batch_results = await asyncio.gather(
                    *(process_prompt(session, ollama_url, model_name,
                                     row[-1], row, semaphore)
                      for row in batch_rows),
                    return_exceptions=True,
                )

                for original_row, result in zip(batch_rows, batch_results):
                    # BaseException also covers asyncio.CancelledError, which
                    # gather() can hand back when return_exceptions=True.
                    if isinstance(result, BaseException):
                        writer.writerow(
                            original_row + [f"Error: {str(result)}", model_name])
                    else:
                        writer.writerow(result)
                outfile.flush()

                # Calculate ETA from the average batch duration so far
                elapsed_time = time.time() - start_time
                avg_time_per_batch = elapsed_time / current_batch
                remaining_batches = total_batches - current_batch
                eta_seconds = remaining_batches * avg_time_per_batch
                eta_time = datetime.now() + timedelta(seconds=eta_seconds)

                batch_time = time.time() - batch_start
                print(f"Batch {current_batch}/{total_batches} completed in {batch_time:.1f}s | "
                      f"Progress: {(current_batch/total_batches)*100:.1f}% | "
                      f"ETA: {eta_time.strftime('%H:%M:%S')} ({eta_seconds/60:.1f}m remaining)")
def process_files_optimized(input_files: List[str], model_name: str, ollama_url: str,
                          max_concurrent: int = 10) -> None:
    """Process multiple prompt CSVs inside a single event loop.

    Files are handled one after another; concurrency applies only to the
    requests within each file. Paths without a ``templ-<n>`` tag are skipped.
    Each output is named
    ``relevance_210725_completions_<model>-<templ-n>.csv`` with ``:`` in the
    model name replaced by ``-``.

    Args:
        input_files: Paths of the prompt CSVs.
        model_name: Model identifier passed to ``process_file_async``.
        ollama_url: Endpoint URL passed to ``process_file_async``.
        max_concurrent: Maximum simultaneous requests per file.
    """

    async def process_all_files():
        # Resolve (input, output) path pairs first. The coroutine for a file
        # is only created when it is about to be awaited, so a failure or
        # interrupt never leaves un-awaited coroutine objects behind.
        jobs = []
        for input_path in input_files:
            template_match = re.search(r"templ-\d+", input_path)
            if not template_match:
                continue
            template_tag = template_match.group()
            model_tag = model_name.replace(":", "-")
            output_path = f"relevance_210725_completions_{model_tag}-{template_tag}.csv"
            jobs.append((input_path, output_path))
        total_files = len(jobs)

        print(f"Starting processing of {total_files} files...")
        overall_start = time.time()

        # Files are processed sequentially; only requests inside a file overlap
        for files_completed, (input_path, output_path) in enumerate(jobs, start=1):
            file_start_time = time.time()
            await process_file_async(input_path, output_path, model_name, ollama_url, max_concurrent)
            file_duration = time.time() - file_start_time

            # Calculate overall ETA from the average time per finished file
            elapsed_total = time.time() - overall_start
            avg_time_per_file = elapsed_total / files_completed
            remaining_files = total_files - files_completed
            eta_seconds = remaining_files * avg_time_per_file
            eta_time = datetime.now() + timedelta(seconds=eta_seconds)

            print(f"File {files_completed}/{total_files} completed: {input_path} -> {output_path}")
            print(f"  File duration: {file_duration:.1f}s | Overall progress: {(files_completed/total_files)*100:.1f}%")
            if remaining_files > 0:
                print(f"  Overall ETA: {eta_time.strftime('%H:%M:%S')} ({eta_seconds/60:.1f}m remaining)")
            print()

        total_duration = time.time() - overall_start
        print(f"All {total_files} files completed in {total_duration/60:.1f} minutes!")

    # This cell relies on nest_asyncio having been applied so that
    # asyncio.run() works in Jupyter notebooks
    asyncio.run(process_all_files())

# Alternative driver: one asyncio.run() per file, concurrent requests inside each file
def process_files_sequential_hybrid(input_files: List[str], model_name: str, 
                                  ollama_url: str, max_concurrent: int = 10) -> None:
    """Run the async per-file pipeline once for each tagged input file.

    Only paths containing a ``templ-<n>`` tag are processed. Every file gets
    its own ``asyncio.run`` call, and a progress/ETA report is printed after
    each one finishes.
    """
    tag_pattern = re.compile(r"templ-\d+")

    # Pair each usable path with its regex match; untagged paths are dropped
    tagged = []
    for candidate in input_files:
        found = tag_pattern.search(candidate)
        if found:
            tagged.append((candidate, found))

    total_files = len(tagged)
    overall_start = time.time()

    print(f"Starting sequential processing of {total_files} files...")

    for files_completed, (input_path, found) in enumerate(tagged, start=1):
        model_tag = model_name.replace(":", "-")
        output_path = f"relevance_210725_completions_{model_tag}-{found.group()}.csv"

        started_at = time.time()
        asyncio.run(process_file_async(input_path, output_path, model_name, ollama_url, max_concurrent))
        file_duration = time.time() - started_at

        # Project the remaining time from the mean duration per finished file
        remaining_files = total_files - files_completed
        mean_per_file = (time.time() - overall_start) / files_completed
        eta_seconds = remaining_files * mean_per_file
        eta_time = datetime.now() + timedelta(seconds=eta_seconds)

        print(f"File {files_completed}/{total_files} completed: {input_path} -> {output_path}")
        print(f"  Duration: {file_duration:.1f}s | Progress: {(files_completed/total_files)*100:.1f}%")
        if remaining_files > 0:
            print(f"  ETA: {eta_time.strftime('%H:%M:%S')} ({eta_seconds/60:.1f}m remaining)")
        print()

    total_duration = time.time() - overall_start
    print(f"All {total_files} files completed in {total_duration/60:.1f} minutes!")

In [24]:
# Run the async pipeline over every path in input_files. max_concurrent=5 is
# forwarded to process_file_async, where it sizes the request semaphore.
process_files_optimized(input_files=input_files, model_name=model_name, ollama_url=ollama_url, max_concurrent=5)

Starting processing of 5 files...


KeyboardInterrupt: 

In [23]:
# original, took too long
# Synchronous reference implementation: one blocking request per CSV row.


def _yes_no_or_raw(text):
    """Return "Yes"/"No" if `text` starts with that word (any case), else `text`."""
    lowered = text.lower()
    if lowered.startswith("yes"):
        return "Yes"
    if lowered.startswith("no"):
        return "No"
    return text  # fallback to whatever it is


def _completion_text(result):
    """Pull the generated text out of a decoded JSON reply.

    Keys are tried in the order ``response``, ``output``, ``content``, then
    the chat-style nested ``message.content``; if none is present the whole
    payload is returned as a string.
    """
    if not isinstance(result, dict):
        return str(result)
    if "response" in result:
        return result["response"]
    if "output" in result:
        return result["output"]
    message = result.get("message")
    nested = message.get("content", "") if isinstance(message, dict) else ""
    if "content" in result:
        return result["content"] or nested
    if isinstance(message, dict) and "content" in message:
        # Chat-style reply with no top-level "content" key
        return nested
    # If none of the known keys, use the full JSON string as fallback
    return str(result)


for input_path in input_files:
    # Determine output file name based on template number and model
    template_match = re.search(r"templ-\d+", input_path)
    if not template_match:
        continue
    template_tag = template_match.group()
    # replace colon with hyphen in models
    model_tag = model_name.replace(":", "-")
    output_path = f"relevance_210725_completions_{model_tag}-{template_tag}.csv"

    with open(input_path, newline='', encoding='utf-8') as infile, \
         open(output_path, 'w', newline='', encoding='utf-8') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)

        # Read the header and append new column names
        header = next(reader)
        new_header = header + ["eval_completion", "model"]
        writer.writerow(new_header)

        # Iterate over each row in the input CSV
        for row in reader:
            if not row:  # skip empty lines if any
                continue
            prompt = row[-1]  # eval_prompt is the last col

            # Prepare the JSON payload for Ollama API
            payload = {
                "model": model_name,
                "prompt": prompt,
                "stream": False  # get a single JSON response instead of stream
            }

            try:
                response = requests.post(ollama_url, headers={"Content-Type": "application/json"},
                                         data=json.dumps(payload))
            except Exception as e:
                # Record the failure in the output instead of dropping the
                # row, so output rows stay aligned with the input rows.
                print(f"Error calling Ollama API for prompt: {prompt[:30]}... \n{e}")
                writer.writerow(row + [f"Error: {e}", model_name])
                continue

            if response.status_code == 200:
                try:
                    result = response.json()
                except ValueError:
                    # If response is not a valid JSON (unexpected), use raw text
                    result_text = response.text.strip()
                else:
                    result_text = str(_completion_text(result)).strip()
                # Normalize to "Yes" or "No"
                eval_completion = _yes_no_or_raw(result_text)
            else:
                # If the API call failed (non-200 status), record the status
                eval_completion = f"Error: HTTP {response.status_code}"

            # Append the new columns to the row
            writer.writerow(row + [eval_completion, model_name])

    print(f"Completed {input_path} -> {output_path}")

KeyboardInterrupt: 