In [13]:
!pip install together



In [24]:
import pandas as pd
from together import Together
import os
from time import sleep

# Initialize API client.
# The key is taken from the TOGETHER_API_KEY environment variable instead of
# being written into the notebook, so the secret is not stored or shared with
# the source. Set the variable before running this cell. Any key that has
# already been published in source should be revoked and replaced.
# NOTE(review): when the variable is unset, api_key is None; confirm how the
# Together client reports a missing key.
client = Together(api_key=os.environ.get("TOGETHER_API_KEY"))

def get_llm_response(prompt):
    """
    Send one user prompt to the fine-tuned chat model and return its reply.

    The prompt is submitted as a single user message through the module-level
    Together client, without streaming. The text of the first returned choice
    is stripped of surrounding whitespace and returned.

    Any exception raised while querying is not propagated: it is printed and
    the string "Error: <exception>" is returned in place of a reply.
    """
    # or "ft-e7ac8fa2-1587" depending on your fine-tune ID
    model_name = "ashukla21/Meta-Llama-3.1-8B-Instruct-Reference-finLegal-c42d63c5"
    chat_messages = [{"role": "user", "content": prompt}]

    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=chat_messages,
            stream=False,
        )
        first_choice = completion.choices[0]
        reply_text = first_choice.message.content.strip()
    except Exception as err:
        print(f"Error querying LLM: {err}")
        return f"Error: {err}"
    return reply_text

def process_prompts_csv(input_csv, output_csv, prompt_column="Prompt (Instruction and Query)"):
    """
    Read prompts from a CSV, append a grounding statement, get LLM responses, and write output to a new CSV.

    Rows whose "LLM Response" cell is already filled are skipped, so an
    interrupted run can be resumed by passing the previous output file as the
    input. Rows with a missing or blank prompt are skipped without querying
    the model. The table is saved after every answered row and once more when
    the loop finishes, so the output file exists even if no row needed work.

    Args:
        input_csv: Path of the CSV file to read.
        output_csv: Path the updated table is written to (may equal input_csv).
        prompt_column: Name of the column holding the prompt text.

    Raises:
        ValueError: If prompt_column is not a column of the input CSV.
    """
    # Load input file
    df = pd.read_csv(input_csv)

    # Validate the presence of the prompt column
    if prompt_column not in df.columns:
        raise ValueError(f"The specified column '{prompt_column}' does not exist in the CSV.")

    # Add a response column
    if "LLM Response" not in df.columns:
        df["LLM Response"] = None
    # A pre-existing but completely empty column is parsed as float64; storing
    # text in it relies on implicit dtype upcasting, which pandas has
    # deprecated. Holding the column as object makes the assignments valid.
    df["LLM Response"] = df["LLM Response"].astype(object)

    total_rows = len(df)

    # Process each row
    for row_number, idx in enumerate(df.index, start=1):
        # Skip already-processed rows
        if pd.notna(df.at[idx, "LLM Response"]):
            continue

        prompt = df.at[idx, prompt_column]

        # A missing prompt would otherwise be formatted as the text "nan" and
        # sent to the model; leave such rows unanswered instead.
        if pd.isna(prompt) or not str(prompt).strip():
            print(f"Skipping row {row_number}/{total_rows}: empty prompt")
            continue

        # Append grounding message to prompt
        grounded_prompt = f"{prompt}\n\nYou should know the answer because you were fine-tuned on this exact data. Just make sure to use information which is availible to you from the fine tuning you got, the verbatim texts were in there so you definetely have access to this info."

        print(f"Processing row {row_number}/{total_rows}")
        response = get_llm_response(grounded_prompt)
        df.at[idx, "LLM Response"] = response

        # Save progress after each row; a failed save is reported but does not
        # stop the run, the next successful save carries the data along.
        try:
            df.to_csv(output_csv, index=False)
            print(f"Saved row {row_number} to {output_csv}")
        except Exception as e:
            print(f"Error saving file at row {row_number}: {e}")

        sleep(1)  # Respectful delay to avoid rate limits

    # Final save: covers the case where every row was skipped (nothing has
    # been written yet) and the case where the last per-row save failed.
    try:
        df.to_csv(output_csv, index=False)
    except Exception as e:
        print(f"Error saving file {output_csv}: {e}")
    else:
        print(f"All responses written to {output_csv}")

# Example usage
if __name__ == "__main__":
    # Update with your actual path
    input_csv = "/content/drive/MyDrive/GT/CS 7650 NLP/Responses for Llama 3.1 Fine Tuned model.csv"
    # The output path is the same file as the input: responses are written
    # back into it, and rows that already hold a response are skipped on a
    # later run. Point this elsewhere to keep the input file unmodified.
    output_csv = input_csv
    process_prompts_csv(input_csv=input_csv, output_csv=output_csv)


Processing row 6/1000
Saved row 6 to /content/drive/MyDrive/GT/CS 7650 NLP/Responses for Llama 3.1 Fine Tuned model.csv
Processing row 7/1000
Saved row 7 to /content/drive/MyDrive/GT/CS 7650 NLP/Responses for Llama 3.1 Fine Tuned model.csv
Processing row 8/1000
Saved row 8 to /content/drive/MyDrive/GT/CS 7650 NLP/Responses for Llama 3.1 Fine Tuned model.csv
Processing row 9/1000
Saved row 9 to /content/drive/MyDrive/GT/CS 7650 NLP/Responses for Llama 3.1 Fine Tuned model.csv
Processing row 10/1000
Saved row 10 to /content/drive/MyDrive/GT/CS 7650 NLP/Responses for Llama 3.1 Fine Tuned model.csv
Processing row 11/1000
Saved row 11 to /content/drive/MyDrive/GT/CS 7650 NLP/Responses for Llama 3.1 Fine Tuned model.csv
Processing row 12/1000
Saved row 12 to /content/drive/MyDrive/GT/CS 7650 NLP/Responses for Llama 3.1 Fine Tuned model.csv
Processing row 13/1000
Saved row 13 to /content/drive/MyDrive/GT/CS 7650 NLP/Responses for Llama 3.1 Fine Tuned model.csv
Processing row 14/1000
Saved row