# In [3]:
import pandas as pd
from typing import Optional
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
import json

# Define your Pydantic model for call analysis
class CallAnalysis(BaseModel):
    # Schema of the analysis expected for a single call transcript.
    # Both fields are required strings; each carries a human-readable
    # description as field metadata.

    # Short recap of what happened on the call.
    call_summary: str = Field(
        description="A concise summary of the call",
    )

    # Primary reason the call was made.
    call_intent: str = Field(
        description="The main intent of the call (e.g., billing inquiry, technical support, cancellation)",
    )

def parse_csv_with_llm(input_csv: str, output_csv: str) -> None:
    """Analyze every call transcript in a CSV with an LLM and save the results.

    Reads ``input_csv``, which must contain ``conversation_id`` and
    ``call_transcript`` columns, asks the model for a summary and the main
    intent of each transcript, and writes one output row per input row to
    ``output_csv``.

    A row that cannot be analyzed (blank transcript, API/network failure, or
    model output that does not fit ``CallAnalysis``) is still written, with
    ``call_summary`` set to ``"Error: <reason>"`` and ``call_intent`` set to
    ``"Analysis Failed"``, so a single bad row never aborts the batch.

    Args:
        input_csv: Path of the CSV file to read.
        output_csv: Path of the CSV file to write.

    Raises:
        KeyError: If a required column is missing from ``input_csv``.
    """
    output_columns = [
        'conversation_id',
        'call_transcript',
        'call_summary',
        'call_intent',
    ]

    # Initialize the LLM
    llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

    # Create a prompt template for analyzing call transcripts
    prompt = ChatPromptTemplate.from_messages([
        ("system", """You are a helpful assistant that analyzes call transcripts.
        Given a call transcript, provide a structured analysis with:
        1. A concise call summary
        2. The main intent of the call
        """),
        ("user", "Analyze this call transcript:\n\n{call_transcript}")
    ])

    # Bind the schema to the model so the reply comes back as a validated
    # CallAnalysis instance. The prompt never asks for JSON, so parsing the
    # raw chat text with json.loads would fail on ordinary prose replies.
    # The chain is loop-invariant, so it is built once here.
    chain = prompt | llm.with_structured_output(CallAnalysis)

    # Read the input CSV
    df = pd.read_csv(input_csv)

    structured_data = []
    # Selecting the columns up front raises KeyError before any API call is
    # made when the input file lacks a required column.
    rows = zip(df['conversation_id'], df['call_transcript'])
    for conversation_id, call_transcript in rows:
        try:
            # read_csv turns blank cells into NaN; do not send "nan" to the model.
            if pd.isna(call_transcript) or not str(call_transcript).strip():
                raise ValueError("call transcript is empty")

            # The model call lives inside the try so that a rate limit or
            # network error on one row is recorded instead of losing the batch.
            analysis = chain.invoke({"call_transcript": call_transcript})

            call_summary = analysis.call_summary
            call_intent = analysis.call_intent
            print(f"Processed conversation {conversation_id}")
        except Exception as e:
            # Per-row boundary: keep the row and record why it failed.
            print(f"Error processing conversation {conversation_id}: {e}")
            call_summary = f"Error: {str(e)}"
            call_intent = "Analysis Failed"

        structured_data.append({
            'conversation_id': conversation_id,
            'call_transcript': call_transcript,
            'call_summary': call_summary,
            'call_intent': call_intent,
        })

    # Explicit columns keep the header stable even when the input has no rows.
    output_df = pd.DataFrame(structured_data, columns=output_columns)

    # Save to CSV
    output_df.to_csv(output_csv, index=False)
    print(f"Processed data saved to {output_csv}")

if __name__ == "__main__":
    # Demo run: analyze the sample transcripts and write the results into
    # the same data directory.
    input_csv, output_csv = (
        "../data/call_transcript_sample.csv",
        "../data/analyzed_calls.csv",
    )
    parse_csv_with_llm(input_csv=input_csv, output_csv=output_csv)

# Cell output: ModuleNotFoundError: No module named 'pandas'