<a href="https://colab.research.google.com/github/darshanp0411/Airline-Customer-Strategy-Intelligence-Agent/blob/main/agent_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell escape: install langchain using the pip module of the
# python3.12 interpreter.
!python3.12 -m pip install langchain



In [None]:
# Install missing libraries
# Each line is a notebook shell escape that runs pip once per package.
# langchain_openai and textblob provide the ChatOpenAI and TextBlob imports
# used in this cell.
!pip install langchain_openai
!pip install textblob
!pip install langchain

import pandas as pd
import numpy as np
import os
import time
from datetime import datetime, timedelta
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from textblob import TextBlob

# ==========================================
# 1. CONFIGURATION
# ==========================================
# 🔑 PASTE YOUR OPENAI API KEY HERE
# NOTE: this assignment replaces any OPENAI_API_KEY value that is already
# present in the process environment.
os.environ["OPENAI_API_KEY"] = "Put your Key"

# CSV of raw reviews to read, and the CSV the enriched rows are written to.
INPUT_FILE = "american-airlines.csv"
OUTPUT_FILE = "AAdvantage_Strategic_Analysis.csv"
ROWS_TO_PROCESS = 1000 # ⚠️ Safety limit on how many rows are sent to OpenAI: lower it (e.g. 100) while testing so you don't spend $$$; set to len(df) for the full file.

# ==========================================
# 2. LOAD & CLEAN DATA
# ==========================================
# Reads INPUT_FILE into `df`, guarantees a text column named 'review', and
# derives 'cleaned_review' with the verification badges removed.
print(f"📁 Loading {INPUT_FILE}...")
try:
    df = pd.read_csv(INPUT_FILE)
except FileNotFoundError:
    print(f"❌ Error: {INPUT_FILE} not found.")
    # exit() is the interactive-shell helper injected by the `site` module;
    # SystemExit stops a script (or the running notebook cell) and reports a
    # non-zero status instead of a silent success.
    raise SystemExit(1) from None

# Resolve the review column. An existing 'review' column wins, so a file that
# carries both 'review' and 'reviews' does not end up with two columns named
# 'review' (which would break the .str accessor below).
if 'review' not in df.columns:
    if 'reviews' in df.columns:
        df.rename(columns={'reviews': 'review'}, inplace=True)
    else:
        # Fallback to first column
        df.rename(columns={df.columns[0]: 'review'}, inplace=True)

# Basic Cleaning
# regex=False makes both patterns literal on every pandas version. When the
# pattern is treated as a regular expression (the default before pandas 2.0),
# the trailing '|' means "or the empty string", so the badge text is removed
# but the bar itself is left in the review.
# NOTE: astype(str) also turns missing reviews into the text 'nan'.
df['cleaned_review'] = (
    df['review']
    .astype(str)
    .str.replace("✅ Trip Verified |", "", regex=False)
    .str.replace("Not Verified |", "", regex=False)
    .str.strip()
)

# ==========================================
# 3. ENRICH METADATA (Dates & Routes)
# ==========================================
# The dates and routes added in this section are randomly generated, not
# taken from the input file.
print("‚ú® Generating metadata...")
# Generate random dates (from the fixed start 2023-01-01 up to the current day)
def random_date():
    """Return a random midnight datetime from 2023-01-01 up to the present.

    A whole number of days, drawn with ``np.random.randint`` from
    ``[0, days elapsed since 2023-01-01)``, is added to the fixed start, so
    the result is a naive ``datetime`` with a zero time-of-day component.
    """
    window_start = datetime(2023, 1, 1)
    elapsed_days = (datetime.now() - window_start).days
    day_offset = np.random.randint(0, elapsed_days)
    return window_start + timedelta(days=day_offset)

# One independently drawn synthetic date per review row.
df['Date'] = [random_date() for _ in df.index]

# Generate Routes
# Each row gets a route sampled with fixed weights; the weight at position i
# belongs to routes[i].
routes = ['JFK-LHR', 'LAX-JFK', 'DFW-ORD', 'MIA-GRU', 'ORD-LHR', 'CLT-MCO', 'PHL-CDG']
_route_weights = [0.2, 0.25, 0.2, 0.15, 0.1, 0.05, 0.05]
df['Route'] = np.random.choice(routes, size=len(df), p=_route_weights)

# ==========================================
# 4. REAL AI ANALYSIS (Using OpenAI)
# ==========================================
# NOTE: the banner reports ROWS_TO_PROCESS as configured, even if the loaded
# file turns out to contain fewer rows.
print(f"üöÄ AI Agent starting... Processing first {ROWS_TO_PROCESS} reviews.")

# Initialize the LLM (GPT-3.5-Turbo is cheap and fast)
# temperature=0 is passed presumably so that repeated runs label the same
# review the same way -- confirm against the OpenAI model documentation.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

# --- PROMPTS ---
# Each template takes a single variable, {review}, and asks the model for a
# short, constrained answer.

# Traveler segment: asks for exactly one of "Business", "Leisure", "Unknown".
segment_prompt = ChatPromptTemplate.from_template(
    """Analyze this airline review: "{review}"
    Classify the traveler based on context clues (meetings, corporate, family, vacation).
    Return ONLY one word: "Business", "Leisure", or "Unknown"."""
)

# Competitor mention: asks for the named airline, or the string "None".
competitor_prompt = ChatPromptTemplate.from_template(
    """Review: "{review}"
    Does the user explicitly mention a competitor airline (Delta, United, Southwest, JetBlue, BA)?
    If YES, return the airline name. If NO, return "None"."""
)

# Primary complaint: asks for exactly one category from the bracketed list.
pain_prompt = ChatPromptTemplate.from_template(
    """Review: "{review}"
    Identify the PRIMARY complaint category.
    Choose exactly one: [Loyalty Program, Flight Delay, Baggage, Staff Service, Seat Comfort, Booking App, In-flight Food, None]
    Return ONLY the category name."""
)

# Function to call AI for a single row
def analyze_review_with_ai(review_text):
    """Classify a single review with three sequential LLM calls.

    The review is sent through the traveler-segment, competitor-mention and
    primary-complaint prompts, in that order, using the module-level ``llm``.

    Args:
        review_text: The cleaned review text to analyse.

    Returns:
        A ``pd.Series`` of three stripped strings
        ``[segment, competitor, issue]``. If any of the calls raises, the
        error is printed and the whole row falls back to
        ``["Unknown", "None", "General"]``.
    """
    prompts = (segment_prompt, competitor_prompt, pain_prompt)
    try:
        # format_messages() builds real chat messages. Plain .format() on a
        # ChatPromptTemplate renders one string prefixed with "Human: ", and
        # that prefix would be sent to the model as part of the prompt text.
        answers = [
            llm.invoke(prompt.format_messages(review=review_text)).content.strip()
            for prompt in prompts
        ]
    except Exception as e:
        # Deliberate best-effort boundary: one failed request must not abort
        # the whole DataFrame.apply run, so report it and use placeholders.
        # NOTE(review): "General" is not one of the categories offered by
        # pain_prompt; kept as-is because downstream consumers may rely on it.
        print(f"⚠️ API Error: {e}")
        return pd.Series(["Unknown", "None", "General"])
    return pd.Series(answers)

# Work on an independent copy of the first ROWS_TO_PROCESS rows so testing
# stays cheap and the full frame is left untouched.
df_ai = df.head(ROWS_TO_PROCESS).copy()

# Run the AI analysis one review at a time (slow: every row triggers its own
# OpenAI requests), then attach the three answers as new columns.
_ai_columns = ['Traveler_Type', 'Competitor', 'Key_Issue']
_ai_answers = df_ai['cleaned_review'].apply(analyze_review_with_ai)
df_ai[_ai_columns] = _ai_answers


# Sentiment Score (free & fast: computed locally with TextBlob)
def _review_polarity(text):
    """Return TextBlob's polarity score for the given review text."""
    return TextBlob(str(text)).sentiment.polarity


df_ai['Sentiment_Score'] = df_ai['cleaned_review'].apply(_review_polarity)

# ==========================================
# 5. SAVE RESULTS
# ==========================================
# Write the enriched rows to OUTPUT_FILE without the DataFrame index, then
# report how many rows were written and where.
df_ai.to_csv(OUTPUT_FILE, index=False)
print(f"üéâ Analysis Complete! {len(df_ai)} rows processed.")
print(f"üíæ Saved to: {OUTPUT_FILE}")
# Pointer to the follow-up step (a separate dashboard.py script).
print("You can now run 'streamlit run dashboard.py'")

📁 Loading american-airlines.csv...
✨ Generating metadata...
🚀 AI Agent starting... Processing first 1000 reviews.
🎉 Analysis Complete! 1000 rows processed.
💾 Saved to: AAdvantage_Strategic_Analysis.csv
You can now run 'streamlit run dashboard.py'
