# Fynd AI Intern Assessment - Task 1
## Rating Prediction via Prompting

This notebook implements and evaluates 3 different prompting approaches for classifying Yelp reviews into 1-5 star ratings using OpenAI's GPT models.

### Objectives:
1. Design 3 distinct prompting approaches
2. Evaluate accuracy, JSON validity, and consistency
3. Compare performance across approaches
4. Generate comprehensive analysis report

In [None]:
# Import required libraries
import json
import os
import re
import time
import warnings
from typing import Dict, List, Tuple

import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Silence all warnings so the cell outputs stay readable.
warnings.simplefilter('ignore')

# Plot appearance: seaborn-flavoured matplotlib style plus the "husl" palette.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Startup banner, one line per message.
for banner_line in (
    "‚úÖ Libraries imported successfully",
    "üöÄ Starting Fynd AI Assessment - Task 1",
):
    print(banner_line)

In [None]:
# OpenAI API Configuration
# The key is read from the OPENAI_API_KEY environment variable so the secret
# never lives in the notebook file, its saved outputs, or version control.
# SECURITY: a literal key used to be embedded in this cell. Treat that key as
# compromised and revoke it in the OpenAI dashboard.
_api_key = os.environ.get("OPENAI_API_KEY", "").strip()
if not _api_key:
    # Fail here with a clear message instead of on the first API request.
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it before starting Jupyter, "
        "e.g. `export OPENAI_API_KEY=<your key>`."
    )

openai.api_key = _api_key

# Shared client used by call_openai_api for every request in this notebook.
client = openai.OpenAI(api_key=openai.api_key)

print("üîë OpenAI API configured")
print("ü§ñ Ready for real-time AI analysis")

## Data Preparation

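Building the evaluation set: a small, hand-written sample of Yelp-style restaurant reviews (five per star rating), repeated as needed to reach the requested size. It stands in for the real Yelp reviews dataset, which this notebook does not load.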

In [None]:
# Create sample Yelp reviews dataset
def create_sample_data(size=50, seed=42):
    """Create a synthetic Yelp-style review dataset for demonstration.

    The 25 hand-written reviews below (five per star rating) are shuffled once
    and then repeated cyclically until ``size`` rows are available. Shuffling
    before truncating means a small ``size`` draws from every part of the pool
    instead of only the highest ratings that come first in the list.

    Args:
        size: Number of rows to return. Must be non-negative.
        seed: Seed for the shuffle. The default makes the dataset reproducible
            across runs; pass ``None`` for a different order on every call.

    Returns:
        DataFrame with columns ``text`` (review body) and ``stars`` (1-5),
        indexed ``0..size-1``. When ``size`` is a multiple of 25 every review
        appears equally often, so the star distribution is perfectly balanced.

    Raises:
        ValueError: If ``size`` is negative.
    """
    if size < 0:
        raise ValueError(f"size must be non-negative, got {size}")

    sample_reviews = [
        # 5-star reviews
        {"text": "Amazing food and excellent service! The staff was incredibly friendly and the atmosphere was perfect. Will definitely come back!", "stars": 5},
        {"text": "Outstanding restaurant! Every dish was perfectly prepared and the service was impeccable. Best dining experience I've had in years!", "stars": 5},
        {"text": "Absolutely phenomenal! The chef's special was incredible and our server was attentive without being intrusive. Highly recommend!", "stars": 5},
        {"text": "Perfect evening! Great food, wonderful ambiance, and exceptional service. This place exceeded all expectations!", "stars": 5},
        {"text": "Fantastic restaurant! The menu is creative, portions are generous, and everything was delicious. Can't wait to return!", "stars": 5},

        # 4-star reviews
        {"text": "Good food but the service was a bit slow. The restaurant was clean and the prices were reasonable.", "stars": 4},
        {"text": "Pretty good overall. The pasta was delicious and the wine selection was impressive. Slightly expensive but worth it.", "stars": 4},
        {"text": "Really enjoyed our meal. Food was tasty and well-presented. Only complaint is the wait time for our table.", "stars": 4},
        {"text": "Nice restaurant with good food. Service was friendly and efficient. Would recommend for a casual dinner.", "stars": 4},
        {"text": "Solid choice for dinner. The steak was cooked perfectly and the sides were great. Atmosphere could be better.", "stars": 4},

        # 3-star reviews
        {"text": "Average experience. Food was okay, nothing special. Service was decent but could be better.", "stars": 3},
        {"text": "It's an okay place. Food is decent, service is average. Nothing to complain about but nothing extraordinary either.", "stars": 3},
        {"text": "Mixed experience. Some dishes were good, others were mediocre. Service was inconsistent throughout the evening.", "stars": 3},
        {"text": "Decent restaurant. Food was acceptable and service was fine. Not bad but not great either.", "stars": 3},
        {"text": "Average meal. The appetizer was good but the main course was disappointing. Service was okay.", "stars": 3},

        # 2-star reviews
        {"text": "Disappointing meal. The food was cold when it arrived and the server seemed uninterested. Overpriced for what we got.", "stars": 2},
        {"text": "Not impressed. The food took forever to arrive and when it did, it was lukewarm. The staff seemed overwhelmed.", "stars": 2},
        {"text": "Below expectations. Food was bland and service was poor. The restaurant was also quite noisy.", "stars": 2},
        {"text": "Disappointing experience. Long wait times, mediocre food, and inattentive service. Won't be returning.", "stars": 2},
        {"text": "Not great. Food was underseasoned and service was slow. The place also felt dirty and unkempt.", "stars": 2},

        # 1-star reviews
        {"text": "Terrible experience! Rude staff, awful food, and dirty restaurant. Would never recommend this place to anyone.", "stars": 1},
        {"text": "Worst restaurant ever! Food was inedible, service was horrible, and the place was filthy. Complete waste of money!", "stars": 1},
        {"text": "Absolutely awful! The food was disgusting, staff was rude, and the restaurant was dirty. Avoid at all costs!", "stars": 1},
        {"text": "Horrible experience! Food poisoning from undercooked chicken, terrible service, and unsanitary conditions.", "stars": 1},
        {"text": "Worst meal ever! Everything was wrong - cold food, rude waiters, dirty tables. Never coming back!", "stars": 1}
    ]

    base_reviews = pd.DataFrame(sample_reviews, columns=["text", "stars"])
    if size == 0:
        # Keep the column schema so downstream code such as df['stars'] still works.
        return base_reviews.iloc[0:0].copy()

    # Shuffle the pool once, then tile it: every full pass over the pool
    # contains each review exactly once, which keeps the classes balanced.
    shuffled_reviews = base_reviews.sample(frac=1, random_state=seed).reset_index(drop=True)
    passes_needed = -(-size // len(shuffled_reviews))  # ceiling division
    tiled_reviews = pd.concat([shuffled_reviews] * passes_needed, ignore_index=True)
    return tiled_reviews.iloc[:size]

# Build the demo dataset (50 rows).
df = create_sample_data(50)

# Headline numbers: row count and how many reviews carry each star value.
n_reviews = len(df)
print(f"üìä Dataset loaded: {n_reviews} reviews")
print("üìà Rating distribution:")
stars_histogram = df['stars'].value_counts().sort_index()
print(stars_histogram)

# Preview the first three rows, clipping each review text to 100 characters.
print("\nüìù Sample reviews:")
for position in range(3):
    preview_row = df.iloc[position]
    print(f"\n{position + 1}. Rating: {preview_row['stars']} stars")
    print(f"   Review: {preview_row['text'][:100]}...")

## Approach 1: Direct Classification

Simple, straightforward prompt asking for star rating classification with clear criteria.

In [None]:
def approach_1_direct_classification(review_text: str) -> Dict:
    """
    Prompting strategy 1: plain classification.

    Embeds the review in a single instruction that asks for a 1-5 star rating,
    requests a two-field JSON reply, and lists what each star level means.
    The filled-in prompt is passed to ``call_openai_api`` under the label
    "Direct Classification"; that call's return value is returned unchanged.
    """
    # Doubled braces render as literal braces (the JSON template); only
    # {review_text} is substituted.
    classification_prompt = f'''
    You are a review rating classifier. Analyze the following restaurant review and predict the star rating from 1 to 5 stars.
    
    Review: "{review_text}"
    
    Respond with a JSON object in this exact format:
    {{
        "predicted_stars": <number from 1-5>,
        "explanation": "<brief reasoning for the assigned rating>"
    }}
    
    Consider:
    - 5 stars: Excellent, outstanding experience
    - 4 stars: Good, above average with minor issues
    - 3 stars: Average, okay experience
    - 2 stars: Below average, several issues
    - 1 star: Poor, terrible experience
    '''

    return call_openai_api(classification_prompt, "Direct Classification")

print("‚úÖ Approach 1 (Direct Classification) defined")
print("üìã Strategy: Simple classification with clear rating criteria")

## Approach 2: Sentiment Analysis

Focus on detailed sentiment analysis with aspect-based evaluation.

In [None]:
def approach_2_sentiment_analysis(review_text: str) -> Dict:
    """
    Prompting strategy 2: sentiment-driven rating.

    Asks the model to weigh overall sentiment, the aspects mentioned, emotional
    intensity and tone, then map that analysis onto a 1-5 star rating returned
    as JSON. The filled-in prompt is passed to ``call_openai_api`` under the
    label "Sentiment Analysis"; that call's return value is returned unchanged.
    """
    # NOTE(review): the star bands in the rubric below overlap (4-5, 3-4, 2-3,
    # 1-2, 1), so a given sentiment level can map to two ratings. Confirm this
    # is intended before comparing accuracy against the other approaches.
    sentiment_prompt = f'''
    As an expert sentiment analyst, evaluate this restaurant review by analyzing:
    1. Overall sentiment (positive/negative/neutral)
    2. Specific aspects mentioned (food, service, atmosphere, value)
    3. Intensity of emotions expressed
    4. Language tone and word choice
    
    Review: "{review_text}"
    
    Based on your analysis, assign a star rating (1-5) where:
    - Very positive sentiment with praise = 4-5 stars
    - Mostly positive with some concerns = 3-4 stars  
    - Neutral or mixed sentiment = 2-3 stars
    - Mostly negative sentiment = 1-2 stars
    - Very negative with strong criticism = 1 star
    
    Return your response as JSON:
    {{
        "predicted_stars": <1-5>,
        "explanation": "<detailed reasoning based on sentiment analysis>"
    }}
    '''

    return call_openai_api(sentiment_prompt, "Sentiment Analysis")

print("‚úÖ Approach 2 (Sentiment Analysis) defined")
print("üîç Strategy: Deep sentiment analysis with aspect-based evaluation")

## Approach 3: Comparative Analysis

Use few-shot learning with example reviews for consistent benchmarking.

In [None]:
def approach_3_comparative_analysis(review_text: str) -> Dict:
    """
    Prompting strategy 3: few-shot comparison.

    Shows the model one reference review per star level and asks it to rate the
    target review by comparing it against those examples, replying as JSON.
    The filled-in prompt is passed to ``call_openai_api`` under the label
    "Comparative Analysis"; that call's return value is returned unchanged.
    """
    # The five EXAMPLES lines are the few-shot anchors, one per rating.
    few_shot_prompt = f'''
    You are an experienced restaurant reviewer. Rate this review by comparing it to these examples:
    
    EXAMPLES:
    5 Stars: "Absolutely phenomenal! Best meal of my life. Perfect service, amazing atmosphere."
    4 Stars: "Really good food and service. Had a great time, just minor wait for table."
    3 Stars: "Decent place. Food was okay, service was fine. Nothing special but acceptable."
    2 Stars: "Food was cold, service was slow. Disappointed but not the worst experience."
    1 Star: "Terrible! Rude staff, awful food, dirty restaurant. Complete disaster."
    
    Now rate this review: "{review_text}"
    
    Compare the language, sentiment, and specific complaints/praise to the examples above.
    
    Provide your rating as JSON:
    {{
        "predicted_stars": <1-5>,
        "explanation": "<comparison-based reasoning>"
    }}
    '''

    return call_openai_api(few_shot_prompt, "Comparative Analysis")

print("‚úÖ Approach 3 (Comparative Analysis) defined")
print("üìä Strategy: Few-shot learning with example-based comparison")

## API Integration & Helper Functions

In [None]:
def _parse_rating_response(content: str, approach: str) -> Dict:
    """Convert raw model text into the result dict used by the evaluation code.

    Returns a ``valid_json=True`` result when the text contains a JSON object
    with an ``explanation`` and a ``predicted_stars`` value that converts to an
    integer in 1..5. Any other reply yields the ``valid_json=False`` fallback
    (rating 3) with the raw text attached under ``raw_response``.
    """
    invalid_result = {
        'predicted_stars': 3,  # Default fallback
        'explanation': 'Invalid JSON response from API',
        'valid_json': False,
        'approach': approach,
        'raw_response': content
    }

    # Greedy match from the first "{" to the last "}" so prose around the JSON
    # object (e.g. "Sure! {...}") is tolerated.
    json_match = re.search(r'\{.*\}', content, re.DOTALL)
    if not json_match:
        return invalid_result

    try:
        result = json.loads(json_match.group())
    except json.JSONDecodeError:
        # Malformed JSON is a formatting failure of the reply, not an API error.
        return invalid_result

    if 'predicted_stars' not in result or 'explanation' not in result:
        return invalid_result

    try:
        stars = int(result['predicted_stars'])
    except (TypeError, ValueError, OverflowError):
        # e.g. "four", null, NaN or Infinity
        return invalid_result

    if not 1 <= stars <= 5:
        return invalid_result

    return {
        'predicted_stars': stars,
        'explanation': result['explanation'],
        'valid_json': True,
        'approach': approach
    }


def call_openai_api(prompt: str, approach: str, model: str = "gpt-3.5-turbo", max_retries: int = 3) -> Dict:
    """Send ``prompt`` to the chat completions API and parse the rating reply.

    Only the API request itself is retried, with exponential back-off starting
    at one second. A reply that arrives but cannot be parsed is returned at once
    as an invalid-JSON result rather than being retried and reported as an API
    failure.

    Args:
        prompt: Full user prompt, already containing the review text.
        approach: Label copied into the result under ``'approach'``.
        model: Chat model name passed to the API.
        max_retries: Maximum number of request attempts (at least one is made).

    Returns:
        Dict with ``predicted_stars`` (int 1-5), ``explanation``, ``valid_json``
        and ``approach``. Unparseable replies additionally carry
        ``raw_response``. When parsing fails or every attempt raises,
        ``predicted_stars`` is the fallback value 3 and ``valid_json`` is False.
    """
    attempts = max(1, max_retries)
    retry_delay = 1
    last_error = None

    for attempt in range(attempts):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that analyzes restaurant reviews and returns valid JSON responses."},
                    {"role": "user", "content": prompt}
                ],
                max_tokens=200,
                temperature=0.1
            )
            # The message content can be None; treat that as an empty reply.
            content = (response.choices[0].message.content or "").strip()
        except Exception as e:
            # Deliberately broad: any client or network error counts as a failed
            # attempt so one bad request does not abort the whole evaluation run.
            last_error = e
            print(f"API call failed (attempt {attempt + 1}): {e}")
            if attempt < attempts - 1:
                time.sleep(retry_delay)
                retry_delay *= 2
            continue

        return _parse_rating_response(content, approach)

    return {
        'predicted_stars': 3,
        'explanation': f'API call failed: {str(last_error)}',
        'valid_json': False,
        'approach': approach
    }

print("üîß API integration functions ready")
print("üõ°Ô∏è Error handling and retry logic implemented")

## Evaluation Framework

Comprehensive evaluation of all three approaches.

In [None]:
def evaluate_approach(df: pd.DataFrame, approach_func, approach_name: str, delay: float = 0.1) -> Dict:
    """Evaluate a single prompting approach on the dataset.

    Calls ``approach_func`` once per row with the row's ``text`` and compares
    the returned ``predicted_stars`` with the row's ``stars``.

    Args:
        df: Reviews with a ``text`` and a ``stars`` column. Any index is accepted.
        approach_func: Callable taking the review text and returning a dict with
            ``predicted_stars``, ``explanation`` and ``valid_json``.
        approach_name: Display name used in progress output and in the result.
        delay: Seconds to pause after each call to stay under API rate limits.
            Values <= 0 disable the pause.

    Returns:
        Dict with ``approach_name``, ``accuracy``, ``json_validity_rate``,
        ``predictions``, ``actual_ratings``, ``explanations``,
        ``classification_report`` (dict form) and ``confusion_matrix``
        (always 5x5, rows/columns ordered by star rating 1..5).

    Raises:
        ValueError: If ``df`` has no rows.

    Note:
        Predictions are scored exactly as returned by ``approach_func``,
        including any fallback rating it reports alongside ``valid_json=False``.
        Read ``accuracy`` together with ``json_validity_rate``.
    """
    print(f"\nüîÑ Evaluating {approach_name}...")

    total_reviews = len(df)
    if total_reviews == 0:
        raise ValueError("Cannot evaluate an approach on an empty dataset")

    star_labels = [1, 2, 3, 4, 5]
    predictions = []
    actual_ratings = []
    valid_json_count = 0
    explanations = []

    # Count by position rather than index label so the progress line is correct
    # for filtered, shuffled or non-integer indexes.
    for position, (_, row) in enumerate(df.iterrows(), start=1):
        print(f"Processing review {position}/{total_reviews}", end='\r')

        result = approach_func(row['text'])
        predictions.append(result['predicted_stars'])
        actual_ratings.append(row['stars'])
        explanations.append(result['explanation'])

        if result['valid_json']:
            valid_json_count += 1

        # Add small delay to respect API rate limits
        if delay > 0:
            time.sleep(delay)

    # Calculate metrics
    accuracy = accuracy_score(actual_ratings, predictions)
    json_validity_rate = valid_json_count / total_reviews

    # Calculate per-class accuracy
    class_report = classification_report(actual_ratings, predictions, output_dict=True, zero_division=0)

    results = {
        'approach_name': approach_name,
        'accuracy': accuracy,
        'json_validity_rate': json_validity_rate,
        'predictions': predictions,
        'actual_ratings': actual_ratings,
        'explanations': explanations,
        'classification_report': class_report,
        # Fixed label set keeps the matrix 5x5 even when a rating never occurs.
        'confusion_matrix': confusion_matrix(actual_ratings, predictions, labels=star_labels)
    }

    print(f"\n‚úÖ {approach_name} Results:")
    print(f"   üìä Accuracy: {accuracy:.3f}")
    print(f"   üìã JSON Validity Rate: {json_validity_rate:.3f}")

    return results

print("üìè Evaluation framework ready")

## Running All Evaluations

Execute all three approaches and collect results.

In [None]:
# Score every prompting strategy on the same dataframe.
print("üöÄ Starting comprehensive evaluation...")
print("‚è±Ô∏è This may take a few minutes due to API calls...")

# (callable, display name) pairs, evaluated in this order.
approaches = [
    (approach_1_direct_classification, "Direct Classification"),
    (approach_2_sentiment_analysis, "Sentiment Analysis"),
    (approach_3_comparative_analysis, "Comparative Analysis")
]

# Results keyed by display name; insertion order follows `approaches`.
all_results = {}
for approach_func, approach_name in approaches:
    all_results[approach_name] = evaluate_approach(df, approach_func, approach_name)

print("\nüéâ All evaluations completed!")

## Results Analysis & Visualization

In [None]:
# Create comparison table
def create_comparison_table(results: Dict) -> pd.DataFrame:
    """Build a one-row-per-approach summary table.

    ``results`` maps an approach name to its evaluation dict. Each row holds the
    name plus accuracy, JSON validity rate and the macro-averaged precision,
    recall and F1, every number formatted as a string with three decimals.
    """
    def three_decimals(value) -> str:
        return f"{value:.3f}"

    def row_for(name, evaluation) -> dict:
        # Read lazily so keys are accessed in the same order as the columns.
        def macro(metric):
            return three_decimals(evaluation['classification_report']['macro avg'][metric])

        return {
            'Approach': name,
            'Accuracy': three_decimals(evaluation['accuracy']),
            'JSON Validity Rate': three_decimals(evaluation['json_validity_rate']),
            'Precision (Macro Avg)': macro('precision'),
            'Recall (Macro Avg)': macro('recall'),
            'F1-Score (Macro Avg)': macro('f1-score'),
        }

    return pd.DataFrame([row_for(name, evaluation) for name, evaluation in results.items()])

# Render the side-by-side metrics table as plain text.
comparison_df = create_comparison_table(all_results)
print("üìä APPROACH COMPARISON TABLE")
print("=" * 80)
print(comparison_df.to_string(index=False))

# Winner = highest accuracy; on a tie max() keeps the first approach evaluated.
best_approach = max(all_results, key=lambda name: all_results[name]['accuracy'])
best_accuracy = all_results[best_approach]['accuracy']
print(f"\nüèÜ Best Performing Approach: {best_approach}")
print(f"üìà Best Accuracy: {best_accuracy:.3f}")

In [None]:
# Create comprehensive visualizations
# Four panels: accuracy per approach, JSON validity per approach, the confusion
# matrix of the most accurate approach, and predicted vs actual rating counts.
STAR_LABELS = [1, 2, 3, 4, 5]

# Separate name on purpose: `approaches` is the list of (function, name) pairs
# from the evaluation cell and must not be overwritten with plain strings.
approach_names = list(all_results.keys())

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
fig.suptitle('Fynd AI Assessment - Rating Prediction Results', fontsize=16, fontweight='bold')

# 1. Accuracy Comparison
accuracies = [all_results[name]['accuracy'] for name in approach_names]
axes[0, 0].bar(approach_names, accuracies, color=['#1f77b4', '#ff7f0e', '#2ca02c'])
axes[0, 0].set_title('Accuracy Comparison', fontweight='bold')
axes[0, 0].set_ylabel('Accuracy')
axes[0, 0].set_ylim(0, 1.1)  # headroom so a label above a bar at 1.0 stays inside the axes
for i, v in enumerate(accuracies):
    axes[0, 0].text(i, v + 0.01, f'{v:.3f}', ha='center', fontweight='bold')

# 2. JSON Validity Rate
json_rates = [all_results[name]['json_validity_rate'] for name in approach_names]
axes[0, 1].bar(approach_names, json_rates, color=['#d62728', '#9467bd', '#8c564b'])
axes[0, 1].set_title('JSON Validity Rate', fontweight='bold')
axes[0, 1].set_ylabel('Validity Rate')
axes[0, 1].set_ylim(0, 1.1)
for i, v in enumerate(json_rates):
    axes[0, 1].text(i, v + 0.01, f'{v:.3f}', ha='center', fontweight='bold')

# 3. Confusion Matrix for Best Approach
# Recomputed with a fixed label set so the matrix is always 5x5 and the tick
# labels show the star values 1-5 rather than positional indices 0-4.
best_result = all_results[best_approach]
cm = confusion_matrix(best_result['actual_ratings'], best_result['predictions'], labels=STAR_LABELS)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[1, 0],
            xticklabels=STAR_LABELS, yticklabels=STAR_LABELS)
axes[1, 0].set_title(f'Confusion Matrix - {best_approach}', fontweight='bold')
axes[1, 0].set_xlabel('Predicted')
axes[1, 0].set_ylabel('Actual')

# 4. Rating Distribution Comparison
# reindex(..., fill_value=0) keeps ratings that were never predicted on the
# line as an explicit zero instead of silently skipping them.
for approach, result in all_results.items():
    pred_dist = pd.Series(result['predictions']).value_counts().reindex(STAR_LABELS, fill_value=0)
    axes[1, 1].plot(pred_dist.index, pred_dist.values, marker='o', label=f'{approach} (Predicted)', linewidth=2)

# Every approach is evaluated on the same rows, so any entry supplies the actual ratings.
actual_dist = (
    pd.Series(all_results[approach_names[0]]['actual_ratings'])
    .value_counts()
    .reindex(STAR_LABELS, fill_value=0)
)
axes[1, 1].plot(actual_dist.index, actual_dist.values, marker='s', label='Actual', linewidth=3, color='black')
axes[1, 1].set_title('Rating Distribution Comparison', fontweight='bold')
axes[1, 1].set_xlabel('Star Rating')
axes[1, 1].set_ylabel('Count')
axes[1, 1].set_xticks(STAR_LABELS)
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("üìä Visualizations generated successfully!")

## Detailed Analysis & Insights

In [None]:
# Generate comprehensive report
# Prints a text summary of every approach and writes the headline metrics to
# evaluation_results.json in the current working directory.
print("\n" + "="*80)
print("FYND AI ASSESSMENT - TASK 1 EVALUATION REPORT")
print("="*80)

# Convert to built-in ints so the printed dict reads {1: 10, ...} regardless of
# how the installed NumPy version renders its scalar types.
rating_distribution = {
    int(stars): int(count)
    for stars, count in df['stars'].value_counts().sort_index().items()
}

print(f"\nüìä Dataset Summary:")
print(f"   ‚Ä¢ Total Reviews Analyzed: {len(df)}")
print(f"   ‚Ä¢ Rating Distribution: {rating_distribution}")

print(f"\nüîç Detailed Analysis:")
for approach_name, result in all_results.items():
    print(f"\n   {approach_name}:")
    print(f"     ‚úì Accuracy: {result['accuracy']:.3f}")
    print(f"     ‚úì JSON Validity: {result['json_validity_rate']:.3f}")
    print(f"     ‚úì Precision: {result['classification_report']['macro avg']['precision']:.3f}")
    print(f"     ‚úì Recall: {result['classification_report']['macro avg']['recall']:.3f}")
    print(f"     ‚úì F1-Score: {result['classification_report']['macro avg']['f1-score']:.3f}")

print(f"\nüèÜ Key Findings:")
print(f"   ‚Ä¢ Best Approach: {best_approach}")
print(f"   ‚Ä¢ Highest Accuracy: {all_results[best_approach]['accuracy']:.3f}")
print(f"   ‚Ä¢ Most Reliable JSON: {max(all_results.keys(), key=lambda x: all_results[x]['json_validity_rate'])}")

# Save results
# float(...) converts metric values to built-in floats before JSON serialisation.
results_summary = {
    'dataset_size': len(df),
    'approaches': {
        name: {
            'accuracy': float(result['accuracy']),
            'json_validity_rate': float(result['json_validity_rate']),
            'precision': float(result['classification_report']['macro avg']['precision']),
            'recall': float(result['classification_report']['macro avg']['recall']),
            'f1_score': float(result['classification_report']['macro avg']['f1-score'])
        }
        for name, result in all_results.items()
    },
    'best_approach': best_approach
}

# Explicit encoding so the file is written identically on every platform.
with open('evaluation_results.json', 'w', encoding='utf-8') as f:
    json.dump(results_summary, f, indent=2)

print(f"\nüíæ Results saved to 'evaluation_results.json'")
print("üéØ Task 1 evaluation completed successfully!")

## Sample Predictions Analysis

In [None]:
# Side-by-side look at what each approach predicted for the first few reviews.
print("\nüîç SAMPLE PREDICTIONS ANALYSIS")
print("=" * 60)

preview_count = min(3, len(df))
for i in range(preview_count):
    review = df.iloc[i]
    print(f"\nüìù Review {i+1}:")
    print(f"   Text: {review['text'][:100]}...")
    print(f"   Actual Rating: {review['stars']} stars")
    print(f"   Predictions:")

    for approach_name, result in all_results.items():
        predicted = result['predictions'][i]

        # Clip long explanations to 80 characters and mark the cut with "...".
        full_explanation = result['explanations'][i]
        if len(full_explanation) > 80:
            explanation = full_explanation[:80] + "..."
        else:
            explanation = full_explanation

        # Marker shows whether the prediction equals the actual rating.
        accuracy_indicator = "‚úÖ" if predicted == review['stars'] else "‚ùå"
        print(f"     {accuracy_indicator} {approach_name}: {predicted} stars - {explanation}")

print("\nüéâ Analysis complete! Check the generated visualizations and results above.")