# ChatGPT Prompt Analyzer 🤖

This notebook demonstrates the functionality of our ChatGPT Prompt Analyzer. It includes:
- Feature extraction
- Pattern matching
- Classification
- Visualization

## Setup and Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import re
from IPython.display import display, HTML

# Set plotting style.
# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and
# later releases removed the old 'seaborn' alias, so try the new name first
# and fall back to the legacy name on older installations.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
sns.set_palette('husl')

## Feature Extraction Functions

In [None]:
def clean_text(text):
    """Normalize text: drop punctuation, collapse whitespace, lowercase.

    The input is coerced with ``str()`` first, so non-string values are
    accepted. Every character that is neither a word character nor
    whitespace becomes a space, runs of whitespace collapse to a single
    space, and the result is stripped and lowercased.
    """
    as_string = str(text)
    without_punctuation = re.sub(r'[^\w\s]', ' ', as_string)
    single_spaced = re.sub(r'\s+', ' ', without_punctuation)
    trimmed = single_spaced.strip()
    return trimmed.lower()

def extract_features(text):
    """Extract basic text metrics used for classification.

    Args:
        text: The prompt text to measure (a string).

    Returns:
        A dict with the keys ``word_count``, ``avg_word_length``,
        ``contains_question``, ``contains_exclamation`` and
        ``sentence_count``.
    """
    words = text.split()
    # NOTE(review): not used by the metrics below; presumably needed by the
    # remaining features that still have to be copied in -- confirm.
    first_three_words = ' '.join(words[:3])

    # re.split yields an empty string after a trailing terminator (and for
    # empty input), so only non-blank segments count as sentences.
    sentences = [
        segment for segment in re.split(r'[.!?]+', text) if segment.strip()
    ]

    # Basic metrics
    features = {
        'word_count': len(words),
        'avg_word_length': sum(len(word) for word in words) / len(words) if words else 0,
        'contains_question': '?' in text,
        'contains_exclamation': '!' in text,
        'sentence_count': len(sentences)
    }

    # Add all other features from model_training.py
    # ... (copy the rest of the feature extraction code)

    return features

## Prediction Function

In [None]:
def predict_type_with_confidence(text):
    """Predict the interaction type of a prompt with per-category confidence.

    Args:
        text: The raw prompt; it is normalized with ``clean_text`` before
            feature extraction.

    Returns:
        A tuple ``(prediction, confidence_scores, features)`` where
        ``prediction`` is one of ``'analysis'``, ``'teaching'``,
        ``'guidance'``, ``'creation'`` or ``'other'``,
        ``confidence_scores`` maps each category to a percentage of the
        total score, and ``features`` is the extracted feature dict.
    """
    cleaned_text = clean_text(text)
    features = extract_features(cleaned_text)

    # clean_text strips punctuation, so metrics that depend on '?', '!' and
    # sentence terminators have to be measured on the raw prompt instead.
    raw_features = extract_features(str(text))
    for key in ('contains_question', 'contains_exclamation', 'sentence_count'):
        if key in raw_features:
            features[key] = raw_features[key]

    def value(name):
        # A feature the extractor did not produce contributes nothing
        # instead of aborting the prediction with a KeyError.
        return features.get(name, 0)

    scores = {
        'analysis': (
            value('analysis_pattern_score') * 2.0 +
            value('analysis_verb_score') * 1.5 +
            value('data_term_score') +
            value('quality_term_score') +
            value('context_term_score') +
            sum([
                value('data_analysis_score'),
                value('performance_analysis_score'),
                value('quality_analysis_score'),
                value('security_analysis_score')
            ])
        ),
        'teaching': (
            value('teaching_score') * 1.5 +
            (2.0 if value('starts_with_teaching') else 0) +
            (1.0 if value('has_basic_terms') else 0)
        ),
        'guidance': (
            value('guidance_score') * 1.5 +
            (2.0 if value('starts_with_guidance') else 0)
        ),
        'creation': (
            value('creation_score') * 1.5 +
            (2.0 if value('starts_with_creation') else 0)
        )
    }

    # Normalize scores to percentages; a zero total falls back to 1 so the
    # division is defined and every confidence is 0.
    total = sum(scores.values()) or 1
    confidence_scores = {k: (v/total)*100 for k, v in scores.items()}

    # The best category wins only if its raw score reaches 1.0;
    # anything weaker is reported as 'other'.
    best_category = max(scores, key=scores.get)
    prediction = best_category if scores[best_category] >= 1.0 else 'other'

    return prediction, confidence_scores, features

## Visualization Functions

In [None]:
def plot_confidence_scores(confidence_scores):
    """Draw a bar chart of the confidence percentage for each category.

    Args:
        confidence_scores: Mapping of category name to confidence value,
            plotted on a fixed 0-100 y-axis.
    """
    labels = list(confidence_scores)
    values = [confidence_scores[label] for label in labels]
    palette = ['#2ecc71', '#3498db', '#9b59b6', '#e74c3c']

    plt.figure(figsize=(10, 5))
    plt.bar(labels, values, color=palette)
    plt.title('Confidence Scores by Category')
    plt.ylabel('Confidence (%)')
    plt.ylim(0, 100)

    # Write each percentage just above the top of its bar.
    for position, value in enumerate(values):
        plt.text(position, value + 1, f'{value:.1f}%', ha='center')

    plt.show()

def plot_feature_analysis(features):
    """Draw a bar chart of the four analysis sub-scores.

    Args:
        features: Feature dict; the keys ``data_analysis_score``,
            ``performance_analysis_score``, ``quality_analysis_score`` and
            ``security_analysis_score`` are read. A missing key is plotted
            as 0 rather than raising a KeyError.
    """
    analysis_features = {
        'Data Analysis': features.get('data_analysis_score', 0),
        'Performance': features.get('performance_analysis_score', 0),
        'Quality': features.get('quality_analysis_score', 0),
        'Security': features.get('security_analysis_score', 0)
    }

    plt.figure(figsize=(12, 6))
    plt.bar(analysis_features.keys(), analysis_features.values())
    plt.title('Analysis Feature Scores')
    plt.ylabel('Score')
    plt.xticks(rotation=45)

    # Write each score just above the top of its bar.
    for i, score in enumerate(analysis_features.values()):
        plt.text(i, score + 0.01, f'{score:.2f}', ha='center')

    plt.tight_layout()
    plt.show()

## Prompt Analysis Function

In [None]:
def analyze_prompt(prompt):
    """Analyze a prompt, print a summary and show both charts.

    Args:
        prompt: The prompt text to classify.

    Returns:
        The feature dict produced for the prompt.
    """
    prediction, confidence_scores, features = predict_type_with_confidence(prompt)

    print(f"🎯 Prediction: {prediction.title()}\n")

    print("📊 Confidence Scores:")
    # Highest confidence first.
    for category, score in sorted(confidence_scores.items(), key=lambda x: x[1], reverse=True):
        print(f"{category.title()}: {score:.1f}%")

    print("\n📈 Key Metrics:")
    print(f"Word Count: {features['word_count']}")
    print(f"Average Word Length: {features['avg_word_length']:.1f}")
    # Report 0.00 instead of raising a KeyError when the extractor did not
    # produce a technical term ratio.
    print(f"Technical Term Ratio: {features.get('technical_term_ratio', 0.0):.2f}")

    # Plot visualizations
    plot_confidence_scores(confidence_scores)
    plot_feature_analysis(features)

    return features

## Example Usage

Try analyzing different prompts:

In [None]:
# Sample prompts: dataset trends, algorithm performance, security, code quality
example_prompts = [
    "Analyze this dataset and identify trends",
    "Can you analyze the performance of this algorithm?",
    "Please analyze our security vulnerabilities",
    "Analyze the code quality of this module",
]

# Run the analyzer on the first sample only
print("Example Analysis:")
analyze_prompt(example_prompts[0])

## Interactive Analysis

Enter your own prompt to analyze:

In [None]:
# Replace the placeholder text below with the prompt you want analyzed
your_prompt = "Enter your prompt here"
analyze_prompt(your_prompt)