# 📊 LLM Token & Cost Analyzer

This project provides an interactive Jupyter dashboard that tokenizes input text with tiktoken and reports its size, estimated cost, and word diversity.

## Features
- Token count using tiktoken
- Cost estimation for GPT models
- Text diversity analysis
- Visualization of tokens vs words


In [1]:
# Install dependency
!pip install tiktoken matplotlib

import tiktoken
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import ipywidgets as widgets



In [2]:
# Approximate price in dollars per 1,000 tokens, keyed by model name.
pricing = {"gpt-3.5-turbo": 0.002, "gpt-4": 0.03}

def analyze_text(text, model):
    """Tokenize *text* for *model* and summarize its size and estimated cost.

    The token count comes from the tiktoken encoding registered for *model*;
    the word figures come from plain whitespace splitting, so words are
    compared case-sensitively and with any attached punctuation.

    Returns:
        A 4-tuple ``(token_count, cost, unique_words, word_count)`` where
        ``cost`` is the token count priced at ``pricing[model]`` per
        1,000 tokens.
    """
    encoder = tiktoken.encoding_for_model(model)
    n_tokens = len(encoder.encode(text))

    word_list = text.split()
    distinct_words = set(word_list)

    estimated_cost = (n_tokens / 1000) * pricing[model]
    return n_tokens, estimated_cost, len(distinct_words), len(word_list)

In [3]:
# UI Widgets

# Free-form input area. The prompt is a placeholder rather than the initial
# value, so it disappears on typing and is never analyzed as if it were
# user-supplied text.
text_input = widgets.Textarea(
    value="",
    placeholder="Enter your text here...",
    description="Text:",
    layout=widgets.Layout(width='100%', height='150px')
)

# Model picker. Each option must have an entry in the pricing table, since
# the selected name is used to look up the per-token price.
model_select = widgets.Dropdown(
    options=["gpt-3.5-turbo", "gpt-4"],
    value="gpt-3.5-turbo",
    description="Model:"
)

# Triggers the analysis; the click handler is attached separately.
button = widgets.Button(description="Analyze 🚀", button_style='success')

# Container that captures everything the click handler prints or plots.
output = widgets.Output()

In [4]:
# Button click event
def on_click(b):
    """Analyze the current input and render the report inside ``output``.

    Reads the text and model from the ``text_input`` and ``model_select``
    widgets, runs ``analyze_text`` on them, then prints the summary, a
    size insight, a diversity insight, and a words-vs-tokens bar chart.
    Blank input produces a short prompt instead of an all-zero report.

    Args:
        b: The argument supplied by the button's click callback; unused.
    """
    with output:
        # wait=True defers the clear until new output arrives, so the
        # panel does not flash empty between consecutive runs.
        clear_output(wait=True)

        text = text_input.value
        model = model_select.value

        # Nothing to tokenize: say so rather than printing zeros, labelling
        # the empty text "repetitive", and drawing an empty chart.
        if not text.strip():
            print("⚠️ Please enter some text to analyze.")
            return

        tokens, cost, unique_words, word_count = analyze_text(text, model)

        # Styled Output
        print("✨ LLM TEXT ANALYSIS DASHBOARD")
        print("=" * 50)
        print(f"🧠 Model: {model}")
        print(f"📝 Words: {word_count}")
        print(f"🔢 Tokens: {tokens}")
        print(f"💰 Cost: ${cost:.6f}")
        print(f"📚 Unique Words: {unique_words}")
        print("-" * 50)

        # Insights: bucket the input by token count.
        print("📌 Insights:")
        if tokens < 50:
            print("👉 Small input (cheap & fast)")
        elif tokens < 200:
            print("👉 Medium input")
        else:
            print("⚠️ Large input (expensive)")

        # Share of distinct words; max() keeps the division safe even if
        # the word count were ever zero.
        diversity = unique_words / max(word_count, 1)
        if diversity > 0.7:
            print("👉 High diversity text")
        else:
            print("👉 Repetitive text")

        # Visualization
        plt.figure()
        plt.bar(["Words", "Tokens"], [word_count, tokens])
        plt.title("Words vs Tokens")
        plt.xlabel("Metric")
        plt.ylabel("Count")
        plt.show()

# Run the analysis whenever the button is clicked.
button.on_click(on_click)

In [5]:
# Render the controls in order, followed by the results area.
dashboard_widgets = (text_input, model_select, button, output)
display(*dashboard_widgets)

Textarea(value='Enter your text here...', description='Text:', layout=Layout(height='150px', width='100%'))

Dropdown(description='Model:', options=('gpt-3.5-turbo', 'gpt-4'), value='gpt-3.5-turbo')

Button(button_style='success', description='Analyze 🚀', style=ButtonStyle())

Output()


## Conclusion
The project demonstrates how input size affects LLM cost and efficiency.