# In [None]:  (cell marker left over from the notebook export)
# [1. INSTALL DEPENDENCIES]
# NOTE: lines starting with "!" are IPython/Colab shell escapes, not Python
# statements; this cell only runs inside a notebook kernel.
# Remove any existing ngrok binary and its configuration directories.
!rm -rf /usr/local/bin/ngrok /root/.config/ngrok /root/.ngrok2
# Install the latest ngrok CLI globally through npm, under /usr/local.
!npm install -g ngrok@latest --prefix /usr/local --force
# Install the Python dependencies quietly; torch, transformers and pyngrok are pinned.
# NOTE(review): flask is imported below but is not installed here - presumably it is
# preinstalled in the target runtime; confirm.
!pip install -q torch==2.3.0 transformers==4.31.0 sentencepiece rouge-score pdfminer.six requests pennylane pyngrok==7.0.0 beautifulsoup4

# [2. IMPORTS]
import os
import torch
import requests
import numpy as np
from flask import Flask, render_template, request, flash
from transformers import (
    T5ForConditionalGeneration,
    T5Tokenizer,
    PegasusForConditionalGeneration,
    PegasusTokenizer
)
from rouge_score import rouge_scorer
from pdfminer.high_level import extract_text
import pennylane as qml
from pyngrok import ngrok
from bs4 import BeautifulSoup
import re

# [3. FLASK APP INIT]
app = Flask(__name__)
# Uploaded PDFs are written below this directory while they are processed.
app.config['UPLOAD_FOLDER'] = '/content/uploads'
# Never ship a fixed signing key in source: use the SECRET_KEY environment
# variable when it is set, otherwise generate a random key for this process.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY') or os.urandom(32).hex()
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# [4. QUANTUM CONFIG]
# Size of the simulated register; its state vector has 2**num_qubits amplitudes.
num_qubits = 6
dev = qml.device("default.qubit", wires=num_qubits)

@qml.qnode(dev)
def quantum_circuit(inputs, weights):
    """Quantum feature map with amplitude embedding and entangling gates.

    Args:
        inputs: 1-D real feature vector with at most ``2**num_qubits`` entries.
            It is L2-normalised here (an all-zero vector is replaced by a
            uniform one) and zero-padded to ``2**num_qubits`` amplitudes.
        weights: Indexable as ``weights[layer][qubit][k]`` for ``layer`` in
            ``{0, 1}`` and ``k`` in ``{0, 1}`` (RY angle, RZ angle). Extra
            layers or angles are ignored.

    Returns:
        Measurement probabilities over all ``2**num_qubits`` basis states.
    """
    # Normalise in float64 so the unit-norm check of the embedding (which runs
    # because normalize=False) is not tripped by float32 rounding.
    inputs = np.asarray(inputs, dtype=np.float64)
    norm = np.linalg.norm(inputs)
    if norm == 0:
        inputs = np.ones(len(inputs)) / np.sqrt(len(inputs))
    else:
        inputs = inputs / norm

    # AmplitudeEmbedding needs exactly 2**num_qubits amplitudes. Shorter
    # vectors (the summarizer passes only a handful of features) used to raise;
    # zero-padding keeps the vector normalised and makes them valid.
    qml.AmplitudeEmbedding(inputs, wires=range(num_qubits), pad_with=0.0, normalize=False)

    # Feature map layers (parameterized rotations + entanglement)
    for layer in range(2):
        for i in range(num_qubits):
            qml.RY(weights[layer][i][0], wires=i)
            qml.RZ(weights[layer][i][1], wires=i)
        # Ring entanglement: each qubit with its successor, wrapping around
        for i in range(num_qubits):
            qml.CZ(wires=[i, (i+1)%num_qubits])

    return qml.probs(wires=range(num_qubits))

# [5. QUANTUM SUMMARIZER]
class QuantumSummarizer:
    """Seq2seq summarizer (T5 or Pegasus) with an optional quantum-perturbed input path.

    In quantum mode the encoder's token embeddings are blended with features
    produced by ``quantum_circuit`` and fed to ``generate`` as ``inputs_embeds``
    instead of token ids.
    """

    def __init__(self, model_name="t5-base"):
        """Load tokenizer and model, a ROUGE scorer and random circuit weights.

        Args:
            model_name: Hugging Face model id. Ids containing "pegasus" are
                loaded with the Pegasus classes, everything else as T5.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model_name = model_name

        # Initialize the appropriate model based on model_name
        if "pegasus" in model_name:
            self.tokenizer = PegasusTokenizer.from_pretrained(model_name)
            self.model = PegasusForConditionalGeneration.from_pretrained(model_name).to(self.device)
        else:  # T5 models
            self.tokenizer = T5Tokenizer.from_pretrained(model_name)
            self.model = T5ForConditionalGeneration.from_pretrained(model_name).to(self.device)

        self.scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

        # Circuit weights: 2 layers, 2 rotation angles (RY, RZ) per qubit
        self.q_weights = np.random.uniform(0, 2*np.pi, (2, num_qubits, 2))

    def _quantum_process(self, embeddings):
        """Blend quantum-circuit features into every token embedding.

        For each sequence, the leading right singular vector of its embedding
        matrix is truncated (or zero-padded) to ``2**num_qubits`` entries and
        sent through ``quantum_circuit``. The resulting probabilities are
        squashed with ``tanh``, repeated/truncated to the hidden size and mixed
        into all token embeddings with weights 0.7 (classical) / 0.3 (quantum).

        Args:
            embeddings: Tensor of token embeddings, expected to be
                ``(batch, seq_len, hidden)``.

        Returns:
            Tensor with the same shape and dtype as ``embeddings`` on
            ``self.device``, or ``None`` when there is nothing to process.
            The previous implementation returned ``(batch, hidden + 64)``,
            which cannot be used as ``inputs_embeds``.
        """
        if embeddings is None or embeddings.numel() == 0:
            return None

        n_amplitudes = 2 ** num_qubits
        blended = []
        for emb in embeddings.detach().cpu().numpy():
            hidden = emb.shape[-1]

            # Leading right singular vector = dominant direction in hidden space.
            # float64 keeps the later unit-norm check of the embedding exact.
            _, _, vh = np.linalg.svd(emb.astype(np.float64), full_matrices=False)
            features = np.zeros(n_amplitudes, dtype=np.float64)
            head = vh[0][:n_amplitudes]
            features[:head.shape[0]] = head
            features = features / (np.linalg.norm(features) + 1e-8)

            # Quantum transformation
            q_out = np.asarray(quantum_circuit(features, self.q_weights), dtype=np.float64)

            # Apply non-linear transformation to quantum output
            q_transformed = np.tanh(q_out * 2 - 1)

            # Hybrid combination: the quantum features act as a bias that is
            # broadcast over every token, so the sequence shape is preserved.
            q_bias = np.resize(q_transformed, hidden)
            blended.append(emb * 0.7 + q_bias * 0.3)

        stacked = torch.from_numpy(np.stack(blended))
        return stacked.to(device=self.device, dtype=embeddings.dtype)

    def summarize(self, text, quantum=False, max_length=150):
        """Generate a beam-search summary of ``text``.

        Args:
            text: Source document.
            quantum: When true, feed quantum-blended embeddings to the encoder
                instead of token ids. Falls back to the classical path when no
                blended embeddings could be produced.
            max_length: Maximum number of generated tokens.

        Returns:
            The decoded summary string.
        """
        # T5 expects a task prefix; Pegasus takes the raw document.
        source = text if "pegasus" in self.model_name else "summarize: " + text

        # Never exceed the model's positional table (e.g. 512 for
        # google/pegasus-xsum); configs without that attribute keep the 1024 cap.
        limit = min(1024, getattr(self.model.config, "max_position_embeddings", None) or 1024)

        # A single sequence needs no padding; padding to a fixed length only
        # added pad embeddings that dominated the SVD in quantum mode.
        inputs = self.tokenizer(
            source,
            return_tensors="pt",
            max_length=limit,
            truncation=True
        ).to(self.device)

        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]
        model_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}

        if quantum:
            with torch.no_grad():
                encoder = self.model.get_encoder()
                # Pegasus scales token embeddings inside its encoder only when
                # it is given ids, so apply the same factor here (T5 has none).
                embeds = encoder.embed_tokens(input_ids) * getattr(encoder, "embed_scale", 1.0)
                quantum_emb = self._quantum_process(embeds)

            if quantum_emb is not None:
                # Replace the ids in the kwargs actually passed to generate();
                # setting attributes on the BatchEncoding had no effect on **inputs.
                model_inputs = {"inputs_embeds": quantum_emb, "attention_mask": attention_mask}

        # Generate summary
        summary_ids = self.model.generate(
            **model_inputs,
            max_length=max_length,
            min_length=min(30, max_length//2),  # Ensure some minimum length
            num_beams=4,
            length_penalty=2.0,  # Encourage longer summaries
            early_stopping=True,
            no_repeat_ngram_size=3  # Avoid repetition
        )
        return self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    def evaluate_rouge(self, reference, candidate):
        """Return the ROUGE-1/2/L scores of ``candidate`` against ``reference``."""
        return self.scorer.score(reference, candidate)

# [6. HELPER FUNCTIONS]
def clean_text(text):
    """Normalise whitespace and drop unsupported characters.

    Whitespace runs collapse to one space first; afterwards every character
    that is not a word character, whitespace or one of ``.,?!;:()-`` is
    removed. The result is stripped of leading/trailing whitespace.
    """
    substitutions = (
        (r'\s+', ' '),               # collapse whitespace runs
        (r'[^\w\s.,?!;:()-]', ''),   # remove special characters
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()

def text_from_pdf(path):
    """Extract and clean the text of the PDF at ``path``.

    Only the first 10000 extracted characters are kept before cleaning.

    Raises:
        RuntimeError: If extraction fails. The failure used to be returned as
            a string, which callers then summarised as if it were the document.
    """
    try:
        text = extract_text(path)
    except Exception as e:
        # Broad on purpose: the extractor fails in many ways on malformed
        # files; the original cause stays attached to the raised error.
        raise RuntimeError(f"Error extracting PDF text: {str(e)}") from e
    return clean_text(text[:10000])  # First 10000 characters

def text_from_url(url):
    """Download ``url`` and return its visible text, cleaned and truncated.

    Script and style elements are removed before the text is extracted; the
    cleaned text is cut to its first 10000 characters.

    Raises:
        RuntimeError: If the request fails or the server answers with a
            non-success status. These failures used to be returned as strings,
            which callers then summarised as if they were the page content.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=15)
    except requests.RequestException as e:
        raise RuntimeError(f"Error fetching URL: {str(e)}") from e

    if not response.ok:
        raise RuntimeError(f"Error: HTTP status code {response.status_code}")

    # Use BeautifulSoup to extract main text
    soup = BeautifulSoup(response.text, 'html.parser')

    # Remove script and style elements
    for script in soup(["script", "style"]):
        script.extract()

    text = clean_text(soup.get_text())
    return text[:10000]  # First 10000 characters

# [7. FLASK ROUTES]
@app.route('/')
def home():
    """Render the summarization form without any results."""
    page = render_template('index.html')
    return page

@app.route('/', methods=['POST'])
def process():
    """Handle a form submission: gather text, summarise it twice and score it.

    The input text comes from an uploaded PDF, else from the URL field, else
    from the text field. A classical and a quantum summary are generated and
    compared with ROUGE, against the reference summary when one is given and
    against each other otherwise. Any failure is rendered as an error message
    on the form page.
    """
    import tempfile  # only needed by this handler

    # Models offered by the form. Anything else is rejected so a client cannot
    # make the server download and load an arbitrary checkpoint.
    allowed_models = (
        't5-small',
        't5-base',
        't5-large',
        'google/pegasus-xsum',
        'google/pegasus-cnn_dailymail',
    )

    text = request.form.get('text', '')
    url = request.form.get('url', '')
    pdf_file = request.files.get('pdf')
    model_name = request.form.get('model', 't5-base')
    reference = request.form.get('reference', '')

    # A non-numeric value used to raise outside the try block (HTTP 500).
    # Fall back to the default and clamp to the range the form advertises.
    try:
        max_length = int(request.form.get('max_length', 150))
    except (TypeError, ValueError):
        max_length = 150
    max_length = max(50, min(max_length, 500))

    try:
        if model_name not in allowed_models:
            return render_template('index.html', error=f"Unsupported model: {model_name}")

        # Handle input sources
        if pdf_file and pdf_file.filename.lower().endswith('.pdf'):
            # Store the upload under a server-chosen name: the client-supplied
            # filename may contain path components. Remove it once it is read.
            fd, path = tempfile.mkstemp(suffix='.pdf', dir=app.config['UPLOAD_FOLDER'])
            os.close(fd)
            try:
                pdf_file.save(path)
                text = text_from_pdf(path)
            finally:
                os.remove(path)
        elif url.strip():
            text = text_from_url(url)

        if not text.strip():
            return render_template('index.html', error="Please provide valid input text, URL, or PDF file")

        # Process summaries
        summarizer = QuantumSummarizer(model_name)
        classical = summarizer.summarize(text, quantum=False, max_length=max_length)
        quantum = summarizer.summarize(text, quantum=True, max_length=max_length)

        # Ensure quantum and classical summaries are different
        if quantum == classical:
            # Retry once with freshly drawn weights of the same layout
            summarizer.q_weights = np.random.uniform(0, 2*np.pi, summarizer.q_weights.shape)
            quantum = summarizer.summarize(text, quantum=True, max_length=max_length)

        # Calculate scores
        if reference.strip():
            scores = {
                'classical': summarizer.evaluate_rouge(reference, classical),
                'quantum': summarizer.evaluate_rouge(reference, quantum)
            }
        else:
            # Compare quantum to classical as reference
            scores = {
                'quantum_vs_classical': summarizer.evaluate_rouge(classical, quantum)
            }

        return render_template('index.html',
                             text=text,
                             classical=classical,
                             quantum=quantum,
                             scores=scores,
                             model=model_name,
                             max_length=max_length,
                             reference=reference,
                             url=url)

    except Exception as e:
        # Keep the traceback in the server log; the page is reachable through a
        # public tunnel, so remote users only get the message.
        app.logger.exception("Summarization request failed")
        return render_template('index.html', error=f"Error: {str(e)}")

# [8. TEMPLATE SETUP]
# Create the templates folder with the standard library instead of a shell
# escape, then (re)write the single page template used by both routes.
os.makedirs('/content/templates', exist_ok=True)
with open('/content/templates/index.html', 'w', encoding='utf-8') as f:
    f.write('''<!DOCTYPE html>
<html>
<head>
    <title>Quantum Summarizer</title>
    <style>
        body { font-family: Arial, sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; background: #f5f8fa; }
        .card { background: #fff; border-radius: 10px; padding: 20px; margin: 20px 0; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
        input, textarea, select { width: 100%; padding: 10px; margin: 5px 0; border: 1px solid #ddd; border-radius: 5px; box-sizing: border-box; }
        button { background: #3498db; color: white; padding: 12px 25px; border: none; border-radius: 5px; cursor: pointer; }
        button:hover { background: #2980b9; }
        .quantum { color: #27ae60; font-weight: bold; }
        .classical { color: #3498db; font-weight: bold; }
        .improvement { color: #27ae60; }
        .decline { color: #e74c3c; }
        .neutral { color: #f39c12; }
        table { width: 100%; border-collapse: collapse; margin-top: 15px; }
        td, th { padding: 12px; text-align: left; border-bottom: 1px solid #eee; }
        .error { color: #e74c3c; background: #fdedec; padding: 15px; border-radius: 5px; white-space: pre-wrap; }
        .tabs { display: flex; margin-bottom: -2px; }
        .tab { padding: 10px 15px; cursor: pointer; background: #e0e0e0; margin-right: 5px; border-radius: 5px 5px 0 0; }
        .tab.active { background: #fff; border-top: 3px solid #3498db; }
        .header { display: flex; justify-content: space-between; align-items: center; }
        .comparison { padding: 15px; background: #f9f9f9; border-radius: 5px; margin-top: 15px; }
        .form-group { margin-bottom: 15px; }
        .form-group label { display: block; margin-bottom: 5px; font-weight: bold; }
        .highlighting { background-color: #ffffcc; }
    </style>
</head>
<body>
    <div class="card">
        <div class="header">
            <h1>Quantum Text Summarization</h1>
        </div>

        <div class="tabs">
            <div class="tab active" onclick="showTab('text-tab', this)">Text</div>
            <div class="tab" onclick="showTab('url-tab', this)">URL</div>
            <div class="tab" onclick="showTab('pdf-tab', this)">PDF</div>
        </div>

        <form method="post" enctype="multipart/form-data">
            <div class="form-group">
                <label>Select Model:</label>
                <select name="model">
                    <option value="t5-small" {% if model == 't5-small' %}selected{% endif %}>T5-Small</option>
                    <option value="t5-base" {% if model == 't5-base' or not model %}selected{% endif %}>T5-Base</option>
                    <option value="t5-large" {% if model == 't5-large' %}selected{% endif %}>T5-Large</option>
                    <option value="google/pegasus-xsum" {% if model == 'google/pegasus-xsum' %}selected{% endif %}>Pegasus-XSum</option>
                    <option value="google/pegasus-cnn_dailymail" {% if model == 'google/pegasus-cnn_dailymail' %}selected{% endif %}>Pegasus-CNN</option>
                </select>
            </div>

            <div id="url-tab" class="tab-content" style="display: none;">
                <div class="form-group">
                    <label>URL to Summarize:</label>
                    <input type="text" name="url" placeholder="Enter URL (e.g., https://en.wikipedia.org/wiki/Quantum_computing)" value="{{ url }}">
                </div>
            </div>

            <div id="pdf-tab" class="tab-content" style="display: none;">
                <div class="form-group">
                    <label>PDF to Summarize:</label>
                    <input type="file" name="pdf" accept=".pdf">
                </div>
            </div>

            <div id="text-tab" class="tab-content">
                <div class="form-group">
                    <label>Text to Summarize:</label>
                    <textarea name="text" rows="6" placeholder="Enter text here...">{{ text }}</textarea>
                </div>
            </div>

            <div class="form-group">
                <label>Reference Summary (Optional, for evaluation):</label>
                <textarea name="reference" rows="3" placeholder="Reference summary (for evaluation)">{{ reference }}</textarea>
            </div>

            <div class="form-group">
                <label>Maximum Summary Length (tokens):</label>
                <input type="number" name="max_length" min="50" max="500" value="{{ max_length or 150 }}">
            </div>

            <button type="submit">Generate Summaries</button>
        </form>
    </div>

    {% if error %}
        <div class="card error">{{ error }}</div>
    {% endif %}

    {% if classical %}
    <div class="card">
        <h2 class="classical">Classical Summary</h2>
        <p>{{ classical }}</p>
    </div>
    <div class="card">
        <h2 class="quantum">Quantum Summary</h2>
        <p>{{ quantum }}</p>

        <div class="comparison">
            <h3>Summary Comparison</h3>
            <p>Word count:
               <span class="classical">Classical: {{ classical.split()|length }}</span> vs.
               <span class="quantum">Quantum: {{ quantum.split()|length }}</span>
               <span class="{{ 'improvement' if quantum.split()|length != classical.split()|length else 'neutral' }}">
                  ({{ quantum.split()|length - classical.split()|length }})
               </span>
            </p>
            <p>Character count:
               <span class="classical">Classical: {{ classical|length }}</span> vs.
               <span class="quantum">Quantum: {{ quantum|length }}</span>
               <span class="{{ 'improvement' if quantum|length != classical|length else 'neutral' }}">
                  ({{ quantum|length - classical|length }})
               </span>
            </p>
        </div>
    </div>
    {% endif %}

    {% if scores %}
    <div class="card">
        <h3>ROUGE Score Comparison</h3>

        {% if 'classical' in scores and 'quantum' in scores %}
        <table>
            <tr>
                <th>Metric</th>
                <th>Classical</th>
                <th>Quantum</th>
                <th>Difference</th>
            </tr>
            {% for metric in ['rouge1', 'rouge2', 'rougeL'] %}
            <tr>
                <td>{{ metric|upper }}</td>
                <td class="classical">{{ "%.3f"|format(scores.classical[metric].fmeasure) }}</td>
                <td class="quantum">{{ "%.3f"|format(scores.quantum[metric].fmeasure) }}</td>
                <td class="{{ 'improvement' if scores.quantum[metric].fmeasure > scores.classical[metric].fmeasure else 'decline' }}">
                    {{ "%+.3f"|format(scores.quantum[metric].fmeasure - scores.classical[metric].fmeasure) }}
                </td>
            </tr>
            {% endfor %}
        </table>
        {% elif 'quantum_vs_classical' in scores %}
        <h4>Quantum vs Classical Similarity</h4>
        <table>
            <tr>
                <th>Metric</th>
                <th>Score</th>
                <th>Interpretation</th>
            </tr>
            {% for metric in ['rouge1', 'rouge2', 'rougeL'] %}
            <tr>
                <td>{{ metric|upper }}</td>
                <td>{{ "%.3f"|format(scores.quantum_vs_classical[metric].fmeasure) }}</td>
                <td>
                    {% if scores.quantum_vs_classical[metric].fmeasure > 0.8 %}
                    Very similar summaries
                    {% elif scores.quantum_vs_classical[metric].fmeasure > 0.6 %}
                    Moderately similar
                    {% elif scores.quantum_vs_classical[metric].fmeasure > 0.4 %}
                    Somewhat different
                    {% else %}
                    Significantly different
                    {% endif %}
                </td>
            </tr>
            {% endfor %}
        </table>
        {% endif %}
    </div>
    {% endif %}

    <script>
        // Show one input pane and highlight its tab header. The header element
        // is passed in explicitly: the global "event" object is undefined when
        // this runs at page load rather than from a click handler.
        function activateTab(tabId, tabEl) {
            // Hide all tab contents
            const tabContents = document.querySelectorAll('.tab-content');
            tabContents.forEach(pane => pane.style.display = 'none');

            // Show the selected tab content
            document.getElementById(tabId).style.display = 'block';

            // Update active tab styling
            const tabs = document.querySelectorAll('.tab');
            tabs.forEach(tab => tab.classList.remove('active'));
            if (tabEl) {
                tabEl.classList.add('active');
            }
        }

        // Click handler for the tab headers.
        function showTab(tabId, tabEl) {
            activateTab(tabId, tabEl);

            // Clear other inputs when switching tabs
            if (tabId === 'text-tab') {
                document.querySelector('input[name="url"]').value = '';
                document.querySelector('input[name="pdf"]').value = '';
            } else if (tabId === 'url-tab') {
                document.querySelector('textarea[name="text"]').value = '';
                document.querySelector('input[name="pdf"]').value = '';
            } else if (tabId === 'pdf-tab') {
                document.querySelector('textarea[name="text"]').value = '';
                document.querySelector('input[name="url"]').value = '';
            }
        }

        // Check if we have a URL from a previous submission; only switch the
        // visible pane, do not clear any field.
        const urlInput = document.querySelector('input[name="url"]');
        if (urlInput && urlInput.value) {
            activateTab('url-tab', document.querySelector('.tab:nth-child(2)'));
        }
    </script>
</body>
</html>''')

# [9. RUN APP]
# Get your free ngrok token from: https://dashboard.ngrok.com/get-started
# and expose it to this process as the NGROK_AUTH_TOKEN environment variable,
# e.g. run  os.environ["NGROK_AUTH_TOKEN"] = "<your token>"  in a private cell.
# SECURITY: an auth token used to be hard-coded on this line. Credentials must
# not live in source; treat the old value as leaked and revoke it.
ngrok_token = os.environ.get("NGROK_AUTH_TOKEN", "").strip()

if not ngrok_token:
    print("❌ Error: NGROK_AUTH_TOKEN is not set")
    print("1. Get ngrok v2 token: https://dashboard.ngrok.com/get-started")
    print("2. Set the NGROK_AUTH_TOKEN environment variable and run this cell again")
else:
    ngrok.set_auth_token(ngrok_token)
    try:
        # Clear existing tunnels
        for tunnel in ngrok.get_tunnels():
            ngrok.disconnect(tunnel.public_url)

        # Create new tunnel
        public_url = ngrok.connect(5000, bind_tls=True).public_url
    except Exception as e:
        # Only tunnel setup problems get the ngrok-specific guidance.
        print(f"❌ Error: {str(e)}")
        print("1. Get ngrok v2 token: https://dashboard.ngrok.com/get-started")
        print("2. Check the value of the NGROK_AUTH_TOKEN environment variable")
        print("3. Use Colab GPU runtime for better performance")
    else:
        print(f"✅ App running at: {public_url}")
        try:
            app.run(host='0.0.0.0', port=5000)
        except Exception as e:
            print(f"❌ Error: {str(e)}")
        finally:
            # Do not leave the public tunnel open once the server has stopped.
            ngrok.disconnect(public_url)