In [None]:
!pip install pyngrok flask


In [13]:
import os
from getpass import getpass

from pyngrok import ngrok

# Never store the ngrok authtoken in the notebook itself. It is read from the
# NGROK_AUTHTOKEN environment variable, with an interactive (non-echoing)
# prompt as a fallback when the variable is not set.
# NOTE(review): the token that used to be hardcoded in this cell has been
# exposed and should be revoked/rotated in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_auth_token)


In [27]:
from flask import Flask, request, jsonify
from pyngrok import ngrok
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import wordnet
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import random

# Make sure the NLTK resources used by the tokenizers and the synonym lookup
# are present locally; download any that cannot be found.
for resource_path, package_name in (
    ('tokenizers/punkt', 'punkt'),
    ('corpora/wordnet', 'wordnet'),
):
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package_name)


# Flask application on which the route handlers below are registered.
app = Flask(__name__)

# === Genetic Algorithm Summarizer ===
def fitness(individual, sentence_vectors, max_sentences, penalty_weight=0.5):
    """Score one candidate summary for the genetic algorithm.

    Args:
        individual: Binary (0/1) NumPy vector; entry ``i`` is 1 when sentence
            ``i`` is part of the candidate summary.
        sentence_vectors: Per-sentence scores, aligned with ``individual``.
        max_sentences: Number of selected sentences allowed before the
            penalty starts to apply.
        penalty_weight: Amount subtracted for every selected sentence beyond
            ``max_sentences``. Defaults to 0.5, the value that used to be
            hard-coded.

    Returns:
        The summed score of the selected sentences minus the length penalty.
    """
    score = np.sum(individual * sentence_vectors)
    # Only selections that exceed the limit are penalised; shorter selections
    # get no bonus.
    excess = max(0, np.sum(individual) - max_sentences)
    return score - excess * penalty_weight

def ga_summarize(paragraph, bullet_count=3, population_size=30, generations=50):
    """Pick up to ``bullet_count`` sentences from ``paragraph`` with a genetic algorithm.

    Every individual is a 0/1 vector over the sentences of the paragraph.
    Sentences are scored by the sum of their TF-IDF weights and individuals
    are ranked with ``fitness``. Each generation keeps the better half of the
    population and refills it with single-point crossover children, each of
    which has one bit flipped with 10% probability.

    Args:
        paragraph: Text to summarise.
        bullet_count: Maximum number of sentences to return. Values <= 0
            yield an empty list.
        population_size: Number of individuals per generation. At least 4 are
            always used so that two distinct parents can be drawn from the
            surviving half.
        generations: Number of selection/crossover/mutation rounds.

    Returns:
        A list of sentences in document order, at most ``bullet_count`` long.
        If the paragraph has no more than ``bullet_count`` sentences they are
        all returned unchanged.
    """
    if bullet_count <= 0:
        return []

    sentences = sent_tokenize(paragraph)
    n_sentences = len(sentences)
    if n_sentences <= bullet_count:
        return sentences
    # From here on n_sentences >= 2, so a crossover point in [1, n-1] exists.

    # Vectorize sentences (TF-IDF sum per sentence).
    try:
        tfidf = TfidfVectorizer(stop_words='english').fit_transform(sentences)
    except ValueError:
        # NOTE(review): scikit-learn raises ValueError when the vocabulary is
        # empty (e.g. text made only of stop words); there is nothing to rank
        # then, so fall back to the leading sentences.
        return sentences[:bullet_count]
    sentence_scores = np.asarray(tfidf.sum(axis=1)).flatten()

    def score_of(individual):
        return fitness(individual, sentence_scores, bullet_count)

    # random.sample(survivors, 2) needs at least two survivors.
    pop_size = max(population_size, 4)
    n_survivors = pop_size // 2

    # Initial population: random binary selection vectors.
    population = [np.random.choice([0, 1], size=n_sentences) for _ in range(pop_size)]

    for _ in range(generations):
        # Selection: keep the fitter half.
        population = sorted(population, key=score_of, reverse=True)[:n_survivors]

        # Crossover & mutation: refill back up to the full population size
        # (also for odd sizes, which would otherwise shrink by one).
        offspring = []
        while len(population) + len(offspring) < pop_size:
            parent1, parent2 = random.sample(population, 2)
            crossover_point = random.randint(1, n_sentences - 1)
            child = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])
            # Mutation: flip one bit with 10% chance.
            if random.random() < 0.1:
                mutate_idx = random.randint(0, n_sentences - 1)
                child[mutate_idx] = 1 - child[mutate_idx]
            offspring.append(child)
        population.extend(offspring)

    # Final selection: best individual of the last generation.
    best_ind = max(population, key=score_of)
    chosen = np.flatnonzero(best_ind)
    if chosen.size == 0:
        # Nothing selected: consider every sentence a candidate instead.
        chosen = np.arange(n_sentences)
    if chosen.size > bullet_count:
        # Too many selected: keep the highest-scoring ones (ties favour the
        # earlier sentence) rather than simply the first ones, then restore
        # document order.
        best_first = np.argsort(-sentence_scores[chosen], kind='stable')[:bullet_count]
        chosen = np.sort(chosen[best_first])
    return [sentences[i] for i in chosen]
# === Paraphrasing with a Simple Informed Search + Fuzzy Heuristics ===

# For simplicity, we use synonym replacement and heuristic scoring.



def get_synonyms(word):
    """Return the WordNet synonyms of ``word`` in a stable, de-duplicated order.

    Lemma names are collected in the order in which WordNet yields them, so
    slicing the result (e.g. ``[:3]``) gives the same synonyms on every run
    instead of depending on set iteration order.

    Args:
        word: The token to look up.

    Returns:
        A list of distinct synonym strings. Underscores in lemma names are
        replaced by spaces, and lemmas equal to ``word`` (case-insensitively)
        are left out. The list is empty when WordNet has no synsets for
        ``word``.
    """
    target = word.lower()
    # A dict is used as an insertion-ordered set.
    ordered = {}
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            name = lemma.name()
            if name.lower() != target:
                ordered.setdefault(name.replace('_', ' '), None)
    return list(ordered)

def heuristic_score(original_words, candidate_words):
    """Rate a candidate token list against the original with a fuzzy heuristic.

    ``similarity`` is the share of positions (relative to the length of the
    original) at which both lists hold the same token, and ``difference`` is
    its complement. The returned score is the weighted sum
    ``0.7 * similarity + 0.3 * difference``.
    """
    matches = 0
    for original, candidate in zip(original_words, candidate_words):
        if original == candidate:
            matches += 1

    # Guard against dividing by zero when the original has no tokens.
    denominator = max(len(original_words), 1)
    similarity = matches / denominator
    difference = 1 - similarity

    # Favour keeping the meaning (similarity) while still crediting changed wording.
    return 0.7 * similarity + 0.3 * difference

def paraphrase_sentence(sentence):
    """Paraphrase ``sentence`` by swapping a single word for a WordNet synonym.

    Every candidate differs from the tokenized original in exactly one token,
    which is replaced by one of the first three synonyms returned by
    ``get_synonyms``. Candidates are rated with ``heuristic_score`` and the
    best one (the earliest on ties) is turned back into text.

    Args:
        sentence: The sentence to paraphrase.

    Returns:
        The paraphrased sentence, or ``sentence`` unchanged when no token has
        a synonym.
    """
    # Local import so this function does not depend on edits elsewhere in the file.
    from nltk.tokenize.treebank import TreebankWordDetokenizer

    words = word_tokenize(sentence)
    candidates = []

    # Generate candidates by replacing one word with a synonym at a time.
    for i, word in enumerate(words):
        for syn in get_synonyms(word)[:3]:  # limit to 3 synonyms to keep the search small
            new_words = words.copy()
            new_words[i] = syn
            candidates.append(new_words)

    if not candidates:
        return sentence  # No paraphrase possible

    # max() returns the first best-scoring candidate, like a stable descending sort.
    best_words = max(candidates, key=lambda cand: heuristic_score(words, cand))

    # Detokenize instead of ' '.join so punctuation and contractions are
    # re-attached ("Hello, world." rather than "Hello , world .").
    return TreebankWordDetokenizer().detokenize(best_words)


# === Flask Routes ===

@app.route('/summarize', methods=['POST'])
def summarize_route():
    """POST /summarize: return a bullet-point summary of a paragraph.

    Expects a JSON object with:
        paragraph (str, optional): text to summarise, defaults to ''.
        bullet_count (int, optional): maximum number of bullets, defaults to 3.

    Returns:
        A JSON array of the selected sentences, or a JSON ``{'error': ...}``
        object with HTTP status 400 when the request body is invalid.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    paragraph = data.get('paragraph', '')
    if not isinstance(paragraph, str):
        return jsonify({'error': "'paragraph' must be a string."}), 400

    try:
        bullet_count = int(data.get('bullet_count', 3))
    except (TypeError, ValueError):
        return jsonify({'error': "'bullet_count' must be an integer."}), 400
    if bullet_count < 1:
        return jsonify({'error': "'bullet_count' must be at least 1."}), 400

    summary = ga_summarize(paragraph, bullet_count)
    return jsonify(summary)

@app.route('/paraphrase', methods=['POST'])
def paraphrase():
    """POST /paraphrase: paraphrase a paragraph sentence by sentence.

    Expects a JSON object with:
        paragraph (str, optional): text to paraphrase, defaults to ''.

    Returns:
        A JSON object ``{'paraphrased': <text>}`` where the paraphrased
        sentences are joined by single spaces, or a JSON ``{'error': ...}``
        object with HTTP status 400 when the request body is invalid.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400

    paragraph = data.get('paragraph', '')
    if not isinstance(paragraph, str):
        return jsonify({'error': "'paragraph' must be a string."}), 400

    sentences = sent_tokenize(paragraph)
    paraphrased_sentences = [paraphrase_sentence(sent) for sent in sentences]
    return jsonify({'paraphrased': ' '.join(paraphrased_sentences)})


if __name__ == '__main__':
    port = 5000

    # Open the ngrok tunnel. connect() returns a tunnel object rather than a
    # plain string, so the URL is taken from its public_url attribute;
    # formatting the object itself nests its whole repr inside the message.
    tunnel = ngrok.connect(port)
    public_url = tunnel.public_url
    print(f" * ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:{port}\"")

    try:
        # Run the Flask app; this call blocks until the server is stopped.
        app.run(port=port)
    finally:
        # Close the tunnel once the server stops so it does not stay open.
        ngrok.disconnect(public_url)


[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


 * ngrok tunnel "NgrokTunnel: "https://17b7-35-221-51-235.ngrok-free.app" -> "http://localhost:5000"" -> "http://127.0.0.1:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:15:57] "POST /summarize HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:15:57] "POST /summarize HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:28:28] "POST /paraphrase HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:28:35] "POST /paraphrase HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:29:11] "POST /summarize HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:29:11] "POST /summarize HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:29:23] "POST /paraphrase HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:29:37] "POST /paraphrase HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:31:44] "POST /summarize HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:31:44] "POST /summarize HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/May/2025 20:33:40] "POST /summarize HTTP/1.1