In [3]:
from flask import Flask, request, render_template
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch
import matplotlib.pyplot as plt

# WSGI application object; the route below is registered on it.
app = Flask(__name__)

# **1. Load the preprocessed dataset**
# Read once at import time and shared by every request. The 'date' column is
# parsed into datetimes while loading.
df = pd.read_csv("final_categorized_dataset.csv", parse_dates=['date'])

# **2. Load the SentenceTransformer Model**
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The embedding model is also loaded once at import time and reused by
# filter_by_keyword for both the keyword and the row texts.
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

# **3. Filter by Keyword**
def filter_by_keyword(df, keyword, threshold=0.5):
    """
    Return the rows of ``df`` whose ``combined_text`` is similar to ``keyword``.

    Similarity is the cosine similarity between the embedding of the keyword
    and the embedding of each row's ``combined_text``, both produced by the
    module-level ``model``.

    Args:
        df: DataFrame with a ``combined_text`` column. It is NOT modified;
            scoring happens on a copy so concurrent requests sharing the same
            frame cannot overwrite each other's scores.
        keyword: Search phrase to compare every row against.
        threshold: Rows are kept only when their similarity is strictly
            greater than this value (default 0.5).

    Returns:
        A new DataFrame containing the matching rows plus a ``similarity``
        column holding each row's score.
    """
    print(f"Filtering rows relevant to the keyword: {keyword}")

    if df.empty:
        # Nothing to score; still expose the "similarity" column so the
        # result has the same shape as the non-empty case.
        return df.assign(similarity=[])

    keyword_embedding = model.encode(keyword, convert_to_tensor=True)

    # Encode all rows in one batched call instead of one model call per row.
    # Missing text is treated as an empty string so it cannot break encoding.
    texts = df["combined_text"].fillna("").astype(str).tolist()
    text_embeddings = model.encode(texts, convert_to_tensor=True)

    # cos_sim yields an (n_rows, 1) matrix; flatten it to one score per row.
    scores = util.cos_sim(text_embeddings, keyword_embedding).flatten().tolist()

    # assign() returns a new frame, leaving the caller's DataFrame untouched.
    scored = df.assign(similarity=scores)
    return scored[scored["similarity"] > threshold]

# **4. Summarize by SWOT Categories**
def _contains_category(value, category):
    """Return True when ``category`` is found in ``value``; False for non-containers (e.g. NaN)."""
    try:
        return category in value
    except TypeError:
        # Missing values are floats (NaN) and do not support ``in``.
        return False


def summarize_by_category(df, max_chars=500):
    """
    Summarize content grouped by SWOT categories.

    For each of "Strengths", "Weaknesses", "Opportunities" and "Threats", the
    ``content`` of every row whose ``categories`` value contains that name is
    joined with single spaces and truncated to ``max_chars`` characters.

    Args:
        df: DataFrame with ``categories`` and ``content`` columns. May be
            empty (for example when a keyword matched no rows).
        max_chars: Maximum length of each summary (default 500).

    Returns:
        dict mapping each SWOT category name to its summary text, or to a
        fixed placeholder message when no content is available.
    """
    summaries = {}
    for category in ["Strengths", "Weaknesses", "Opportunities", "Threats"]:
        # astype(bool) matters for an empty frame: apply() then returns an
        # empty object-dtype Series, which pandas would otherwise interpret
        # as a (empty) column selection rather than a row mask.
        mask = df["categories"].apply(_contains_category, args=(category,)).astype(bool)
        category_rows = df[mask]

        # Skip missing content so str.join never sees a non-string value.
        pieces = category_rows["content"].dropna().astype(str).tolist()
        combined_text = " ".join(pieces)

        summaries[category] = (
            combined_text[:max_chars]
            if combined_text
            else "No content available for this category."
        )
    return summaries

# **5. Visualize Publication Distribution**
def visualize_category_distribution(df, output_path="static/publication_distribution.png"):
    """
    Create a bar chart showing the number of articles per publisher.

    Despite the function name, the chart counts values of the ``publication``
    column, not SWOT categories.

    Args:
        df: DataFrame with a ``publication`` column. May be empty, in which
            case a placeholder chart is written instead of bars.
        output_path: Where the image is saved. Missing parent directories are
            created.

    Returns:
        None. The chart is written to ``output_path``.
    """
    from pathlib import Path  # local import keeps this block self-contained

    publication_counts = df["publication"].value_counts()

    # savefig does not create directories, so make sure the target exists.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    # Draw on an explicit figure/axes pair instead of pyplot's implicit
    # "current figure", so overlapping calls cannot draw onto each other.
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        if publication_counts.empty:
            # Avoid handing an empty Series to the bar plotter; show a
            # message on otherwise blank axes instead.
            ax.text(
                0.5,
                0.5,
                "No articles to display",
                ha="center",
                va="center",
                transform=ax.transAxes,
            )
        else:
            publication_counts.plot(kind="bar", color="skyblue", ax=ax)

        ax.set_title("Number of Articles by Publisher")
        ax.set_xlabel("Publisher")
        ax.set_ylabel("Number of Articles")
        ax.tick_params(axis="x", labelrotation=45)
        fig.tight_layout()
        fig.savefig(output_path)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

# **6. Flask Routes**
@app.route("/", methods=["GET", "POST"])
def index():
    """
    Render the search page and, on a POST with a keyword, its results.

    GET requests (and POSTs whose ``keyword`` field is missing or blank)
    render ``index.html`` with ``summaries`` and ``chart_path`` set to None.
    A POST with a non-blank keyword filters the dataset by that keyword,
    builds the SWOT summaries, writes the publication chart, and passes the
    summaries and the chart path to the template.
    """
    summaries = None
    chart_path = None

    if request.method == "POST":
        # Trim whitespace and skip the work entirely for a blank keyword:
        # embedding the whole dataset against "" is costly and meaningless.
        keyword = request.form.get("keyword", "").strip()

        if keyword:
            # Filter the dataset
            filtered_df = filter_by_keyword(df, keyword)

            # Summarize SWOT categories
            summaries = summarize_by_category(filtered_df)

            # Generate the publication distribution chart. The path is passed
            # explicitly so the file written and the path given to the
            # template cannot drift apart.
            chart_path = "static/publication_distribution.png"
            visualize_category_distribution(filtered_df, output_path=chart_path)

    return render_template("index.html", summaries=summaries, chart_path=chart_path)

# **7. Run Flask App**
if __name__ == "__main__":
    # Start Flask's built-in development server only when this file is run
    # directly (not when it is imported).
    # NOTE(review): debug=True enables Flask's interactive debugger and
    # auto-reloader; confirm this entry point is never used for a deployed
    # instance.
    app.run(debug=True)

Dask installed and ready to use.
Using device: cuda
