In [None]:
from flask import Flask, render_template, request
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# WSGI application object; the route below registers itself on it.
app = Flask(import_name=__name__)

# --------------------
# Training Data
# --------------------
# Each example keeps its text next to its label so the two cannot drift out
# of alignment. The route below reports a prediction of 1 as spam.
_LABELLED_COMMENTS = (
    ("Win free money now", 1),
    ("Nice video very helpful", 0),
    ("Click here to earn fast", 1),
    ("Great explanation", 0),
    ("Subscribe for free followers", 1),
    ("Thanks for this content", 0),
    ("Earn money from home", 1),
    ("Very informative post", 0),
)

# Column-oriented view of the examples, in the same order as above.
train_data = {
    "comment": [text for text, _ in _LABELLED_COMMENTS],
    "label": [flag for _, flag in _LABELLED_COMMENTS],
}

train_df = pd.DataFrame(train_data)

# --------------------
# ML Pipeline
# --------------------
# TF-IDF features (lowercased, English stop words removed) feed a multinomial
# Naive Bayes classifier. The pipeline is fitted right here at module level
# on the small training frame; `fit` returns the fitted pipeline itself.
model = Pipeline(
    steps=[
        ("vectorizer", TfidfVectorizer(lowercase=True, stop_words="english")),
        ("classifier", MultinomialNB()),
    ]
).fit(train_df["comment"], train_df["label"])

# --------------------
# Helper: Auto-detect comment column
# --------------------
def find_comment_column(df):
    """Return the label of the column that holds comment text, if any.

    Candidate names are tried in priority order: ``comment_text``,
    ``comment``, ``content``, ``text``. An exact match always wins. When no
    column matches exactly, the same candidates are tried again ignoring
    letter case and surrounding whitespace, so headers such as ``"Comment"``
    or ``" TEXT "`` are still recognised.

    Args:
        df: Object exposing an iterable ``columns`` attribute, such as a
            ``pandas.DataFrame``.

    Returns:
        The matching column label exactly as it appears in ``df.columns``
        (so it can be used to index ``df``), or ``None`` if nothing matches.
    """
    candidates = ("comment_text", "comment", "content", "text")

    # Pass 1: exact names, in priority order.
    for name in candidates:
        if name in df.columns:
            return name

    # Pass 2: tolerate case and stray whitespace. Map each normalised header
    # to the first original label that produced it; str() keeps non-string
    # labels (e.g. integer column names) from raising.
    normalised = {}
    for label in df.columns:
        normalised.setdefault(str(label).strip().lower(), label)

    for name in candidates:
        if name in normalised:
            return normalised[name]

    return None

# --------------------
# Routes
# --------------------
def _read_uploaded_csv(file):
    """Parse an uploaded CSV into a DataFrame, trying UTF-8 and then Latin-1.

    Args:
        file: Seekable binary file-like object holding the upload.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        pandas.errors.EmptyDataError: If there are no columns to parse.
        pandas.errors.ParserError: If the content is not well-formed CSV.
    """
    try:
        return pd.read_csv(file, encoding="utf-8")
    except UnicodeDecodeError:
        # The failed UTF-8 attempt has already consumed part of the stream;
        # rewind so the Latin-1 retry parses the whole file rather than
        # whatever happens to be left.
        file.seek(0)
        return pd.read_csv(file, encoding="latin1")


@app.route("/", methods=["GET", "POST"])
def index():
    """Render the upload page and, on POST, classify the uploaded comments.

    GET renders ``index.html`` with no table and no message. POST expects a
    CSV under the ``file`` form field, picks the comment column with
    ``find_comment_column``, runs ``model.predict`` on it, and renders the
    first 50 rows with an added ``Prediction`` column. Every rejected upload
    re-renders the page with an explanatory ``info`` message instead of
    raising.

    Returns:
        The rendered ``index.html`` template, given ``table`` (an HTML string
        or ``None``) and ``info`` (a status message or ``None``).
    """

    def page(info=None, table=None):
        # Single place that knows the template name and its two variables.
        return render_template("index.html", table=table, info=info)

    if request.method != "POST":
        return page()

    if "file" not in request.files:
        return page("No file uploaded")

    file = request.files["file"]

    # An empty (or missing) filename means the form was submitted without
    # choosing a file.
    if not file.filename:
        return page("No file selected")

    try:
        df = _read_uploaded_csv(file)
    except pd.errors.EmptyDataError:
        # Raised for a file with no header or data at all; report it the
        # same way as a header-only file below.
        return page("Uploaded CSV file is empty")
    except pd.errors.ParserError:
        return page("Uploaded file could not be parsed as CSV")

    # Header present but no data rows.
    if df.empty:
        return page("Uploaded CSV file is empty")

    col = find_comment_column(df)
    if col is None:
        return page("No valid comment column found in CSV")

    # Missing cells become empty strings so the vectorizer only sees text.
    comments = df[col].fillna("").astype(str)
    predictions = model.predict(comments)

    # NOTE(review): the label text below looks mis-encoded (probably emoji
    # saved under the wrong encoding) — confirm the intended characters and
    # re-save the file as UTF-8. Kept byte-for-byte here.
    df["Prediction"] = [
        "Spam üö´" if p == 1 else "Not Spam ‚úÖ"
        for p in predictions
    ]

    # Only a preview is rendered.
    table = df.head(50).to_html(index=False)
    info = f"Detected using column: '{col}' (showing first 50 rows)"
    return page(info, table)

# --------------------
# App Run (Safe for Jupyter & Windows)
# --------------------
if __name__ == "__main__":
    # Debug mode on, auto-reloader off; the section header above notes this
    # combination is what keeps the launch safe under Jupyter and on Windows.
    run_options = {"debug": True, "use_reloader": False}
    app.run(**run_options)



 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [17/Feb/2026 22:01:27] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [17/Feb/2026 22:01:28] "GET /favicon.ico HTTP/1.1" 404 -
