<a href="https://colab.research.google.com/github/Valmik2004/Day-Wise-Tasks-Internship-Springboard-Internship/blob/main/Day_10_Tasks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

from google.colab import files

# Only open the upload dialog when the dataset is not already in the working
# directory, so a Restart-and-Run-All does not block on the file picker.
# To replace the dataset, delete the local CSV first and re-run this cell.
if os.path.exists('fake_job_postings.csv'):
    uploaded = {}  # nothing new was uploaded in this run
else:
    uploaded = files.upload()

Saving fake_job_postings.csv to fake_job_postings.csv


In [None]:
import pandas as pd, re, joblib
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Read the postings CSV from the working directory into the DataFrame that
# the rest of this cell cleans and trains on.
df = pd.read_csv(
    'fake_job_postings.csv',
)

# Ensure clean_description exists
def basic_clean(text):
    """Normalize a value into lowercase, letters-only, single-spaced text.

    The value is coerced with ``str`` and lowercased, then three regex passes
    run in order: anything matching ``<.*?>`` becomes a space, every character
    other than ``a-z`` or whitespace becomes a space, and each run of
    whitespace collapses to a single space. Leading and trailing whitespace
    is stripped from the result.
    """
    cleaned = str(text).lower()
    # Order matters: tags must go before '<' and '>' are blanked by the
    # letters-only pass, and whitespace is collapsed last.
    for pattern in (r'<.*?>', r'[^a-z\s]', r'\s+'):
        cleaned = re.sub(pattern, ' ', cleaned)
    return cleaned.strip()

# Derive the cleaned-text column unless the CSV already provides one.
if 'clean_description' not in df.columns:
    df['clean_description'] = df['description'].fillna('').apply(basic_clean)

# Keep only rows that still carry text after cleaning.
df = df.dropna(subset=['clean_description'])
nonblank_rows = df['clean_description'].str.strip() != ''
df = df[nonblank_rows]

# TF-IDF features (vocabulary capped at 3000 terms) and the target labels.
vectorizer = TfidfVectorizer(max_features=3000)
X = vectorizer.fit_transform(df['clean_description'])
y = df['fraudulent']

# Stratified 80/20 split with a fixed seed.
split_options = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Fit the classifier on the training portion only (fit returns the estimator).
model = LogisticRegression(max_iter=200, random_state=42).fit(X_train, y_train)

# Persist both artifacts under the file names app.py loads at startup.
joblib.dump(model, 'fake_job_model.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')
print("Saved: fake_job_model.pkl, tfidf_vectorizer.pkl")


Saved: fake_job_model.pkl, tfidf_vectorizer.pkl


In [None]:
%%writefile app.py
from flask import Flask, render_template, request
import joblib

# WSGI application object; the route handlers below register against it.
app = Flask(__name__)

# Per-process prediction tallies. They start at zero on every launch and are
# updated by the /predict handler.
fake_count, real_count = 0, 0

# Trained classifier and its matching TF-IDF vectorizer, read from the
# working directory. NOTE: joblib.load unpickles its input, so only load
# artifact files you produced yourself.
model = joblib.load('fake_job_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')

@app.route('/')
def home():
    """Serve the landing page, passing the current fake/real tallies to the template."""
    tallies = {'fake': fake_count, 'real': real_count}
    return render_template('index.html', **tallies)

def _clean_text(text):
    """Normalize submitted text with the same steps the notebook's
    ``basic_clean`` applies before the vectorizer is fitted.

    Lowercases, replaces ``<...>`` spans and every character other than
    ``a-z``/whitespace with a space, collapses whitespace runs and strips the
    ends. Keep this in sync with the training-side cleaning; otherwise tokens
    seen at prediction time will not line up with the fitted vocabulary.
    """
    import re  # local import: app.py's import header lives outside this block

    cleaned = str(text).lower()
    cleaned = re.sub(r'<.*?>', ' ', cleaned)
    cleaned = re.sub(r'[^a-z\s]', ' ', cleaned)
    return re.sub(r'\s+', ' ', cleaned).strip()


@app.route('/predict', methods=['POST'])
def predict():
    """Classify the submitted job description and render the result page.

    Reads ``job_description`` from the POSTed form. Submissions that are
    empty, shorter than five whitespace-separated words, or that contain no
    letters at all once cleaned are sent back to ``index.html`` with an error
    message. Otherwise the cleaned text is vectorized and scored, the
    matching module-level counter is incremented, and ``result.html`` is
    rendered with the label, the confidence (percentage, two decimals), the
    original description and the running totals.
    """
    global fake_count, real_count
    job_desc = request.form.get('job_description', '').strip()
    cleaned_desc = _clean_text(job_desc)

    # Basic error handling. The emptiness check on the cleaned text prevents
    # scoring an all-zero feature vector (e.g. input made only of digits).
    if not job_desc or len(job_desc.split()) < 5 or not cleaned_desc:
        return render_template(
            'index.html',
            error="Please enter a detailed job description (at least 5 words).",
            fake=fake_count,
            real=real_count
        )

    # Predict on the cleaned text so the features match what the vectorizer
    # was fitted on; the raw text is still what gets shown back to the user.
    X_input = vectorizer.transform([cleaned_desc])
    pred = model.predict(X_input)[0]
    # Column 1 of predict_proba is read as the probability of the "fake"
    # class (label 1), matching the pred == 1 checks below.
    prob = model.predict_proba(X_input)[0][1]

    label = "Fake Job" if pred == 1 else "Real Job"
    # Report the probability of whichever class was actually predicted.
    confidence = round(prob * 100, 2) if pred == 1 else round((1 - prob) * 100, 2)

    # Update counters
    if pred == 1:
        fake_count += 1
    else:
        real_count += 1

    return render_template(
        'result.html',
        label=label,
        confidence=confidence,
        description=job_desc,
        fake=fake_count,
        real=real_count
    )

if __name__ == '__main__':
    # Start Flask's built-in server when the file is executed directly.
    # Port 5000 is Flask's default, spelled out here because it is the port
    # the notebook's ngrok tunnel forwards to.
    app.run(port=5000)


Writing app.py


In [None]:
# Create the folder the following %%writefile cells write index.html and
# result.html into; -p keeps the command from failing when it already exists.
!mkdir -p templates


In [None]:
%%writefile templates/index.html
<!DOCTYPE html>
<html lang="en">
<head>
{# Landing page. Context: `fake` and `real` (running prediction counts) and
   an optional `error` message set when /predict rejects the submission. #}
<meta charset="UTF-8">
<title>Fake Job Detection</title>
<style>
    body { font-family: 'Segoe UI'; background: #f3f5f9; text-align: center; padding: 50px; }
    form { background: white; padding: 30px; border-radius: 12px; box-shadow: 0 0 10px rgba(0,0,0,0.1); width: 50%; margin: auto; }
    textarea { width: 90%; height: 120px; padding: 10px; border: 1px solid #ccc; border-radius: 8px; }
    button { padding: 10px 20px; border: none; border-radius: 8px; background-color: #28a745; color: white; cursor: pointer; }
    button:hover { background-color: #218838; }
    .error { color: red; margin-top: 10px; }
    .stats { margin-top: 30px; background: #fff; padding: 15px; border-radius: 10px; width: 50%; margin: auto; }
    .fake { color: red; font-weight: bold; }
    .real { color: green; font-weight: bold; }
</style>
</head>
<body>
<h2>Fake Job Detection System</h2>
<form action="/predict" method="POST">
<label for="job_description"><b>Enter Job Description:</b></label><br><br>
<textarea id="job_description" name="job_description" placeholder="Paste job post here..."></textarea><br><br>
<button type="submit">Check Authenticity</button>
</form>

{% if error %}
<p class="error">{{ error }}</p>
{% endif %}

<div class="stats">
<h3>Prediction Summary</h3>
{# Emoji are written as numeric character references so they survive any
   re-encoding of this file. #}
<p>&#x1F9E9; Fake Jobs Detected: <span class="fake">{{ fake }}</span></p>
<p>&#x2705; Real Jobs Detected: <span class="real">{{ real }}</span></p>
</div>
</body>
</html>


Writing templates/index.html


In [None]:
%%writefile templates/result.html
<!DOCTYPE html>
<html lang="en">
<head>
{# Result page. Context: `label` ('Fake Job' or 'Real Job'), `confidence`
   (percentage), `description` (the submitted text) and the running `fake` /
   `real` prediction counts. #}
<meta charset="UTF-8">
<title>Fake Job Detection Result</title>
<style>
    body { font-family: 'Segoe UI'; background: #f3f5f9; text-align: center; padding: 50px; }
    .card { background: white; padding: 30px; border-radius: 12px; box-shadow: 0 0 10px rgba(0,0,0,0.1); width: 50%; margin: auto; }
    h2 { color: #333; }
    .fake { color: red; font-weight: bold; }
    .real { color: green; font-weight: bold; }
    progress { width: 80%; height: 20px; margin-top: 10px; }
    a { display: inline-block; margin-top: 20px; text-decoration: none; color: #007bff; }
    a:hover { text-decoration: underline; }
    .stats { margin-top: 30px; background: #fff; padding: 15px; border-radius: 10px; width: 80%; margin: auto; }
</style>
</head>
<body>
<div class="card">
    <h2>Prediction Result</h2>
    <p><b>Job Description:</b> {{ description }}</p>

    <h3>Prediction:
        {% if label == 'Fake Job' %}
        <span class="fake">{{ label }}</span>
        {% else %}
        <span class="real">{{ label }}</span>
        {% endif %}
    </h3>

    <p>Confidence: {{ confidence }}%</p>
    <progress value="{{ confidence }}" max="100"></progress>

    {# Emoji are written as numeric character references so they survive any
       re-encoding of this file. #}
    <a href="/">&#x1F519; Go Back</a>

    <div class="stats">
        <h3>Prediction Summary</h3>
        <p>&#x1F9E9; Fake Jobs Detected: <span class="fake">{{ fake }}</span></p>
        <p>&#x2705; Real Jobs Detected: <span class="real">{{ real }}</span></p>
    </div>
</div>
</body>
</html>


Writing templates/result.html


In [None]:
# Install the packages needed to serve and expose the app, then import the
# ngrok helper that the next cell uses to open a tunnel.
# TODO: versions are not pinned here; pin them once known-good versions are chosen.
!pip install pyngrok flask joblib --quiet
from pyngrok import ngrok


In [None]:
# If you restart runtime, set token again
ngrok.set_auth_token("354A7wIzV8oo6I3t1xYLDyPtsBH_4UifAjbTkMFA1pG6hLhgT")

public_url = ngrok.connect(5000)
print("Public URL:", public_url)

!python app.py

Public URL: NgrokTunnel: "https://unbalanced-galen-holdable.ngrok-free.dev" -> "http://localhost:5000"
 * Serving Flask app 'app'
 * Debug mode: off
 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [12/Nov/2025 03:47:08] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [12/Nov/2025 03:47:10] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
127.0.0.1 - - [12/Nov/2025 03:47:29] "POST /predict HTTP/1.1" 200 -
127.0.0.1 - - [12/Nov/2025 03:55:08] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [12/Nov/2025 03:57:15] "POST /predict HTTP/1.1" 200 -
