In [None]:
import time
from typing import List, Tuple

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

from src.preprocessing import preprocess

In [None]:
# Artifact locations; these are relative paths, so they resolve against the
# kernel's current working directory.
VECTORIZER_PATH = "../models/tfidf_vectorizer.pkl"
ENCODER_PATH = "../models/label_encoder.pkl"

# Display name -> path of the persisted classifier to benchmark.
MODEL_PATHS = dict([
    ("Logistic Regression", "../models/logreg_model.pkl"),
    ("SVM", "../models/svm_model.pkl"),
])

# Persisted artifacts loaded once up front; `vectorizer` is read as a global by
# `time_inference`.
vectorizer = joblib.load(VECTORIZER_PATH)
encoder = joblib.load(ENCODER_PATH)

def load_model(path: str):
    """Return the object that ``joblib.load`` reads from ``path``.

    Args:
        path: Location of the persisted model file.

    Returns:
        Whatever object was stored in the file.

    Note:
        joblib persistence is pickle-based, so only load files from a
        trusted source.
    """
    loaded = joblib.load(path)
    return loaded

In [None]:
# Ten short feedback-style texts used as the input for the timing runs and for
# fitting the small in-notebook pipeline.
sample_tweets = [
    "I absolutely loved the user interface and overall performance!",
    "Worst experience ever. Completely disappointed with the app.",
    "Not sure how I feel about it yet.",
    "The new update made things worse than before.",
    "Fantastic service! Everything works perfectly.",
    "It crashes too much. Unusable.",
    "Could be better, but decent overall.",
    "I don't like the new design, it's confusing.",
    "Great job on the latest features, very useful!",
    "I hate the new changes, they ruined everything."
]

In [None]:
def time_inference(model, tweets: List[str]) -> Tuple[float, float]:
    """Time one-at-a-time and whole-batch ``predict`` calls for ``model``.

    Every tweet is run through ``preprocess`` and the module-level
    ``vectorizer`` before either timer starts, so only ``model.predict`` is
    measured.

    Args:
        model: Object exposing ``predict``; it is called once for each item
            yielded by iterating the vectorizer output, then once on the
            whole output.
        tweets: Texts to benchmark with.

    Returns:
        ``(avg_single, batch_time)`` in seconds: the elapsed time of the
        one-at-a-time loop divided by ``len(tweets)``, and the elapsed time
        of the single batch call.
    """
    features = vectorizer.transform([preprocess(text) for text in tweets])
    clock = time.perf_counter

    # One predict call per vectorised sample.
    t0 = clock()
    for row in features:
        model.predict(row)
    per_row_total = clock() - t0

    # A single predict call over everything at once.
    t0 = clock()
    model.predict(features)
    batch_time = clock() - t0

    return per_row_total / len(tweets), batch_time

In [None]:
# Benchmark every persisted model on the same sample tweets and collect one
# summary row per model.
results = []

for name, path in MODEL_PATHS.items():
    model = load_model(path)
    avg_time, batch_time = time_inference(model, sample_tweets)

    row = {"Model": name}
    row["Avg Time Per Tweet (s)"] = round(avg_time, 6)
    row["Batch Time (s)"] = round(batch_time, 6)
    results.append(row)

df_results = pd.DataFrame(results)
df_results

In [None]:
# Toy sentiment labels, one per entry of `sample_tweets`, in the same order.
# NOTE(review): several labels look inconsistent with the tweet at the same
# position (e.g. "Fantastic service! Everything works perfectly." is paired
# with "negative"). This does not affect the timing benchmark, but confirm the
# intended labels before relying on this pipeline's predictions.
sample_labels = [
    "positive",
    "negative",
    "negative",
    "positive",
    "negative",
    "positive",
    "negative",
    "positive",
    "negative",
    "negative"
]

# Fail fast if the two lists drift apart; train_test_split needs equal lengths.
assert len(sample_labels) == len(sample_tweets), "one label per sample tweet is required"

cleaned = [preprocess(t) for t in sample_tweets]

# Fixed random_state keeps the train/test partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    cleaned, sample_labels, test_size=0.3, random_state=42
)

# End-to-end pipeline: TF-IDF features feeding a logistic regression.
live_pipeline = make_pipeline(
    TfidfVectorizer(),
    LogisticRegression(max_iter=500)
)

live_pipeline.fit(X_train, y_train)

In [None]:
def time_pipeline(model, tweets: List[str]) -> Tuple[float, float]:
    """Time one-at-a-time and whole-batch ``predict`` calls on text input.

    The texts are handed to ``model.predict`` as-is, so whatever work the
    model does inside ``predict`` is part of the measurement.

    Args:
        model: Object exposing ``predict``; it is called with a
            single-element list per text, then once with ``tweets`` itself.
        tweets: Texts to benchmark with.

    Returns:
        ``(avg_single, batch_time)`` in seconds: the elapsed time of the
        one-at-a-time loop divided by ``len(tweets)``, and the elapsed time
        of the single batch call.
    """
    clock = time.perf_counter

    # One predict call per text, each wrapped in its own single-item list.
    t0 = clock()
    for text in tweets:
        model.predict([text])
    single_total = clock() - t0

    # A single predict call over the whole collection.
    t0 = clock()
    model.predict(tweets)
    batch_time = clock() - t0

    return single_total / len(tweets), batch_time

# Benchmark the pipeline trained in this notebook on the hold-out texts.
# Unlike `time_inference`, the timed `predict` calls here receive text, so the
# pipeline's TfidfVectorizer step is part of the measurement, and the input is
# `X_test` rather than all of `sample_tweets` — compare the figures with care.
live_avg, live_batch = time_pipeline(live_pipeline, X_test)

live_row = {
    "Model": "Live Logistic Pipeline",
    "Avg Time Per Tweet (s)": round(live_avg, 6),
    "Batch Time (s)": round(live_batch, 6)
}

# Rebuild the table instead of appending in place, so re-running this cell
# replaces the pipeline's row rather than adding a duplicate.
df_results = pd.concat(
    [df_results[df_results["Model"] != live_row["Model"]], pd.DataFrame([live_row])],
    ignore_index=True,
)

df_results

In [None]:
# Bar chart of the mean per-tweet latency recorded for each row of df_results.
# An explicit figure/axes pair is used so every call targets this chart rather
# than pyplot's implicit "current" figure.
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=df_results, x="Model", y="Avg Time Per Tweet (s)", ax=ax)
ax.set_title("Average Inference Time per Tweet")
ax.set_ylabel("Time (seconds)")
# Slight tilt keeps the model names from overlapping.
ax.tick_params(axis="x", labelrotation=15)
ax.grid(True, axis="y", linestyle="--", alpha=0.7)
fig.tight_layout()
plt.show()