In [5]:
# Install necessary packages
!pip install langchain together sqlalchemy langchain_community langchain_experimental transformers langchain-core llamacpp


Defaulting to user installation because normal site-packages is not writeable
Collecting llamacpp
  Downloading llamacpp-0.1.14.tar.gz (1.9 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.9/1.9 MB 7.9 MB/s eta 0:00:00
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done
Building wheels for collected packages: llamacpp
  Building wheel for llamacpp (pyproject.toml) ... done
  Created wheel for llamacpp: filename=llamacpp-0.1.14-cp312-cp312-linux_x86_64.whl size=238956 sha256=f8c7a88e4915b59ae1fee96095c3d97553e1c7c72c14ed6ec65b2005a7158bca
  Stored in directory: /home/ucloud/.cache/pip/wheels/c1/48/87/073511ac7a086d0953f73a70c9662222b4ac85fa8c3e2fa61f
Successfully built llamacpp
Installing collected packages: llamacpp
Successfully installed llamacpp-0.1.14


In [9]:
import os
from langchain import SQLDatabase
from langchain_experimental.sql import SQLDatabaseSequentialChain, SQLDatabaseChain
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_core.runnables.base import RunnableLambda
from pydantic import BaseModel
from typing import List
from huggingface_hub import login
from langchain_community.llms import LlamaCpp

In [10]:
# SQLAlchemy-style URI for the news database; the fourth slash makes the
# file path absolute. The URI is then wrapped in LangChain's SQLDatabase.
sqlite_uri = "sqlite:////work/MLops/news_database.db"
db = SQLDatabase.from_uri(database_uri=sqlite_uri)

In [12]:
import os
import requests
import sqlite3
import pickle

# Define the setup function to load SVM model and TF-IDF vectorizer
def setup():
    # Load the SVM model
    with open("/work/MLops/Models/svm_classifier.pkl", "rb") as f:
        svm_classifier = pickle.load(f)

    # Load the TF-IDF vectorizer
    with open("/work/MLops/Models/tfidf_vectorizer.pkl", "rb") as f:
        vectorizer = pickle.load(f)

    # Connect to SQLite database
    conn = sqlite3.connect("news_database.db")
    cursor = conn.cursor()

    return svm_classifier, vectorizer, cursor

# Load the model artifacts and open the database cursor once; the prediction
# functions below read these module-level names.
(svm_classifier, vectorizer, cursor) = setup()

# Function to predict sentiment for a given title using the SVM model
def predict_sentiment_svm(title):
    """Return the SVM's sentiment label for one headline, capitalized.

    The headline is wrapped in a single-element list, transformed with the
    module-level ``vectorizer`` and classified with the module-level
    ``svm_classifier``; the first predicted label is returned after
    ``.capitalize()``.
    """
    features = vectorizer.transform([title])
    predicted_labels = svm_classifier.predict(features)
    first_label = predicted_labels[0]
    return first_label.capitalize()

# Function to predict sentiment, political leaning, and bias for a given title using the Together model
def predict_sentiment_together(title):
    """Label a headline's sentiment, bias and political leaning via the Together API.

    Args:
        title: The news headline to classify.

    Returns:
        A ``(sentiment, bias, political_leaning)`` tuple of strings. The
        values come from the first completion in which both sentiment and
        political leaning could be recognised, otherwise from the last
        completion. All three are ``"Unknown"`` when the API returns no
        completions.

    Raises:
        RuntimeError: If the ``TOGETHER_API_KEY`` environment variable is unset.
        requests.HTTPError: If the API answers with an error status.
    """
    # Define the prompt for the Together API
    prompt = f"""\
    Label the news headline as either 'Positive', 'Negative', or 'Neutral', and indicate if the sentence is biased and what the political leaning is (Liberal, Conservative, Neutral):

    Headline: {title}
    Label:"""

    # Make request to Together API
    endpoint = 'https://api.together.xyz/inference'
    TOGETHER_API_KEY = os.getenv('TOGETHER_API_KEY')
    if not TOGETHER_API_KEY:
        # Fail early instead of sending "Bearer None" and tripping over the
        # error payload further down.
        raise RuntimeError("TOGETHER_API_KEY environment variable is not set")

    res = requests.post(endpoint, json={
        "model": 'meta-llama/Llama-3-70b-chat-hf',
        "prompt": prompt,
        "top_p": 1,
        "top_k": 40,
        "temperature": 0.8,
        "max_tokens": 50,  # Increased to handle longer responses
        "repetition_penalty": 1,
    }, headers={
        "Authorization": f"Bearer {TOGETHER_API_KEY}",
        "User-Agent": "<YOUR_APP_NAME>"
    }, timeout=60)  # requests has no default timeout; avoid hanging the notebook
    res.raise_for_status()

    # Defaults cover the case where the API returns an empty choices list.
    sentiment = bias = political_leaning = "Unknown"
    for choice in res.json()['output']['choices']:
        sentiment, bias, political_leaning = _parse_together_label(choice['text'])
        # The parser always yields a bias value for a completion, so only
        # sentiment and leaning decide whether this completion is usable.
        if sentiment != "Unknown" and political_leaning != "Unknown":
            break

    return sentiment, bias, political_leaning


def _parse_together_label(text):
    """Extract ``(sentiment, bias, political_leaning)`` from one completion text."""
    text = text.strip()

    # Sentiment is expected to be the first word of the completion.
    sentiment = next(
        (label for label in ("Positive", "Negative", "Neutral") if text.startswith(label)),
        "Unknown",
    )

    # "Not Biased" contains "Biased", so negated forms must be ruled out first.
    lowered = text.lower()
    negated = any(
        marker in lowered
        for marker in ("not biased", "unbiased", "non-biased", "no bias")
    )
    bias = "Biased" if ("biased" in lowered and not negated) else "Not Biased"

    # Look for a "Neutral" leaning only after the leading sentiment label so a
    # Neutral sentiment is not also counted as a Neutral political leaning.
    remainder = text[len(sentiment):] if sentiment != "Unknown" else text
    if "Liberal" in text:
        political_leaning = "Liberal"
    elif "Conservative" in text:
        political_leaning = "Conservative"
    elif "Neutral" in remainder:
        political_leaning = "Neutral"
    else:
        political_leaning = "Unknown"

    return sentiment, bias, political_leaning

# Function to fetch the newest articles (3 by default) and predict sentiment using both models
def predict_sentiment_for_newest_articles(max_articles=3):
    """Score the most recently published headlines with both models.

    Runs a query on the module-level ``cursor`` that selects ``title`` from
    ``news_items`` ordered by ``published_at`` descending, limited to
    ``max_articles`` rows, then labels every title with the SVM model and the
    Together model.

    Returns:
        A list of ``(title, sentiment_svm, sentiment_together, bias,
        political_leaning)`` tuples in query order.
    """
    cursor.execute("SELECT title FROM news_items ORDER BY published_at DESC LIMIT ?", (max_articles,))
    rows = cursor.fetchall()

    def _score(headline):
        # SVM first, then the Together call, one headline at a time.
        svm_label = predict_sentiment_svm(headline)
        llm_label, bias_label, leaning_label = predict_sentiment_together(headline)
        return (headline, svm_label, llm_label, bias_label, leaning_label)

    return [_score(row[0]) for row in rows]

# Score the newest articles with both models
articles_sentiments = predict_sentiment_for_newest_articles()

# Report every article as labelled lines followed by a blank separator line
for title, sentiment_svm, sentiment_together, bias, political_leaning in articles_sentiments:
    report_fields = (
        ("Title:", title),
        ("Sentiment (SVM):", sentiment_svm),
        ("Sentiment (Together):", sentiment_together),
        ("Bias (Together):", bias),
        ("Political Leaning (Together):", political_leaning),
    )
    for label, value in report_fields:
        print(label, value)
    print()


FileNotFoundError: [Errno 2] No such file or directory: 'svm_classifier.pkl'