In [1]:
import requests
from transformers import pipeline
from app.utils.services import retrieve_article, update_article, store_processed_article

# Base URL of the article storage service (adjust for your environment).
# No trailing slash: NLPProcessor.retrieve_article appends "/articles/<id>" to it.
STORAGE_SERVICE_URL: str = "http://localhost:8000"

class NLPProcessor:
    """Run summarization, sentiment analysis and classification on stored articles.

    Every task method fetches the article from the storage service by id,
    reads its ``content`` field and passes that text to the matching
    ``transformers`` pipeline created in ``__init__``.
    """

    # Labels used by classify() when the caller does not supply any.
    # Every entry must be a non-empty string: an empty label would be scored
    # like a real category and pollute the ranking.
    DEFAULT_CANDIDATE_LABELS = (
        "economics",
        "sports",
        "entertainment",
        "politics",
        "technology",
        "culture",
    )

    # Default number of seconds to wait for the storage service to answer.
    DEFAULT_REQUEST_TIMEOUT = 10.0

    def __init__(self, request_timeout: float = DEFAULT_REQUEST_TIMEOUT):
        """Load one pipeline per task.

        Args:
            request_timeout: Seconds to wait for the storage service before
                the HTTP request is abandoned.
        """
        self.request_timeout = request_timeout
        self.summarizer = pipeline("summarization")
        self.sentiment_analyzer = pipeline("sentiment-analysis")
        # Zero-shot classification lets callers choose the labels at call time.
        self.classifier = pipeline("zero-shot-classification")

    def retrieve_article(self, article_id: str) -> dict:
        """Fetch an article from the storage service via its HTTP API.

        Args:
            article_id: Identifier appended to ``/articles/`` on the service.

        Returns:
            The decoded JSON body of the response. The task methods expect it
            to contain a ``content`` field.

        Raises:
            RuntimeError: If the service answers with a status other than 200.
        """
        url = f"{STORAGE_SERVICE_URL}/articles/{article_id}"
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled server.
        response = requests.get(url, timeout=self.request_timeout)
        if response.status_code != 200:
            raise RuntimeError(f"Failed to retrieve article {article_id}: {response.text}")
        return response.json()

    def _article_text(self, article_id: str, purpose: str) -> str:
        """Return the article's ``content``, or raise if it is missing or empty.

        ``purpose`` completes the error message, e.g. ``"to summarize"``.
        """
        article = self.retrieve_article(article_id)
        text = article.get("content")
        if not text:
            raise ValueError(f"Article {article_id} has no content {purpose}.")
        return text

    def summarize(self, article_id: str) -> str:
        """Retrieve the article and return a summary of its content.

        Adjust ``max_length`` and ``min_length`` below as needed.

        Raises:
            RuntimeError: If the article cannot be retrieved.
            ValueError: If the article has no content.
        """
        text = self._article_text(article_id, "to summarize")
        # truncation=True clips over-long articles to the model's input limit
        # instead of letting the pipeline fail on them.
        summary = self.summarizer(
            text,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]["summary_text"]

    def analyze_sentiment(self, article_id: str) -> dict:
        """Retrieve the article and run sentiment analysis on its content.

        Returns:
            The first result produced by the sentiment pipeline (a dictionary
            with the sentiment label and score).

        Raises:
            RuntimeError: If the article cannot be retrieved.
            ValueError: If the article has no content.
        """
        text = self._article_text(article_id, "for sentiment analysis")
        # Clip over-long articles rather than failing inside the model.
        sentiment = self.sentiment_analyzer(text, truncation=True)
        return sentiment[0]

    def classify(self, article_id: str, candidate_labels: list = None) -> dict:
        """Retrieve the article and classify its content against candidate labels.

        Args:
            article_id: Identifier of the article to classify.
            candidate_labels: Labels to score. When ``None``, a copy of
                ``DEFAULT_CANDIDATE_LABELS`` is used.

        Returns:
            The result of the zero-shot classification pipeline, unmodified.

        Raises:
            RuntimeError: If the article cannot be retrieved.
            ValueError: If the article has no content.
        """
        if candidate_labels is None:
            candidate_labels = list(self.DEFAULT_CANDIDATE_LABELS)
        text = self._article_text(article_id, "to classify")
        return self.classifier(text, candidate_labels)

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'app'

In [None]:
# Smoke test: run each NLP task once against a single stored article and
# print the result. Replace the placeholder id with a real article id.
nlp_processor = NLPProcessor()
test_article_id = "your-article-uuid-here"

try:
    # Tasks run in order; the first failure skips the remaining ones.
    summary = nlp_processor.summarize(test_article_id)
    print("Summary:", summary)

    sentiment = nlp_processor.analyze_sentiment(test_article_id)
    print("Sentiment:", sentiment)

    classification = nlp_processor.classify(test_article_id)
    print("Classification:", classification)
except Exception as error:
    # Top-level boundary of the demo cell: report the problem instead of
    # raising a traceback.
    print(f"Error: {error}")