In [1]:
import os,sys

# Make the project's helper modules importable from this notebook.
# The directories are resolved against the current working directory, so the
# kernel has to be started from the folder that contains `djangoapp`.
current_path = os.path.abspath('')
for _subdir in ('scripts', 'vertex'):
    _module_dir = os.path.join(current_path, 'djangoapp', 'news', _subdir)
    # Skip directories that are already registered so that re-running this
    # cell does not pile up duplicate sys.path entries.
    if _module_dir not in sys.path:
        sys.path.append(_module_dir)


In [5]:
import pickle
from django.conf import settings
from transformers import pipeline
from nlp import NLP
from llm import LocalLLM
from cloud.vertex_connection import VertexAI

from nltk.corpus import wordnet


In [6]:
class ModelLoader:
    """Instantiate every model used by the classifiers once and keep them as attributes.

    Attributes:
        nlp: the ``NLP`` instance.
        llm: the ``LocalLLM`` instance.
        summarizer: a ``transformers`` ``"summarization"`` pipeline.
        vertex: the ``VertexAI`` instance.
    """

    def __init__(self, summarizer_model="Falconsai/text_summarization"):
        """Create all model objects.

        Args:
            summarizer_model: model identifier handed to
                ``transformers.pipeline("summarization", model=...)``.
                Defaults to the model this notebook has always used.
        """
        # load NLP model
        self.nlp = NLP()

        # wordnet is lazy loaded, this poses a problem when using multiprocessing,
        # so force the load here, right after the NLP model is created.
        wordnet.ensure_loaded()

        # load LocalLLM
        self.llm = LocalLLM()

        # load summarizer
        self.summarizer = pipeline("summarization", model=summarizer_model)

        # load VertexAI
        self.vertex = VertexAI()
        


In [7]:
# Build the loader once: its __init__ creates the NLP, LocalLLM, summarizer and
# VertexAI objects, which are then available as attributes of this instance.
model_loader = ModelLoader()

In [9]:

def classify_NLP(title, nlp):
    """Return 1 if the NLP model scores ``title`` above its cutoff, else 0.

    Args:
        title: headline text passed to ``nlp.predict_on_text``.
        nlp: object exposing ``predict_on_text(text)`` and a ``proba_cutoff``
            attribute.

    Returns:
        ``int`` 1 when element ``[0][1]`` of the prediction is strictly greater
        than ``nlp.proba_cutoff``, otherwise 0.
    """
    # presumably a predict_proba-style result where [0][1] is the clickbait
    # probability of the single input row -- TODO confirm against NLP.
    score = nlp.predict_on_text(title)[0][1]
    is_above_cutoff = score > nlp.proba_cutoff
    return int(is_above_cutoff)

def classify_LLM(title, llm, proba_cutoff=0.5):
    """Return 1 if the LLM's score for ``title`` exceeds ``proba_cutoff``, else 0.

    Args:
        title: headline text passed to ``llm.predict``.
        llm: object exposing ``predict(title)``; the result is compared
            numerically with ``proba_cutoff`` (presumably a probability in
            [0, 1] -- TODO confirm against LocalLLM).
        proba_cutoff: decision threshold. Defaults to 0.5, the value that was
            previously hard-coded in this function.

    Returns:
        ``int`` 1 when the prediction is strictly greater than
        ``proba_cutoff``, otherwise 0.
    """
    probability = llm.predict(title)
    return int(probability > proba_cutoff)
    

def classify_VERTEX(title, vertex, summary=None):
    """Ask the Vertex model whether ``title`` is clickbait.

    Args:
        title: headline text, passed to ``vertex.run`` as ``title=``.
        vertex: object exposing ``run(title=..., summary=...)``.
        summary: optional article summary. It is forwarded as ``summary=``
            only when truthy; otherwise ``vertex.run`` receives the title alone.

    Returns:
        ``int(vertex.run(...))`` on success, or ``-1`` if the call or the
        ``int`` conversion raises an ``Exception``.
    """
    import logging  # local import so the function works without touching the import cell

    run_kwargs = {"title": title}
    if summary:
        run_kwargs["summary"] = summary

    try:
        return int(vertex.run(**run_kwargs))
    except Exception:
        # Best-effort classification: keep the -1 sentinel for callers, but
        # record the failure instead of hiding it. Catching Exception (not a
        # bare except) lets KeyboardInterrupt / SystemExit propagate.
        logging.getLogger(__name__).warning(
            "Vertex classification failed for title %r", title, exc_info=True
        )
        return -1

In [10]:
# Smoke-test inputs: a nonsense title and nonsense body text.
title = ' rrruasfajlsfhd'
content = 'asdfsadfsadasdfasfdlkjasfdljkasfdjlkasfdjlkasfdjlkasfdjlk a sadklfj slkadjflksad lksdjf lkasjf lk sadlkasdj lfk'

# Access the loaded models
nlp = model_loader.nlp
llm = model_loader.llm
summarizer = model_loader.summarizer
# Reuse the instance ModelLoader already created instead of building a second VertexAI().
vertex = model_loader.vertex


# Summarize the body text, then run each classifier on the title.
# NOTE(review): content_summary is computed but not handed to any classifier;
# pass summary=content_summary to classify_VERTEX if the summary-aware call is intended.
content_summary = summarizer(content, max_length=200, min_length=40, do_sample=False)[0]["summary_text"]
clickbait_decision_NLP = classify_NLP(title, nlp)
clickbait_decision_LLM = classify_LLM(title, llm)
clickbait_decision_VERTEX = classify_VERTEX(title, vertex)

Your max_length is set to 200, but your input_length is only 97. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=48)


Prediction: 1 for prompt: Is this title a clickbait: ' rrruasfajlsfhd'? Return 1 if yes, 0 if no.
Return value: True


In [14]:
# Show the Vertex decision: the int returned by classify_VERTEX (-1 means the call failed).
clickbait_decision_VERTEX

1