# EXAMPLE OF LLM FOR BIAS COMPARISON AND SUMMARY GENERATION
## TO BE CONVERTED TO API USING FLASK

In [1]:
import os
import sys
import pandas as pd
from dotenv import load_dotenv

from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core import (
    Settings,
    Document,
    VectorStoreIndex,
    SummaryIndex,
)

import tensorflow as tf
from tensorflow.keras import preprocessing
from keras.preprocessing.sequence import pad_sequences

# Register the imported `tensorflow.keras.preprocessing` module in the import
# cache under the name 'keras.src.preprocessing'.
# NOTE(review): presumably needed so the pickled tokenizers loaded further down
# can resolve that module path when unpickling — confirm against the pickles.
sys.modules['keras.src.preprocessing'] = preprocessing

import pickle

import nest_asyncio

# Patch asyncio so that an event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop while later cells request async execution — confirm.
nest_asyncio.apply()

In [2]:
# Load the article table; per the file name it already carries embeddings and
# cluster assignments computed upstream.
df = pd.read_parquet(
    '../data/sameExample_embedded_clustered.parquet.gzip'
)

In [3]:

# Load environment variables (including the OpenAI key) from the local env file.
load_dotenv('var.env')

# Fail fast with a readable message. Assigning the result of os.getenv()
# straight into os.environ raises "TypeError: str expected, not NoneType"
# when the key is missing, which hides the real cause.
openai_api_key = os.getenv('OPENAI_API_KEY')
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in var.env or in the environment."
    )
os.environ['OPENAI_API_KEY'] = openai_api_key

# Library-wide defaults used by the indexes and query engines built below.
Settings.llm = OpenAI(temperature=0.5, model="chatgpt-4o-latest")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Standalone client for the same model, created without an explicit temperature.
llm = OpenAI(
    model="chatgpt-4o-latest"
)

In [4]:
# Bias Model
path = '../model/bias/'
# NOTE(security): pickle.load can execute arbitrary code; only load tokenizer
# files that come from a trusted source.
with open(path+'bias_tokenizer.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

interpreter = tf.lite.Interpreter(model_path=path+'bias_detection_lstm.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# The generic names above are rebound by the hoax and liberalism/conservative
# cells. Keep dedicated references so predict_bias keeps using the bias model
# no matter which model cell ran last.
bias_tokenizer = tokenizer
bias_interpreter = interpreter
bias_input_details = input_details
bias_output_details = output_details

def predict_bias(news_text):
    """Classify news text with the TFLite bias model.

    Args:
        news_text: list of article texts (a bare string is treated as a
            one-element list). Only the label of the first text is returned.

    Returns:
        int: 1 when the model score for the first text is above 0.5
        (labelled 'biased' downstream), otherwise 0.
    """
    if isinstance(news_text, str):
        # A bare string would otherwise be tokenized character by character.
        news_text = [news_text]

    # Tokenize and pad the new text.
    sequences = bias_tokenizer.texts_to_sequences(news_text)
    max_len = 30  # must match the maximum length used when training the model
    padded = pad_sequences(sequences, maxlen=max_len)

    # Cast to the dtype declared by the model's input tensor.
    padded = padded.astype(bias_input_details[0]['dtype'])

    # Feed the padded batch, run inference and read the output tensor.
    bias_interpreter.set_tensor(bias_input_details[0]['index'], padded)
    bias_interpreter.invoke()
    predictions = bias_interpreter.get_tensor(bias_output_details[0]['index'])

    # Threshold the first prediction into a binary label.
    return 1 if predictions[0] > 0.5 else 0


In [5]:
# Predict bias
# Each article is wrapped in a one-element list because predict_bias takes a
# list of texts and returns the label of the first one.
df['bias'] = df['content'].map(lambda article_text: predict_bias([article_text]))

In [6]:
# Hoax Model
path = '../model/hoax/'
# NOTE(security): pickle.load can execute arbitrary code; only load tokenizer
# files that come from a trusted source.
with open(path+'tokenizer_A3.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

interpreter = tf.lite.Interpreter(model_path=path+'hoax_detection_A3.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# The generic names above are shared with the bias and liberalism/conservative
# cells. Keep dedicated references so predict_hoax keeps using the hoax model
# no matter which model cell ran last.
hoax_tokenizer = tokenizer
hoax_interpreter = interpreter
hoax_input_details = input_details
hoax_output_details = output_details

def predict_hoax(news_text):
    """Classify news text with the TFLite hoax model.

    Args:
        news_text: list of article texts (a bare string is treated as a
            one-element list). Only the label of the first text is returned.

    Returns:
        int: 1 when the model score for the first text is above 0.5
        (labelled 'hoax' downstream), otherwise 0.
    """
    if isinstance(news_text, str):
        # A bare string would otherwise be tokenized character by character.
        news_text = [news_text]

    # Tokenize and pad to the sequence length used by this model.
    sequences = hoax_tokenizer.texts_to_sequences(news_text)
    max_len = 100
    padded = pad_sequences(sequences, maxlen=max_len)

    # Cast to the dtype declared by the model's input tensor.
    padded = padded.astype(hoax_input_details[0]['dtype'])

    # Feed the padded batch, run inference and read the output tensor.
    hoax_interpreter.set_tensor(hoax_input_details[0]['index'], padded)
    hoax_interpreter.invoke()
    predictions = hoax_interpreter.get_tensor(hoax_output_details[0]['index'])

    # Threshold the first prediction into a binary label.
    return 1 if predictions[0] > 0.5 else 0

In [7]:
# Predict hoax
# Each article is wrapped in a one-element list because predict_hoax takes a
# list of texts and returns the label of the first one.
df['hoax'] = df['content'].map(lambda article_text: predict_hoax([article_text]))

In [8]:
# Liberalism/Conservative Model
path = '../model/liberalism_conservative/'
# NOTE(security): pickle.load can execute arbitrary code; only load tokenizer
# files that come from a trusted source.
with open(path+'tokenizer_liberalism_conservative_A.pkl', 'rb') as f:
    tokenizer = pickle.load(f)

interpreter = tf.lite.Interpreter(model_path=path+'liberalism_conservative_A.tflite')
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# The generic names above are shared with the bias and hoax cells. Keep
# dedicated references so this predictor keeps using its own model no matter
# which model cell ran last.
ideology_tokenizer = tokenizer
ideology_interpreter = interpreter
ideology_input_details = input_details
ideology_output_details = output_details

def predict_liberalism_conservative(news_text):
    """Classify news text with the TFLite liberalism/conservative model.

    Args:
        news_text: list of article texts (a bare string is treated as a
            one-element list). Only the label of the first text is returned.

    Returns:
        int: 1 when the model score for the first text is above 0.5
        (labelled 'liberalism' downstream), otherwise 0 ('conservative').
    """
    if isinstance(news_text, str):
        # A bare string would otherwise be tokenized character by character.
        news_text = [news_text]

    # Tokenize and pad to the sequence length used by this model.
    sequences = ideology_tokenizer.texts_to_sequences(news_text)
    max_len = 100
    padded = pad_sequences(sequences, maxlen=max_len)

    # Cast to the dtype declared by the model's input tensor.
    padded = padded.astype(ideology_input_details[0]['dtype'])

    # Feed the padded batch, run inference and read the output tensor.
    ideology_interpreter.set_tensor(ideology_input_details[0]['index'], padded)
    ideology_interpreter.invoke()
    predictions = ideology_interpreter.get_tensor(ideology_output_details[0]['index'])

    # Threshold the first prediction into a binary label.
    return 1 if predictions[0] > 0.5 else 0

In [9]:
# Predict liberalism_conservative
# Each article is wrapped in a one-element list because the predictor takes a
# list of texts and returns the label of the first one.
df['liberalism_conservative'] = df['content'].map(
    lambda article_text: predict_liberalism_conservative([article_text])
)

In [10]:
# Preview the first rows only, so a larger input file does not dump the whole
# frame (including the long embedding column) into the notebook output.
df.head(10)

Unnamed: 0,source,title,content,embedding,cluster,bias,hoax,liberalism_conservative
0,detik,"Polisi Tembak Polisi di Rumah Pejabat Polri, B...",Jakarta - Seorang polisi Brigadir J tewas dite...,"[-0.00010740216384874657, -0.00601372215896844...",2,0,1,1
1,cnnindonesia,Kronologi Polisi Tembak Polisi hingga Tewas di...,"Jakarta, CNN Indonesia -- Mabes Polri mengungk...","[-0.009914712980389595, -0.006208854261785746,...",2,1,1,1
2,ajnn,"Polisi Tembak Polisi, Brigadir J Tewas di Ruma...",JAKARTA - Polisi Brigadir J tewas akibat temba...,"[-0.006993312854319811, -0.008073685690760612,...",2,1,1,1
3,Suratpemred,Dor! Polisi Tembak Polisi di Rumah Dinas Pejab...,"JAKARTA, SP – Peristiwa penembakan sesama angg...","[0.0023585606832057238, -0.0064333160407841206...",2,0,0,1
4,Indozone,Fakta-fakta Polisi Adu Tembak di Rumah Petingg...,Peristiwa adu tembak antara anggota kepolisian...,"[-0.0008913218625821173, -0.001862582983449101...",2,0,1,1
5,TribuJabar,Polri Ungkap Penyebab 2 Polisi Baku Tembak di ...,"TRIBUNJABAR.ID, JAKARTA - Penyebab meninggalny...","[0.0053572701290249825, -0.009876349940896034,...",2,1,1,1
6,Detiknews,Polri Jelaskan Alasan Kasus Penembakan Brigadi...,Jakarta - Polri mengungkapkan alasan mengapa k...,"[-0.003241070080548525, -0.009468814358115196,...",2,1,1,1
7,Sindonews,"Brigadir Polisi Ditembak Bharada, Ini Penjelas...",JAKARTA - Kepala Biro Penerangan Masyarakat (K...,"[0.0016425038920715451, 0.0018388287862762809,...",2,1,1,1
8,Jpnn,Brigadir J Masuk Kamar dan Melecehkan Istri Ir...,"jpnn.com, JAKARTA SELATAN - Karopenmas Divhuma...","[-0.01566256396472454, -0.011813515797257423, ...",2,1,1,1
9,Cnnindonesia,Brigadir J Merupakan Sopir Istri Kadiv Propam ...,"Sementara, penembak Brigadir J, Bharada E adal...","[-0.018271414563059807, -0.016924822703003883,...",2,1,1,1


In [11]:
def create_documents(df):
    """Turn every row of ``df`` into a ``Document``.

    The article body becomes the document text, the stringified index label
    becomes ``doc_id``, the stored vector is attached as the embedding, and
    the numeric classifier outputs are converted into readable metadata
    labels (1 maps to 'biased' / 'hoax' / 'liberalism', anything else to the
    opposite label).

    Args:
        df: DataFrame with 'content', 'title', 'bias', 'hoax',
            'liberalism_conservative' and 'embedding' columns.

    Returns:
        list: one Document per row, in row order.
    """
    def _label(value, positive, negative):
        # Only an exact 1 selects the positive label.
        return positive if value == 1 else negative

    return [
        Document(
            text=record['content'],
            doc_id=str(row_label),
            metadata={
                'title': record['title'],
                'bias': _label(record['bias'], 'biased', 'not biased'),
                'hoax': _label(record['hoax'], 'hoax', 'not hoax'),
                'liberalism_conservative': _label(
                    record['liberalism_conservative'], 'liberalism', 'conservative'
                ),
            },
            embedding=record['embedding'],
        )
        for row_label, record in df.iterrows()
    ]

# Build the document list once; the summary cell below reuses it.
documents = create_documents(df=df)

In [12]:
def createSummary(documents):
    """Ask the configured LLM for an Indonesian summary of the documents.

    A ``SummaryIndex`` is built over ``documents`` and queried once with a
    fixed summarisation prompt, using ``Settings.llm`` as the model.

    Args:
        documents: documents to summarise, e.g. from ``create_documents``.

    Returns:
        The object returned by the query engine's ``query`` call.
    """
    # Written line by line so the trailing spaces of the prompt stay visible.
    prompt = (
        "\n"
        "    Create a short, detailed, and factual summary of the articles.\n"
        "    For more context for you, chatgpt, information regarding: \n"
        "    it's bias (0: not biased/neutral, 1: biased), \n"
        "    hoax (0: is factual, 1: has hoax), \n"
        "    and whether it's ideology (liberal or conservative)\n"
        "    which are all detected using machine learning models are also included.\n"
        "    Do NOT discuss about the article's hoax, bias, or political view. \n"
        "    Do NOT rely on previous knowledge. \n"
        "    Use Indonesian language. \n"
        "    "
    )
    engine = SummaryIndex.from_documents(
        documents, use_async=True
    ).as_query_engine(llm=Settings.llm)
    return engine.query(prompt)

In [13]:
# Generate the summary and print its text form.
print(createSummary(documents=documents))

Insiden penembakan terjadi di rumah dinas Kadiv Propam Polri Irjen Ferdy Sambo pada Jumat, 8 Juli 2022, pukul 17.00 WIB, yang melibatkan dua anggota polisi, Brigadir J dan Bharada E. Brigadir J tewas dalam kejadian tersebut setelah baku tembak dengan Bharada E. Kronologi yang diungkapkan menyebutkan bahwa Brigadir J diduga memasuki kamar pribadi istri Irjen Ferdy Sambo, melakukan pelecehan, dan menodongkan pistol. Istri Irjen Ferdy berteriak minta tolong, yang kemudian memicu respons Bharada E dari lantai atas. Ketika Bharada E menanyakan apa yang terjadi, Brigadir J diduga menembak lebih dahulu, sehingga Bharada E membalas tembakan. 

Dalam kejadian ini, Brigadir J mengalami luka tembak dan dinyatakan meninggal dunia. Jenazahnya telah dipulangkan ke Jambi untuk dimakamkan. Bharada E telah diamankan untuk proses lebih lanjut. Kasus ini tengah diselidiki oleh Propam Mabes Polri dan Polres Metro Jakarta Selatan. Pihak keluarga Brigadir J menyatakan adanya kejanggalan, seperti luka sayata

In [14]:
# From this cell onward the global default LLM is 'gpt-4o'; this replaces the
# model configured at the top of the notebook.
Settings.llm = OpenAI(
    model='gpt-4o',
)

In [15]:
def queryEngineMaker(df):
    """Build a query engine over the articles in ``df``.

    The rows are converted with ``create_documents``, indexed in a
    ``VectorStoreIndex``, and exposed as a query engine that uses
    ``Settings.llm``.

    Args:
        df: article DataFrame accepted by ``create_documents``.

    Returns:
        The query engine produced by the index's ``as_query_engine``.
    """
    article_index = VectorStoreIndex.from_documents(create_documents(df))
    return article_index.as_query_engine(llm=Settings.llm)

In [19]:
def create_analysis(query_engine):
    """Run the liberal-vs-conservative comparison prompt on a query engine.

    Args:
        query_engine: object with a ``query(str)`` method whose result has a
            ``response`` attribute.

    Returns:
        The ``response`` attribute of the query result.
    """
    # Written line by line so whitespace-only lines and trailing spaces of the
    # prompt stay visible.
    prompt = (
        "\n"
        "    I have a collection of articles classified as either liberal or conservative. These articles all discuss the same event but from differing perspectives.\n"
        "    Your task is to analyze a given query and generate a response summarizing how articles from each perspective address the topic. \n"
        "    \n"
        "    Ensure the response follows this structure:\n"
        "    Liberal: [Summarize key points using the language and tone of liberal articles. If no liberal perspective exists, only explain that there are no liberal perspectives.]\n"
        "    Conservative: [Summarize key points using the language and tone of conservative articles. If no conservative perspective exists, only explain that there are no conservative perspectives.]\n"
        "    \n"
        "    Follow these guidelines:\n"
        "    Derive all information directly from the provided articles—do not rely on prior knowledge or external context.\n"
        "    Keep summaries concise, factual, and reflective of the language and tone used in the articles. Avoid being too wordy.\n"
        "    Highlight notable phrases or specific terminology unique to each perspective to showcase differences in framing or emphasis.\n"
        "    Only include sections for perspectives present in the articles. If only one perspective is available, summarize that side alone.\n"
        "    Format responses as concise paragraphs. Do not include editorial commentary, personal interpretation, or merge perspectives into one.\n"
        "    Use Indonesian language.\n"
        "    "
    )
    return query_engine.query(prompt).response

In [20]:
# Build a fresh query engine over the labelled articles and run the
# perspective comparison on it.
response = create_analysis(query_engine=queryEngineMaker(df=df))

In [21]:
# Show the comparison text returned by create_analysis.
print(response)

Liberal: Artikel dengan perspektif liberal menyoroti insiden penembakan antar polisi di rumah dinas pejabat Polri. Ditekankan bahwa peristiwa ini melibatkan Brigadir J yang tewas setelah ditembak oleh Bharada E. Penjelasan dari pihak kepolisian menyebutkan bahwa Brigadir J mengacungkan senjata api terlebih dahulu, yang kemudian dibalas oleh Bharada E. Kasus ini sedang dalam penyelidikan lebih lanjut oleh Propam Mabes Polri dan Polres Jakarta Selatan untuk mengungkap motif dan memastikan adanya unsur pidana.

Conservative: Tidak ada perspektif konservatif yang tersedia dalam artikel yang diberikan.
