# Extract the Data

In [1]:
import psycopg2
import re
import requests
from ollama import Client
import data_connector as dc

# Processing jobs for each platform
# 10 = tiktok
# 20 = youtube
# 30 = instagram_post
# 40 = facebook_post
# 50 = google_result
# 60 = twitter_tweets

# Step 1: Extract Data
def fetch_posts(social_media_monitoring_id, platform_id):
    conn = psycopg2.connect(
        dbname='medols',
        user='postgres',
        password='FEWcTB3JIX5gK4T06c1MdkM9N2S8w9pb',
        host='98.98.117.105',
        port='5432')

    if (platform_id == 60):
        ## twitter
        query = """
        select tweet as post_content 
        from twitter_tweets 
        where monitoring_id = '%s'
        """ % social_media_monitoring_id
    elif (platform_id == 40):
        ## facebook post
        query = """
        select  description as post_content
        from 	facebook_post 
        where monitoring_id = '%s'
        """ % social_media_monitoring_id
    elif (platform_id == 30):
        ## instagram post
        query = """
        select 	content as post_content
        from 	instagram_post 
        where monitoring_id = '%s'
        """ % social_media_monitoring_id
    elif (platform_id == 20):
        ## youtube post
        query = """
        select 	description as post_content
        from 	youtube     
        where monitoring_id = '%s'
        """ % social_media_monitoring_id
    elif (platform_id == 10):
        ## tiktok post
        query = """
        select 	"desc" as post_content
        from 	tiktok        
        where monitoring_id = '%s'
        """ % social_media_monitoring_id

    
    cursor = conn.cursor()
    cursor.execute(query)
    posts = cursor.fetchall()
    cursor.close()
    conn.close()
    return [post[0] for post in posts]


# Pre Process the Data

In [2]:
# Step 2: Preprocess Data
def preprocess_text(text):
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Remove user mentions
    text = re.sub(r'@\w+', '', text)
    # Remove hashtags
    text = re.sub(r'#\w+', '', text)
    # Remove special characters and numbers
    text = re.sub(r'\W+|\d+', ' ', text)
    # Convert to lowercase
    text = text.lower()
    return text

# Step 4: Recording to database

In [3]:
def record_to_database(topics_output, social_media_monitoring_id, platform_id):

    # Establish connection
    conn = psycopg2.connect(
        dbname='medols',
        user='postgres',
        password='FEWcTB3JIX5gK4T06c1MdkM9N2S8w9pb',
        host='98.98.117.105',
        port='5432'
    )

    # Create a cursor
    cur = conn.cursor()
    
    # Text containing apostrophe and double quotes
    # text = 'It\'s a "beautiful" day.'
    
    #preparing all the data component
    model = topics_output['model']
    created_at = topics_output['created_at']
    total_duration = topics_output['total_duration']
    load_duration = topics_output['load_duration']
    prompt_eval_duration = topics_output['prompt_eval_duration']
    eval_count = topics_output['eval_count']
    eval_duration = topics_output['eval_duration']
    role = topics_output['message']['role']
    text_output = "$$" + topics_output['message']['content'] + "$$"

    ssql = """
    insert into topic_detection (social_media_monitoring_id, model, created_at ,total_duration, 
					 	load_duration, prompt_eval_duration, eval_count, eval_duration,
					 	llm_role, topic_detection, platform_id)
    values ('%s','%s','%s',%s ,%s ,%s ,%s ,%s, '%s', %s, %s)
    """
    ssql = ssql % (social_media_monitoring_id, model, created_at, total_duration, load_duration, prompt_eval_duration, eval_count, eval_duration, role, text_output, platform_id)
    #print(ssql)
    #dc.execute_query_psql(ssql)

    # Parameterized query
    cur.execute(ssql, (social_media_monitoring_id, model, created_at, total_duration, load_duration, prompt_eval_duration, eval_count, eval_duration, role, text_output, platform_id,))
    
    # Commit the transaction
    conn.commit()
    
    # Close the cursor and connection
    cur.close()
    conn.close()

def record_calculated(social_media_monitoring_id):
    ssql = "insert into topic_detection_rec (monitoring_search_id, process_time) values ('%s', NOW())"
    ssql = ssql % (social_media_monitoring_id)
    dc.execute_query_psql(ssql)
    

def pre_insert(social_media_monitoring_id, platform_id):
    ssql = "delete from topic_detection where social_media_monitoring_id = '%s' and platform_id = %s"
    ssql = ssql % (social_media_monitoring_id, platform_id)
    dc.execute_query_psql(ssql)

In [4]:
# Step 3: Query the Hosted LLM
def query_ollama_llm(platform_id,texts, num_topics=5):
# 10 = tiktok
# 20 = youtube
# 30 = instagram_post
# 40 = facebook_post
# 50 = google_result
# 60 = twitter_tweets
    
    client = Client(host='http://localhost:11434')
    print("PlatformID: %s" %str(platform_id))

    prompt_txt = """
Anda adalah seorang Ahli Analitik Media Sosial Lanjutan. Berdasarkan postingan %s di bawah ini, 
tentukan 5 topik utama yang mencakup keseluruhan. Sertakan tema-tema kunci, kata kunci, 
dan penjelasan singkat untuk setiap topik yang diidentifikasi, gunakanlah bahasa indonesia
yang baik dan benar dalam seluruh jawaban anda.
Di bawah ini adalah postingan %s:

{post1}
{post2}
{post3}
...
N. {postN}
    """
    if platform_id == 10:
        prompt_txt = prompt_txt % ('Tiktok','Tiktok')
    elif platform_id == 20:
        prompt_txt = prompt_txt % ('Youtube','Youtube')
    elif platform_id == 30:
        prompt_txt = prompt_txt % ('Instagram','Instagram')
    elif platform_id == 40:
        prompt_txt = prompt_txt % ('Facebook','Facebook')
    elif platform_id == 60:
        prompt_txt = prompt_txt % ('Twitter','Twitter')

    print(prompt_txt)
    prompt_txt = prompt_txt + '\n\n'
    prompt_txt = prompt_txt.join(texts)
    
    prompt_msg = [
      {
        'role': 'user',
        'content': prompt_txt,
      },
    ]
    # print(prompt_msg[:1000])
    
    response = client.chat(model='llama3', messages=prompt_msg)
    # return ""
    return response



# Executing LLM Query

In [5]:
import json
import textwrap
import logging

# Set up logging to capture errors
logging.basicConfig(level=logging.INFO)

social_media_monitoring_id = ""
platform_id = 0

# query for outstanding
ssql = """
select 	b.id as medsos_id,
		c.id as medol_id,
		b.is_twitter,
		b.is_tiktok,
		b.is_facebook,
		b.is_instagram,
		b.is_youtube
from 	monitoring_search a 
		left outer join monitoring_media_social b 
			on a.id = b.monitoring_search_id
		left outer join monitoring_media_online c
			on a.id = c.monitoring_search_id 
where 	b.id not in (select monitoring_search_id from topic_detection_rec)
		and a.monitoring_id in (select id 
								from monitoring
								where status = 3)
		and extract(year from a.created_date) = 2024
		and extract(month from a.created_date) >= 8
order by a.created_date desc 
limit 1
"""
df_source = dc.execute_query_psql(ssql)
df_source.head()

social_media_monitoring_id = df_source.iloc[0]['medsos_id']
online_media_monitoring_id = df_source.iloc[0]['medol_id']
print("Processing SosmedID %s" % (social_media_monitoring_id))
print("Processing MedolID %s" % (online_media_monitoring_id))

# Processing jobs for each platform
# 10 = tiktok
# 20 = youtube
# 30 = instagram_post
# 40 = facebook_post
# 50 = google_result
# 60 = twitter_tweets

if (df_source.iloc[0]['is_twitter']):
    print('process twitter')
    platform_id = 60
    
    try:
        # Fetch the posts
        posts = fetch_posts(social_media_monitoring_id, platform_id)
        if len(posts) > 0:
            # Preprocess the posts
            cleaned_posts = [preprocess_text(post) for post in posts]
            
            topics = query_ollama_llm(platform_id,cleaned_posts,5)
            
            # recording to database
            pre_insert(social_media_monitoring_id, platform_id)
            record_to_database(topics, social_media_monitoring_id, platform_id)
    except Exception as e:
        logging.error(f"Error fetching posts: {e}")
        posts = []
        
if (df_source.iloc[0]['is_tiktok']):
    print('process tiktok')
    platform_id = 10
    
    try:
        # Fetch the posts
        posts = fetch_posts(social_media_monitoring_id, platform_id)
        
        # Preprocess the posts
        cleaned_posts = [preprocess_text(post) for post in posts]
        
        topics = query_ollama_llm(platform_id, cleaned_posts,5)
        
        # recording to database
        pre_insert(social_media_monitoring_id, platform_id)
        record_to_database(topics, social_media_monitoring_id, platform_id)
    except Exception as e:
        logging.error(f"Error fetching posts: {e}")
        posts = []

if (df_source.iloc[0]['is_facebook']):
    print('process facebook')
    platform_id = 40
    
    try:
        # Fetch the posts
        posts = fetch_posts(social_media_monitoring_id, platform_id)
        
        # Preprocess the posts
        cleaned_posts = [preprocess_text(post) for post in posts]
        
        topics = query_ollama_llm(platform_id, cleaned_posts,5)
        
        # recording to database
        pre_insert(social_media_monitoring_id, platform_id)
        record_to_database(topics, social_media_monitoring_id, platform_id)
    except Exception as e:
        logging.error(f"Error fetching posts: {e}")
        posts = []

if (df_source.iloc[0]['is_instagram']):
    print('process instagram')
    platform_id = 30
    
    try:
        # Fetch the posts
        posts = fetch_posts(social_media_monitoring_id, platform_id)
        
        # Preprocess the posts
        cleaned_posts = [preprocess_text(post) for post in posts]
        
        topics = query_ollama_llm(platform_id, cleaned_posts,5)
        
        # recording to database
        pre_insert(social_media_monitoring_id, platform_id)
        record_to_database(topics, social_media_monitoring_id, platform_id)
    except Exception as e:
        logging.error(f"Error fetching posts: {e}")
        posts = []

if (df_source.iloc[0]['is_youtube']):
    print('process youtube')
    platform_id = 20
    
    try:
        # Fetch the posts
        posts = fetch_posts(social_media_monitoring_id, platform_id)
        
        # Preprocess the posts
        cleaned_posts = [preprocess_text(post) for post in posts]
        
        topics = query_ollama_llm(platform_id, cleaned_posts,5)
        
        # recording to database
        pre_insert(social_media_monitoring_id, platform_id)
        record_to_database(topics, social_media_monitoring_id, platform_id)
    except Exception as e:
        logging.error(f"Error fetching posts: {e}")
        posts = []



INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


ERROR:root:Error fetching posts: 'total_duration'


Processing SosmedID cbe29a29-f969-4db5-9364-a1992488ff7c
Processing MedolID f37e3421-ea02-43da-a5fb-0fc79b7cca60
process twitter
process tiktok
PlatformID: 10

Anda adalah seorang Ahli Analitik Media Sosial Lanjutan. Berdasarkan postingan Tiktok di bawah ini, 
tentukan 5 topik utama yang mencakup keseluruhan. Sertakan tema-tema kunci, kata kunci, 
dan penjelasan singkat untuk setiap topik yang diidentifikasi, gunakanlah bahasa indonesia
yang baik dan benar dalam seluruh jawaban anda.
Di bawah ini adalah postingan Tiktok:

{post1}
{post2}
{post3}
...
N. {postN}
    
process facebook


INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


ERROR:root:Error fetching posts: 'total_duration'


INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


PlatformID: 40

Anda adalah seorang Ahli Analitik Media Sosial Lanjutan. Berdasarkan postingan Facebook di bawah ini, 
tentukan 5 topik utama yang mencakup keseluruhan. Sertakan tema-tema kunci, kata kunci, 
dan penjelasan singkat untuk setiap topik yang diidentifikasi, gunakanlah bahasa indonesia
yang baik dan benar dalam seluruh jawaban anda.
Di bawah ini adalah postingan Facebook:

{post1}
{post2}
{post3}
...
N. {postN}
    
process instagram
PlatformID: 30

Anda adalah seorang Ahli Analitik Media Sosial Lanjutan. Berdasarkan postingan Instagram di bawah ini, 
tentukan 5 topik utama yang mencakup keseluruhan. Sertakan tema-tema kunci, kata kunci, 
dan penjelasan singkat untuk setiap topik yang diidentifikasi, gunakanlah bahasa indonesia
yang baik dan benar dalam seluruh jawaban anda.
Di bawah ini adalah postingan Instagram:

{post1}
{post2}
{post3}
...
N. {postN}
    


ERROR:root:Error fetching posts: 'total_duration'


INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


ERROR:root:Error fetching posts: 'total_duration'


process youtube
PlatformID: 20

Anda adalah seorang Ahli Analitik Media Sosial Lanjutan. Berdasarkan postingan Youtube di bawah ini, 
tentukan 5 topik utama yang mencakup keseluruhan. Sertakan tema-tema kunci, kata kunci, 
dan penjelasan singkat untuk setiap topik yang diidentifikasi, gunakanlah bahasa indonesia
yang baik dan benar dalam seluruh jawaban anda.
Di bawah ini adalah postingan Youtube:

{post1}
{post2}
{post3}
...
N. {postN}
    


# Recording Monitoring ID

In [6]:
# record as calculated
record_calculated(social_media_monitoring_id)