# LLM Gemini personal project

In [56]:
## libraries to be installed. can use terminal or cmd

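# pip install google-generativeai # done install
# pip install python-dotenv # done install (PyPI package that provides `from dotenv import load_dotenv`)
# pip install requests # done install (the PyPI package is `requests`, not `request`)
# pip install google-search-results # SerpApi client that provides `from serpapi import GoogleSearch`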

In [57]:
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Load variables from the local .env file into the process environment.
load_dotenv()

# Gemini configuration: the key is read from the environment so it never
# appears in the notebook itself.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

# Raise instead of calling exit(): inside a notebook, exit() asks the kernel to
# shut down (losing all state) rather than simply stopping this cell. Raising
# halts the cell and "Run All" with a readable error message.
if not GEMINI_API_KEY:
    raise RuntimeError(
        "Error: GEMINI_API_KEY not found in environment variables or .env file. "
        "Please make sure your .env file has GEMINI_API_KEY=YOUR_API_KEY."
    )

# Configure the Gemini client with the key loaded above.
genai.configure(api_key=GEMINI_API_KEY)

# Initialize the generative model used by the summarization cell.
# Author's note: the flash model was chosen because it is free; the pro version is paid.
model = genai.GenerativeModel('models/gemini-1.5-flash-latest')

print("Gemini API have been configured successfully.")

Gemini API have been configured successfully.


In [58]:
import os
import requests
from dotenv import load_dotenv
from serpapi import GoogleSearch # Import the SerpApi client

# Load variables from the local .env file into the process environment.
load_dotenv()

# API keys are read from the environment so they never appear in the notebook.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
SERPAPI_API_KEY = os.getenv("SERPAPI_API_KEY")

# Raise instead of calling exit(): inside a notebook, exit() asks the kernel to
# shut down (losing all state) rather than simply stopping this cell.
if not GEMINI_API_KEY:
    raise RuntimeError(
        "Error: GEMINI_API_KEY not found in environment variables or .env file."
    )

if not SERPAPI_API_KEY:
    raise RuntimeError(
        "Error: SERPAPI_API_KEY not found in environment variables or .env file. "
        "Please make sure your .env file has SERPAPI_API_KEY=YOUR_API_KEY."
    )

# SerpApi configuration for Google News
QUERY = "Petronas news"  # specific only for Petronas
PAGE_SIZE = 2            # number of top articles to keep for summarization
GL = "my"                # country: Malaysia
HL = "en"                # language: English

print(f"Searching Google News for top {PAGE_SIZE} recent articles about: {QUERY}")

# Output of this cell: one "title\nsnippet" string per article, consumed by the
# summarization cell.
retrieved_articles_content = []

# Pre-initialise so later code never touches an unbound (or stale, from a
# previous run) `results` when the SerpApi call itself raises.
results = {}

try:
    params = {
        "api_key": SERPAPI_API_KEY,
        "engine": "google_news",
        "q": QUERY,
        "num": PAGE_SIZE,
        "gl": GL,
        "hl": HL,
        # Intended to restrict results to The Edge Malaysia's domain
        # (interchangeable with finance.yahoo.com or bernama.com).
        # NOTE(review): the recorded run of this cell returned TNGlobal and
        # Malay Mail articles, so this filter does not appear to take effect;
        # confirm which parameters the google_news engine honours ("num" is
        # also compensated for by the slice below).
        "domains": "theedgemarkets.com",
    }

    search = GoogleSearch(params)
    results = search.get_dict()

    # SerpApi can report failures inside the response payload instead of
    # raising, so surface them here rather than only in the except handler.
    if "error" in results:
        print(f"SerpApi Error Details: {results['error']}")

    news_results = results.get("news_results") or []
    articles_to_process = news_results[:PAGE_SIZE]  # keep only the top PAGE_SIZE results

    if articles_to_process:
        print(f"\nProcessing top {len(articles_to_process)} news results extracted from The Edge Malaysia via Google News:")

        for i, article in enumerate(articles_to_process):
            print(f"\n--- Article {i+1} ---")
            print(f"Title: {article.get('title')}")
            print(f"Source: {article.get('source')}")
            print(f"Published At: {article.get('date')}")
            print(f"Link: {article.get('link')}")
            print(f"Snippet: {article.get('snippet')}")

            # Title + snippet is used as the text to summarize: the response
            # carries links, not full article bodies, and scraping is out of scope.
            # `or ""` also covers a key that is present but None, and joining only
            # the non-empty parts avoids a dangling newline when the snippet is missing.
            title = article.get("title") or ""
            snippet = article.get("snippet") or ""
            article_text = "\n".join(part for part in (title, snippet) if part.strip())
            if article_text:
                retrieved_articles_content.append(article_text)
            else:
                print("Warning: No title or snippet found for this article.")

        print("\n--- Article Retrieval Complete (via SerpApi) ---")
        print(f"Contents retrieved for summarization:\n{retrieved_articles_content}")

    else:
        print("No news articles found for the given query using SerpApi.")

except Exception as e:
    # Best-effort cell: report the failure and leave whatever was collected so
    # far in retrieved_articles_content (possibly empty).
    print(f"An error occurred during SerpApi call: {e}")

Searching Google News for top 2 recent articles about: Petronas news

Processing top 2 news results extracted from The Edge Malaysia via Google News:

--- Article 1 ---
Title: Petronas and Microsoft expand collaboration to support Malaysia's AI ecosystem
Source: {'name': 'TNGlobal', 'icon': 'https://encrypted-tbn2.gstatic.com/faviconV2?url=https://technode.global&client=NEWS_360&size=96&type=FAVICON&fallback_opts=TYPE,SIZE,URL'}
Published At: 08/04/2025, 01:01 AM, +0000 UTC
Link: https://technode.global/2025/08/04/petronas-and-microsoft-expand-collaboration-to-support-malaysias-ai-ecosystem/
Snippet: None

--- Article 2 ---
Title: Putrajaya: Workers in Petronas staff cuts getting compensation, transition support
Source: {'name': 'Malay Mail', 'icon': 'https://encrypted-tbn3.gstatic.com/faviconV2?url=https://www.malaymail.com&client=NEWS_360&size=96&type=FAVICON&fallback_opts=TYPE,SIZE,URL'}
Published At: 07/31/2025, 01:12 AM, +0000 UTC
Link: https://www.malaymail.com/news/malaysia/2025

In [59]:
import textwrap # for wrapping text purposes

# Summarize the retrieved articles with Gemini, print the result with its word
# count, and save it to a text file. Relies on `retrieved_articles_content`
# from the retrieval cell and on `model` / `genai` from the configuration cell.
if not retrieved_articles_content:
    print("No articles were retrieved to summarize. Please check the news retrieval step.")
else:
    # Combine the content of the retrieved articles, separated by a blank line.
    combined_articles_text = "\n\n".join(retrieved_articles_content)

    print("\n--- DEBUG: Content being sent to Gemini for summarization ---")
    print(combined_articles_text)
    print("-----------------------------------------------------------\n")

    # Prompt for summarization - based on the assessment's instruction
    prompt = f"""Summarize the following two recent news articles about Petronas. 
    The summary must be a single paragraph, approximately 30 words long. 
    Focus on the most crucial main key points for a relationship manager.

    Articles:
    {combined_articles_text}
    """

    print("\n--- Generating Summary with Gemini ---")
    print("Sending prompt to Gemini API...")

    # Pre-initialise so the except handler never touches an unbound name (or a
    # stale response left over from a previous run) when generate_content()
    # itself raises.
    response = None
    try:
        response = model.generate_content(
            prompt,
            generation_config=genai.types.GenerationConfig(
                temperature=0.2,       # kept low for factual summarization
                max_output_tokens=100  # upper bound on generated tokens
            )
        )

        # Access the generated text
        summary = response.text
        print("\nGenerated Summary (around 30 words):\n")

        # Wrap to 80 columns for readable notebook output.
        print(textwrap.fill(summary, width=80))

        # Word count is a simple whitespace split of the raw summary.
        word_count = len(summary.split())
        print(f"\nWord Count of the Generated Summary: {word_count} words")

        # Save the summary to a .txt file
        output_filename = "petronas_online_news_summary.txt"
        with open(output_filename, "w", encoding="utf-8") as f:
            f.write(summary.strip())

        print(f"\nSummary saved to '{output_filename}'")

    except Exception as e:
        print(f"An error occurred during Gemini API call: {e}")
        # Extra diagnostics are only available when a response object came back.
        if response is not None:
            if hasattr(response, 'prompt_feedback'):
                print(f"Prompt Feedback: {response.prompt_feedback}")
            if hasattr(response, 'candidates'):
                print(f"Candidates: {response.candidates}")


--- DEBUG: Content being sent to Gemini for summarization ---
Petronas and Microsoft expand collaboration to support Malaysia's AI ecosystem


Putrajaya: Workers in Petronas staff cuts getting compensation, transition support

-----------------------------------------------------------


--- Generating Summary with Gemini ---
Sending prompt to Gemini API...

Generated Summary (around 30 words):

Petronas is expanding its AI collaboration with Microsoft while simultaneously
providing compensation and support to employees affected by recent staff
reductions.  This highlights a strategic shift towards technological advancement
alongside responsible workforce management.

Word Count of the Generated Summary: 33 words

Summary saved to 'petronas_online_news_summary.txt'
