In [15]:
import requests
import json
import os

# Your Hugging Face API token
API_TOKEN = os.getenv("HF_API_KEY")

MODEL_NAME = "Qwen/Qwen2.5-Coder-32B-Instruct" #"google/gemma-2-2b-it"

# The API endpoint for chat completion
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"

# Headers with your API token
headers = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json"
}

# Function to query the model
def query_model(prompt):
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_length": 50,  # Adjust max length as needed
            "temperature": 0.7,  # Adjust temperature for creativity
            "top_p": 0.9  # Adjust top-p for nucleus sampling
        }
    }

    response = requests.post(API_URL, headers=headers, json=payload)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

# Example usage
if __name__ == "__main__":
    DOCUMENT_PATH = "D:/Github/Speech-to-Text-Summarization/transcription.txt"
    with open(DOCUMENT_PATH, "r", encoding="utf-8") as f:
        document_text = f.read()
    # Your input prompt
    user_prompt = "Summarise following text: " + document_text

    # Query the model
    result = query_model(user_prompt)

    if result:
        # Print the model's response
        print("Model Response:")
        print(result[0]['generated_text'])

Error: 422
{"error":"Input validation error: `inputs` tokens + `max_new_tokens` must be <= 32768. Given: 127967 `inputs` tokens and 0 `max_new_tokens`","error_type":"validation"}


## 2nd approach

In [22]:
import requests
import json
import os
from transformers import AutoTokenizer

# Your Hugging Face API token
API_TOKEN = os.getenv("HF_API_KEY")

MODEL_NAME = "Qwen/Qwen2.5-7B-Instruct" #"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"#"Qwen/Qwen2.5-Coder-32B-Instruct" #
API_URL = f"https://api-inference.huggingface.co/models/{MODEL_NAME}"

DOCUMENT_PATH = r"C:\Users\firmansyah.atmojo\OneDrive - PT. Bumi Serpong Damai Tbk\2 Projects\DA Projects\Speech to Text\The Links Golf\The Links Golf Unsummarized.txt"
with open(DOCUMENT_PATH, "r", encoding="utf-8") as f:
    document_text = f.read()

#document_text = """
#TEMPO.CO, Jakarta - President Prabowo Subianto's instruction for each ministry and agency to implement budget savings, the Ministry of Finance (Kemenkeu) has to cancel the Ministerial Scholarship offer for 2025. This decision was also the result of the Leadership Meeting of the Financial Education and Training Agency (BPPK) on January 31, 2025.
#
#"We sincerely apologize for the cancellation of the Ministry of Finance (Kemenkeu) Ministerial Scholarship offer for 2025. As a follow-up to this cancellation, the scholarship registration process has been discontinued effective from the date this announcement is issued," said Wahyu, as quoted by Antara.
#
#The Ministerial Scholarship is a program designed for future leaders or top talents of the Ministry of Finance to pursue postgraduate studies abroad.
#
#This scholarship program intends to enhance the Ministry of Finance's competencies in human resources to achieve its vision, mission, and strategic goals. Ministerial Scholarship alumni are expected to have a competitive edge, making them well-prepared to lead the Ministry of Finance in the future.
#
#This year, the program was opened on January 10 and was initially scheduled to close on February 9. However, it was canceled following the cancellation letter issued on January 31, 2025.
#
#President Prabowo Subianto previously issued Presidential Instruction (Inpres) 1/2025, instructing a budget cut of Rp 306.69 trillion in the 2025 State Budget (APBN) and Regional Budget (APBD). This amount consists of reductions in ministry/agency (K/L) budgets and transfers to regions (TKD), with cuts of Rp 256.1 trillion and Rp 50.59 trillion, respectively.
#
#In response to the instruction, Minister of Finance Sri Mulyani Indrawati issued letter number S-37/MK.02/2025, specifying 16 expenditure items that need budget reductions, with varying cuts ranging from 10 percent to 90 percent.
#"""

headers = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json"
}

def query_model(prompt, max_new_tokens=500):
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_new_tokens,
            "temperature": 0.7,
            "top_p": 0.9,
            "do_sample": True,  # Improves diversity of output
        }
    }

    response = requests.post(API_URL, headers=headers, json=payload)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}")
        print(response.text)
        return None

def split_document(document_text, tokenizer, max_tokens):
    # Tokenize the document without special tokens
    tokens = tokenizer.encode(document_text, add_special_tokens=False)
    
    chunks = []
    current_chunk = []
    
    for token in tokens:
        current_chunk.append(token)
        if len(current_chunk) >= max_tokens:
            chunks.append(current_chunk)
            current_chunk = []
    
    if current_chunk:
        chunks.append(current_chunk)
    
    # Decode token chunks back to text
    return [tokenizer.decode(chunk) for chunk in chunks]

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Calculate token usage for static prompt part
static_prompt = """Please summarize the following text in bullet points. 
Focus on key decisions, reasons, and impacts. Use this format:
- Key Point 1
- Key Point 2
- ...
"""
static_prompt_tokens = tokenizer.encode(static_prompt, add_special_tokens=False)

# Set safety margin to account for model's internal formatting
max_total_tokens = 16384#32768
max_response_tokens = 500
available_tokens = max_total_tokens - len(static_prompt_tokens) - max_response_tokens

# Split document into chunks that fit with the prompt
document_chunks = split_document(document_text, tokenizer, available_tokens)

# Verify response handling
summaries = []
for i, chunk in enumerate(document_chunks):
    print(f"Processing chunk {i+1}/{len(document_chunks)}")
    
    full_prompt = static_prompt + chunk  # Now includes bullet point instructions
    
    result = query_model(full_prompt, max_new_tokens=max_response_tokens)
    
    # Handle different API response formats
    if result:
        if isinstance(result, list) and 'generated_text' in result[0]:
            summary = result[0]['generated_text'].replace(full_prompt, "").strip()
        elif isinstance(result, dict) and 'generated_text' in result:
            summary = result['generated_text'].replace(full_prompt, "").strip()
        else:
            summary = f"[Summary format unexpected for chunk {i+1}]"
        summaries.append(summary)
    else:
        summaries.append(f"[Summary failed for chunk {i+1}]")

# Combine all summaries
final_summary = "\n\n".join(summaries)

# Save and display results
with open("summary.txt", "w", encoding="utf-8") as f:
    f.write(final_summary)

print("\nFinal Summary:")
print(final_summary)

tokenizer_config.json:   0%|          | 0.00/7.30k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Processing chunk 1/2
Error: 403
{"error":"The model Qwen/Qwen2.5-7B-Instruct is too large to be loaded automatically (15GB > 10GB)."}
Processing chunk 2/2
Error: 403
{"error":"The model Qwen/Qwen2.5-7B-Instruct is too large to be loaded automatically (15GB > 10GB)."}

Final Summary:
[Summary failed for chunk 1]

[Summary failed for chunk 2]


'[00:00] Dari loc cair marketing agensi kita dari kudo terus agen kerja mereka ada 3 winfield era sama iq.\n[00:09] Nah ini untuk sertifikat kita, kenapa kita uh keluarin di presentasi supaya dia di awal saja karena hak milik ya sudah ya nanti dapat hak milik tapi karena kan ini pt ya bu jadi bapaknya hakim dulu nanti pas mau uh ajb baik nama baru ibu bisa peningkatan ke.\n[00:27] Sertifikat kita sudah proses semuanya saat ini.\n[00:32] Nah waktu di bulan 8 kemarin kita itu ikut award di properti award terus kita dapat 2.\n[00:40] Award juga langsung itu dapat best nature integrate development sama best high and arsitektur designnya juga.\n[00:47] Jadi kita vilanya lagi pembangunan, tapi kita sudah dapat 2 word karena dari uh penilaiannya mereka bilang ini konsepnya bagus gitu, karena satu satunya vila yang langsung connected bahwa lapangan golf di Indonesia baru kita.\n[01:05] Nah ini 10 lapangan golf terbaik kita itu masuk di 10 besar tapi kita di posisinya 3 bu untuk yang lapangan t

## 3rd Approach

In [None]:
# Import Library
import PyPDF2
from tqdm import tqdm
from docx import Document
import time
#from google.colab import files
import requests
from huggingface_hub import InferenceClient
import re

# API KEY

client = InferenceClient(
	api_key="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"
)

API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
headers = {"Authorization": "Bearer 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"}


def open_PDF (path) : # Open the PDF file
  # Menginisialisasi string kosong
  concatenated_text = ""

  with open(path, "rb") as file:
      reader = PyPDF2.PdfReader(file)
      pages = reader.pages
      # Extract text from each page
      for i in tqdm(range(len(reader.pages))):
          text = pages[i].extract_text().strip()

          # Remove the references section (assuming it starts with 'References' and ends at the end of the document)
          if "References" in text:
              text = text.split("References")[0]
  # Loop through the pages
  for page in tqdm(pages):
      # Extract the text from the page and remove any leading or trailing whitespace
      text = page.extract_text().strip()

      # Concat the extracted text to the concatenated text
      concatenated_text += text
      # Check token length
  return concatenated_text

def open_text (path) : # Mebaca file txt dan mengisi variable text
  with open(path, 'r') as file:
      text = file.read()
  return text

def open_docx (path) : # Membaca file docx
  doc = Document(path)

  # Extract text
  text = []
  for paragraph in doc.paragraphs:
      text.append(paragraph.text)

  # Join paragraphs into a single string
  document_text = '\n'.join(text)
  return document_text

def input_classifier (Path) : # Menentukan jenis file berdasarkan path yang diberikan
  Jenis_File = Path[-3:]
  # Memanggil function sesuai dengan path file
  if Jenis_File == "pdf" :
    text = open_PDF (Path)
  elif Jenis_File == "txt" :
    text = open_text (Path)
  else :
    text = open_docx (Path)
  return text

#path = str(input(" Path File : "))
text = input_classifier (r"C:\Users\firmansyah.atmojo\OneDrive - PT. Bumi Serpong Damai Tbk\2 Projects\DA Projects\Speech to Text\The Links Golf\The Links Golf Unsummarized.txt")
word_count = len(text.split())
print (word_count)

# Prompt
def prompt (text) :
  messages = [
    {
      "role": "user",
      "content":
      f"""Summarize the following text in a concise and clear manner.

      Return your response in bullet points which covers the key points of the text.

      {text}
      """
    }
  ]
  return messages

def chunk_text(text, max_length=1500):
    """Split text into smaller chunks."""
    return [text[i:i + max_length] for i in range(0, len(text), max_length)]

def Qwen(text):
    chunks = chunk_text(text)
    summaries = []
    
    for chunk in chunks:
        messages = prompt(chunk)
        completion = client.chat.completions.create(
            model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
            messages=messages,
        )
        
        Content = completion.choices[0].message.content
        penanda = "</think>"
        pattern = rf"{re.escape(penanda)}\s*(.*)"
        summary = re.findall(pattern, Content, re.DOTALL)
        summaries.append(summary)

    return summaries

# Use the modified Qwen function
print("Bullet Point Summary:")
Summary_Qwen = Qwen(text)


8741
Bullet Point Summary:


In [18]:
from dotenv import load_dotenv
load_dotenv()

True

In [20]:
import requests
import os

HF_API_TOKEN = os.getenv("HF_API_TOKEN")
if HF_API_TOKEN is None:
    raise ValueError("HF_API_TOKEN environment variable must be set.")

def summarize_with_prompt_api(filepath, prompt, bullet_points=True, model_name="facebook/bart-large-cnn"):
    """
    Summarizes a text file with a prompt using the Hugging Face Inference API.

    Args:
        filepath: Path to the text file.
        prompt: The prompt to guide the summarization.
        bullet_points: If True, formats the summary into bullet points.
        model_name: The Hugging Face model to use.

    Returns:
        The summarized text, or an error message.
    """

    try:
        with open(filepath, "r", encoding="utf-8") as file:
            text = file.read()
    except FileNotFoundError:
        return "Error: File not found."
    except Exception as e:
        return f"Error reading file: {e}"

    API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}

    # Format the input for models that expect prompt + text
    input_text = f"{prompt}\n{text}"  # Combine prompt and text

    max_chunk_length = 512  # Adjust as needed
    chunks = [input_text[i:i + max_chunk_length] for i in range(0, len(input_text), max_chunk_length)]

    all_summaries = []
    for chunk in chunks:
        payload = {
            "inputs": chunk,
            "options": {"wait_for_model": True},
            "parameters": {"max_length": len(chunk)//2, "min_length": len(chunk)//4, "do_sample": False},  # Adjust as needed
        }

        try:
            response = requests.post(API_URL, headers=headers, json=payload)
            response.raise_for_status()
            summary = response.json()[0]["summary_text"]
            all_summaries.append(summary)
        except requests.exceptions.RequestException as e:
            return f"API request error: {e}"
        except (KeyError, IndexError) as e:
            return f"Error parsing API response: {e}. Response text: {response.text}"

    full_summary = " ".join(all_summaries)

    if bullet_points:
        bullet_summary = ""
        for sentence in full_summary.split("."):
            if sentence.strip():
                bullet_summary += f"- {sentence.strip()}\n"
        return bullet_summary
    else:
        return full_summary


# Example usage:
filepath = r"C:\Users\firmansyah.atmojo\OneDrive - PT. Bumi Serpong Damai Tbk\2 Projects\DA Projects\Speech to Text\The Links Golf\The Links Golf Unsummarized.txt"
prompt = "Summarize the following text focusing on the key findings and conclusions:"  # Your prompt here!
summary = summarize_with_prompt_api(filepath, prompt)

if summary:
    print(summary)
else:
    print("An error occurred during summarization.")

API request error: 500 Server Error: Internal Server Error for url: https://api-inference.huggingface.co/models/facebook/bart-large-cnn
