In [1]:
import os
from tqdm import tqdm

# pdf_path = "pdf/Eyeriss_An_Energy-Efficient_Reconfigurable_Accelerator_for_Deep_Convolutional_Neural_Networks.pdf"

def get_pdf_text(base_dir='pdf'):
	"""
	Input: base_dir - directory containing pdf files
	Output: text - list of strings, each string is the text extracted from a pdf file
	"""
	import pymupdf 
	def get_single_pdf_text(pdf_path):
		doc = pymupdf.open(pdf_path)
		text = ""
		for page_num in range(len(doc)):
			page = doc[page_num]
			text += page.get_text() + "\n\n"
		metadata = doc.metadata
		return text, metadata

	text = []
	meta = []
	for file in tqdm(os.listdir(base_dir), desc="Converting PDFs to text"):
		text_ = ""
		meta_ = ""
		if file.endswith(".pdf"):
			pdf_path = os.path.join(base_dir, file)
			text_, meta_ = get_single_pdf_text(os.path.join(base_dir, file))
		text.append(text_)
		meta.append(meta_)
	return text, meta

In [2]:
def get_model():
	from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

	torch.cuda.empty_cache()
	torch.cuda.ipc_collect()

	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	os.environ["CUDA_VISIBLE_DEVICES"] = "4,5,6,7"

	model_name = "deepseek-ai/deepseek-moe-16b-chat"
	tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
	model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, torch_dtype=torch.bfloat16, device_map=device)
	model.generation_config = GenerationConfig.from_pretrained(model_name)
	model.generation_config.pad_token_id = model.generation_config.eos_token_id
 
	return model, tokenizer

In [3]:
import torch

def summarize_text(text_list=[], max_new_tokens=256, chunk_size=8192):
	"""
	Input: text_list: list of strings, each string is a text to be summarized
			max_new_tokens: int, maximum number of tokens to generate
			chunk_size: int, maximum number of tokens to process at a time
	Output: list of strings, each string is the summary of the corresponding input text	
	"""
	model, tokenizer = get_model()
	all_summaries = []
 
	for text in tqdm(text_list, desc="Processing texts"):
		text_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
		chunk_summaries = []
		device = model.device

		for chunk in text_chunks:
			messages = [
				{"role": "user", "content": f"Summarize the following text concisely:\n\n{chunk}"}
			]
			input_tensor = tokenizer.apply_chat_template(
				messages,
				add_generation_prompt=True,
				return_tensors="pt",
				padding=True,
				truncation=True,
			).to(device)

			attention_mask = input_tensor.ne(tokenizer.pad_token_id).int().to(device)

			outputs = model.generate(
				input_tensor, 
				attention_mask=attention_mask,  
				max_new_tokens=max_new_tokens  
			)
   
			result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
			chunk_summaries.append(result)
		
			torch.cuda.empty_cache()
			torch.cuda.ipc_collect()
   
		summary = " ".join(chunk_summaries)
		all_summaries.append(summary)

  
	return all_summaries

text_list, _ = get_pdf_text(base_dir='pdf')
summaries = summarize_text(text_list)
print(summaries)

Converting PDFs to text: 100%|██████████| 2/2 [00:00<00:00,  8.60it/s]


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.43 GiB of which 1.75 MiB is free. Process 662644 has 260.00 MiB memory in use. Process 1754263 has 25.58 GiB memory in use. Including non-PyTorch memory, this process has 21.58 GiB memory in use. Of the allocated memory 17.76 GiB is allocated by PyTorch, and 3.57 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

: 