In [None]:
import numpy as np
import torch
from transformers import BertTokenizer, BertModel
import nltk
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# nltk.sent_tokenize needs the Punkt sentence model. Recent NLTK releases load it from the
# 'punkt_tab' resource, while older releases use the pickled 'punkt' package, so fetch both
# to keep the notebook runnable on a fresh kernel regardless of the installed NLTK version.
nltk.download('punkt')
nltk.download('punkt_tab')

In [None]:
# Load BERT components.
# The tokenizer and the model must come from the same checkpoint (the vocabulary has to match
# the embedding table), so the checkpoint name is defined once and used for both.
BERT_MODEL_NAME = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_NAME)
# output_hidden_states=True makes the model return every layer's hidden states in addition
# to last_hidden_state.
model = BertModel.from_pretrained(BERT_MODEL_NAME, output_hidden_states=True)

In [None]:
def preprocess_text(text):
    """Split ``text`` into sentences with NLTK's sentence tokenizer.

    Args:
        text: The raw passage to segment.

    Returns:
        The result of ``nltk.sent_tokenize(text)``, unchanged.
    """
    sentences = nltk.sent_tokenize(text)
    return sentences

In [None]:
def get_bert_embedding(sentence):
    """Embed text with BERT by mean-pooling the final hidden layer over real tokens.

    Args:
        sentence: The text to embed, passed straight to the module-level ``tokenizer``
            (a single string, or a list of strings to embed as one padded batch).

    Returns:
        numpy.ndarray: The pooled embedding with size-1 dimensions squeezed out, so one
        sentence yields a 1-D vector and a batch of several sentences a 2-D array.
    """
    tokens = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True)
    # Keep the inputs on the same device as the weights so this also works after the
    # model has been moved off the CPU.
    tokens = tokens.to(model.device)
    with torch.no_grad():
        outputs = model(**tokens)

    hidden = outputs.last_hidden_state
    # Weight every position by the attention mask: padding=True can add pad tokens when a
    # batch is embedded, and those must not be averaged into the sentence vector.
    mask = tokens["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1)  # guard against division by zero
    pooled = (hidden * mask).sum(dim=1) / token_counts
    # .cpu() is a no-op on CPU tensors and is required before .numpy() on any other device.
    return pooled.squeeze().cpu().numpy()

In [None]:
def extractive_summary(text, num_sentences=3):
    """Build an extractive summary from the most "central" sentences of ``text``.

    Each sentence is embedded with ``get_bert_embedding``; a sentence's score is the sum of
    its cosine similarities to every sentence in the text. The ``num_sentences``
    highest-scoring sentences are returned in their original order.

    Args:
        text: The passage to summarise.
        num_sentences: Maximum number of sentences to keep (default 3).

    Returns:
        str: The selected sentences joined by single spaces. Returns ``""`` when the text
        contains no sentences or ``num_sentences`` is not positive, and the whole text's
        sentences when ``num_sentences`` is at least the number of sentences.
    """
    sentences = preprocess_text(text)

    # Nothing to select. Without this guard, num_sentences=0 would slice with [-0:] and
    # return every sentence, and an empty text would fail inside cosine_similarity.
    if not sentences or num_sentences <= 0:
        return ""

    # Every sentence would be kept anyway, so skip the (expensive) embedding passes.
    if num_sentences >= len(sentences):
        return " ".join(sentences)

    sentence_embeddings = np.array([get_bert_embedding(sent) for sent in sentences])

    similarity_matrix = cosine_similarity(sentence_embeddings)
    sentence_scores = similarity_matrix.sum(axis=1)

    # argsort is ascending, so the tail holds the best-scoring sentence indices.
    ranked_indices = np.argsort(sentence_scores)[-num_sentences:]
    # Re-sort the chosen indices so the summary follows the source order.
    return " ".join(sentences[i] for i in sorted(ranked_indices))

In [None]:
# Sample passage used to exercise extractive_summary. The literal spans several source
# lines, so the string itself contains line breaks inside sentences.
text = """Artificial Intelligence (AI) is a rapidly advancing field that aims to create intelligent
machines. It involves various subfields such as machine learning, deep learning, and natural language
processing. AI is used in numerous applications including healthcare, finance, and autonomous systems.
With the increasing availability of data and computational power, AI continues to make remarkable
progress. However, ethical considerations and biases remain significant challenges in AI development."""

# Ask for a two-sentence summary of the passage and print it under a heading.
summary = extractive_summary(text, num_sentences=2)
print("Generated Summary:\n" + summary)