# WEEK-7

# a) EXTRACTIVE TEXT SUMMARIZATION USING TF-IDF AND COSINE SIMILARITY

In [1]:
import nltk
import numpy as np
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Extractive summary: score every sentence by how similar its TF-IDF vector is
# to all sentences of the text, then keep the N highest-scoring sentences.

# Sentence-tokenizer data. Recent NLTK releases look for "punkt_tab" while
# older ones use "punkt", so request both. A failed download (e.g. no network)
# is only reported; tokenization still works when the data is already cached.
nltk.download("punkt")
nltk.download("punkt_tab")

text = """
Artificial Intelligence is rapidly transforming the world. 
It has applications in healthcare, finance, education, and more. 
Ongoing research aims to make AI safer and more transparent. 
However, ethical challenges remain in the widespread adoption of AI. 
"""
sentences = nltk.sent_tokenize(text)

# Replace each run of non-letters with a single space instead of deleting it,
# so words joined only by punctuation or a line break ("a,b", "a-b") are not
# fused into one token before vectorizing.
cleaned = [re.sub(r'[^a-zA-Z]+', ' ', s.lower()).strip() for s in sentences]

vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(cleaned)

# Entry (i, j) is the cosine similarity between sentence i and sentence j.
similarity_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)
# Row sums give one score per sentence (each sum also contains the sentence's
# similarity with itself).
scores = similarity_matrix.sum(axis=1)

N = 2  # number of sentences kept in the summary
# argsort is ascending, so the last N positions are the highest scores.
top_sentence_idx = np.argsort(scores)[-N:]
# Sort the chosen indices so the summary follows the original sentence order.
summary = " ".join([sentences[i] for i in sorted(top_sentence_idx)])
print("Summary:")
print(summary)


Summary:
Ongoing research aims to make AI safer and more transparent. However, ethical challenges remain in the widespread adoption of AI.


[nltk_data] Error loading punkt: <urlopen error [Errno -3] Temporary
[nltk_data]     failure in name resolution>


# b. ABSTRACTIVE TEXT SUMMARIZATION WITH TRANSFORMERS (Google Colab)

In [1]:
from transformers import pipeline 
# Abstractive summarization with the "facebook/bart-large-cnn" checkpoint.
# Building the pipeline loads the model files, which must be reachable on the
# Hugging Face hub or already present in the local cache.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

# Input passage, written as explicit pieces so the line breaks and trailing
# spaces that are part of the string stay visible.
text = (
    "The Apollo 11 mission was the first human spaceflight to land on the Moon. \n"
    "Commander Neil Armstrong and lunar module pilot Buzz Aldrin landed on July 20, \n"
    "1969, while Michael Collins orbited above. Armstrong became the first human to step \n"
    "onto the Moon."
)

# Length bounds for the generated summary; sampling is switched off.
generation_options = {"max_length": 50, "min_length": 25, "do_sample": False}
summary = summarizer(text, **generation_options)

# The pipeline result is indexed as a list; the generated text of the first
# entry is stored under "summary_text".
first_result = summary[0]
print("Abstractive Summary:\n", first_result['summary_text'])


2025-09-19 13:09:45.292995: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758287385.509056      36 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758287385.571692      36 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.

# c. EXTRACTIVE SUMMARIZATION USING BERT AND SPACY (Google Colab)

In [2]:
import spacy
from sentence_transformers import SentenceTransformer, util
# Extractive summary: always keep the lead sentence, then add the sentence
# (other than the lead) whose embedding is on average most similar to all
# sentences of the text.
nlp = spacy.load("en_core_web_sm")
model = SentenceTransformer('bert-base-nli-mean-tokens')
text = """Machine learning is a branch of artificial intelligence that focuses on building 
systems that learn from data. 
It has become essential in applications like recommendation systems, fraud detection, and 
autonomous driving. 
Despite its success, challenges such as overfitting and data quality issues remain 
important considerations."""
doc = nlp(text)

# The source text is hard-wrapped, so a sentence can contain line breaks.
# Collapse all internal whitespace to single spaces so the printed summary
# reads as continuous prose.
sentences = [" ".join(sent.text.split()) for sent in doc.sents]

embeddings = model.encode(sentences, convert_to_tensor=True)
# Pairwise cosine similarity between all sentence embeddings.
cosine_scores = util.cos_sim(embeddings, embeddings)
# One score per sentence: its mean similarity to every sentence.
avg_scores = cosine_scores.mean(dim=1)

summary_sentences = [sentences[0]]
# Walk the sentences from highest to lowest score and take the first one that
# is not the lead sentence. Checking only the single top index would add
# nothing whenever the lead sentence itself ranks first.
ranked_indices = avg_scores.argsort(descending=True).tolist()
top_other = next((idx for idx in ranked_indices if idx != 0), None)
if top_other is not None:  # None only when the text has a single sentence
    summary_sentences.append(sentences[top_other])
summary = ' '.join(summary_sentences)
print("Extractive Summary:\n", summary)

OSError: We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.