## Importing Necessary Libraries

In [1]:
import pandas as pd
import requests
import time
from bs4 import BeautifulSoup
import torch
from transformers import BartForConditionalGeneration, BartTokenizer

## Parsing Data from the Website

In [2]:
# Listing page whose article cards are scraped by the following cells.
webUrl = "https://www.hindustantimes.com/india-news"
# Browser-like User-Agent header, reused for every request in this notebook.
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; ARM64 Mac OS X 14_0; rv:97.0) Gecko/20100101 Firefox/97.0'}

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports HTTP errors here rather than letting an error
# page be parsed and fail later with an obscure "NoneType" attribute error.
r = requests.get(webUrl, headers=headers, timeout=30)
r.raise_for_status()
htmlContent = r.content

soup = BeautifulSoup(htmlContent, 'html.parser')
# Pause 5 seconds (presumably to space out requests to the site).
time.sleep(5)

## Finding Links for Summaries

In [3]:
# Look the listing container up once. find() returns None when the element is
# absent, so fail with a clear message instead of an AttributeError.
listingSection = soup.find('section', class_ = 'listingPage')
if listingSection is None:
    raise RuntimeError("Could not find the 'listingPage' section; the page layout may have changed.")

# Article cards are matched under two class strings (bigCart and listView);
# collect both, big cards first.
news = listingSection.find_all('div', class_ = 'cartHolder bigCart track timeAgo') + listingSection.find_all('div', class_ = 'cartHolder listView track timeAgo')

# Keep only cards that actually carry a URL, so later requests never get None.
newsList = [div.get('data-weburl') for div in news if div.get('data-weburl')]

## Initializing the BART Model

In [4]:
# Identifier of the pretrained checkpoint; the same name is passed to both
# from_pretrained() calls below so the tokenizer and model come from one source.
model_name = 'facebook/bart-large-cnn'
# Tokenizer used by gen_summary to encode article text and decode summary ids.
tokenizer = BartTokenizer.from_pretrained(model_name)
# BART conditional-generation model; gen_summary calls its generate() method.
model = BartForConditionalGeneration.from_pretrained(model_name)

## Function for Summary Generation

In [5]:
def gen_summary(text, num_beams=4, max_length=150):
    """Summarize *text* with the module-level BART tokenizer and model.

    Args:
        text: Article text to summarize.
        num_beams: Beam width passed to ``model.generate``.
        max_length: ``max_length`` passed to ``model.generate``, capping the
            length of the generated summary.

    Returns:
        The decoded summary string, or ``""`` when *text* is empty or
        whitespace-only (there is nothing to summarize, so the model is not
        invoked at all).
    """
    if not text or not text.strip():
        return ""

    # Tokenize the input text
    input_ids = tokenizer.encode(text, truncation=True, padding='longest', return_tensors='pt')

    # Generate the summary
    summary_ids = model.generate(input_ids, num_beams=num_beams, max_length=max_length, early_stopping=True)

    # Decode the generated summary. Index the single batch row explicitly:
    # squeeze() would also collapse the sequence dimension of a one-token output.
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary

## Getting the Summary for All the News Articles

In [8]:
# Iterating over each News
for newsUrl in newsList:

    # Fetch the article page. A timeout or connection error on one URL is
    # reported and skipped so it does not abort the remaining articles.
    try:
        r = requests.get(newsUrl, headers = headers, timeout = 30)
    except requests.RequestException as err:
        print("Skipping", newsUrl, "-", err)
        time.sleep(5)
        continue

    htmlContent = r.content
    newsSoup = BeautifulSoup(htmlContent, 'html.parser')

    # Getting the Title of the News (find() returns None if the heading is absent)
    titleTag = newsSoup.find('h1', class_ = 'hdg1')
    title = titleTag.text if titleTag is not None else "(title not found)"
    print(title)
    print("\n")

    # Getting the Data of the News
    newsArticle = newsSoup.find('div', class_ = 'detail')

    # Finding the Data from the News
    if newsArticle:
        # Join the text of every <p> with single spaces, then trim the ends
        paragraph_text = " ".join(
            paragraph.get_text(strip=True) for paragraph in newsArticle.find_all("p")
        ).strip()

        summary = gen_summary(paragraph_text)

        print(summary)

        print("\n")
        print("==========================================================X======================================================")
        print("\n")

    # Wait 5 seconds between article requests
    time.sleep(5)

Religious intolerance, press freedom: 75 US lawmakers ask Biden to raise rights issues with PM Modi


75 Democratic senators and members of the House of Representatives signed the letter. They said they were concerned about religious intolerance, press freedoms, internet access and the targeting of civil society groups. Modi left for Washington on Tuesday for avisit projected as a milestone in ties between the two countries.




'Deepen people-to-people ties, address climate change': White House on Modi-Biden meet's outcomes


PM Modi is on a state visit to the US. He is expected to hold talks on various issues with President Biden. The two leaders will also focus on strengthening and deepening people-to-people ties, adding that this factor will be guiding the relationship between the two countries in future.




‘Modi's US visit to enhance bilateral ties, not about China, Russia’: White House


The main objective of Prime Minister NarendraModi's visit to the United States is to streng

CM Gehlot approves aid for research scholars


Rajasthan chief minister Ashok Gehlot on Tuesday approved a proposal of ₹62.30 crore to provide financial assistance to around 6,000 research scholars in the state. “About 2200 research scholars of government colleges and universities receiving financial aid from the state government will be given a fellowship of₹20,000 per month,” the release said.




In a green agri push, cabinet nod likely for PM-Pranam, 2 schemes


PM-Pranam is a scheme to hand out cash incentives to states that can cut their consumption of chemical soil nutrients. It is part of a broader policy push to limit fertilizer subsidies, and promote natural and organic farming. Food and fertiliser subsidies accounted for about an eighth of India’s budget worth nearly ₹39 lakh crore.




China blocks India’s bid to tag 26/11 plotter as terrorist


Sajid Mir is wanted in connection with the 2008 Mumbai attacks. Mir has already been designated a terrorist under the laws of Indi

## Using Hyperparameter Tuning for Optimal Results

In [None]:
from sklearn.model_selection import GridSearchCV

# Define the existing code for retrieving news articles and generating summaries
def retrieve_news_and_summarize(newsList):
    """Fetch each article URL and return the generated summaries.

    Args:
        newsList: Iterable of article URLs.

    Returns:
        A list of summaries (from ``gen_summary``), in input order, with one
        entry per article whose page contains a ``div`` of class ``detail``.
        URLs that cannot be fetched, or whose page lacks that element,
        contribute no entry.
    """
    summaries = []

    for newsUrl in newsList:
        # A timeout or connection error on one URL is reported and skipped so
        # it does not abort the remaining articles.
        try:
            r = requests.get(newsUrl, headers=headers, timeout=30)
        except requests.RequestException as err:
            print("Skipping", newsUrl, "-", err)
            continue

        newsSoup = BeautifulSoup(r.content, 'html.parser')

        # The page title is not needed for the summaries, so it is not looked
        # up; a page without the heading must not abort the run.
        newsArticle = newsSoup.find('div', class_='detail')
        if newsArticle is None:
            continue

        # Join the text of every <p> with single spaces, then trim the ends.
        paragraph_text = " ".join(
            paragraph.get_text(strip=True) for paragraph in newsArticle.find_all("p")
        ).strip()

        summaries.append(gen_summary(paragraph_text))

    return summaries

# NOTE(review): this cell is marked "In [None]" (no recorded execution) and
# does not look runnable as written — see the notes on individual statements.

# Define the hyperparameter grid
# (candidate values for the two generation settings used inside gen_summary)
hyperparameters = {
    'num_beams': [2, 4, 8],
    'max_length': [50, 100, 150]
}

# Create a scoring metric
# NOTE(review): 'rouge-n' does not appear to be one of scikit-learn's
# predefined scoring names — confirm, or supply a custom scorer callable.
scoring = 'rouge-n'

# Create a GridSearchCV object
# NOTE(review): gen_summary is a plain function, not an estimator object;
# GridSearchCV is documented to expect an estimator implementing fit —
# TODO confirm and wrap gen_summary or replace this with a manual search.
grid_search = GridSearchCV(
    estimator=gen_summary,
    param_grid=hyperparameters,
    scoring=scoring,
    cv=5,
    n_jobs=-1
)

# Retrieve news articles and generate summaries
# (re-fetches every URL in newsList and runs gen_summary on each article)
summaries = retrieve_news_and_summarize(newsList)

# Fit the GridSearchCV object
# NOTE(review): target_variable is a placeholder that is not defined in the
# visible part of this file; presumably it should hold reference summaries.
grid_search.fit(summaries, target_variable)  # Replace 'target_variable' with your target variable

# Print the best hyperparameters
print("Best Hyperparameters:", grid_search.best_params_)

# Print the best summary
# NOTE(review): paragraph_text is not assigned in this cell; it can only resolve
# to a module-level value left behind by the earlier article loop (the variable
# of the same name inside retrieve_news_and_summarize is local to that function).
best_summary = grid_search.best_estimator_.predict(paragraph_text)  # Replace 'paragraph_text' with your test data
print("Best Summary:", best_summary)