In [None]:
pip install -U sentence-transformers

In [8]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [9]:
from textblob import TextBlob
import torch

In [None]:
!pip install bert-extractive-summarizer
from summarizer.sbert import SBertSummarizer

In [11]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_bbc_sport():
    """Scrape the BBC Sport front page and list the articles it links to.

    Returns:
        pandas.DataFrame with the columns ``sport``, ``headline`` and
        ``weblink``. ``sport`` is the second-to-last ``/``-separated segment
        of the link, with ``africa`` renamed to ``african-sports``; rows whose
        segment is ``news`` are dropped. When nothing is found the frame is
        empty but still carries all three columns.

    Raises:
        requests.RequestException: if the page cannot be fetched within the
            timeout or the server answers with an HTTP error status.
    """
    # Local import so the block does not depend on edits to the import cell.
    from urllib.parse import urljoin

    url = 'https://www.bbc.com/sport'

    # Without a timeout a stalled connection would block the notebook forever;
    # raise_for_status() surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')

    # Candidate articles are the <div type="article"> elements.
    article_elements = soup.find_all('div', {'type': 'article'})

    data = []
    for article_element in article_elements:
        link_element = article_element.find('a')
        link = link_element.get('href') if link_element else None
        if not link:
            continue  # no anchor, or an anchor without an href

        segments = link.split('/')
        if len(segments) < 2:
            continue  # too short to carry a sport segment

        headline_element = article_element.find('p')
        headline = headline_element.get_text(strip=True) if headline_element else ""

        data.append({
            'sport': segments[-2],
            'headline': headline,
            # urljoin yields the same URL as plain concatenation for
            # root-relative links, and leaves absolute links intact.
            'weblink': urljoin('http://bbc.com', link),
        })

    # Explicit columns keep the filters below valid even when data is empty.
    df = pd.DataFrame(data, columns=['sport', 'headline', 'weblink'])

    # Filter out news articles; copy so the assignment below does not write
    # into a view of the unfiltered frame.
    df = df[df['sport'] != 'news'].copy()

    # Categorize Africa-related articles as "african-sports".
    df['sport'] = df['sport'].replace({'africa': 'african-sports'})

    return df

In [12]:
def retrieve_articles_by_weblinks(df, sport):
    """Download the body text of every article listed for ``sport``.

    Args:
        df: DataFrame with ``sport``, ``headline`` and ``weblink`` columns.
        sport: Value of the ``sport`` column selecting the rows to fetch.

    Returns:
        pandas.DataFrame with the columns ``title`` and ``article``, one row
        per selected row of ``df`` in the same order. ``article`` is the text
        of the matching paragraphs joined by newlines, or an empty string when
        no paragraph matches.

    Raises:
        requests.RequestException: if a page cannot be fetched within the
            timeout.
    """
    sport_articles = df[df['sport'] == sport]

    articles_data = {'title': [], 'article': []}

    # Walk headline/link pairs row by row: each article keeps its own headline
    # (even when a link occurs more than once) and no per-link scan of the
    # whole frame is needed.
    for headline, weblink in zip(sport_articles['headline'], sport_articles['weblink']):
        # A timeout keeps one stalled page from hanging the whole session.
        response = requests.get(weblink, timeout=30)

        soup = BeautifulSoup(response.text, 'html.parser')

        # Body paragraphs are the <p> elements whose data-reactid attribute
        # contains "$paragraph".
        paragraphs = soup.find_all('p', attrs={"data-reactid": lambda value: value and "$paragraph" in value})

        article_text = "\n".join(paragraph.get_text(strip=True) for paragraph in paragraphs)

        articles_data['title'].append(headline)
        articles_data['article'].append(article_text)

    return pd.DataFrame(articles_data)

In [13]:
def get_sentiment(text):
    """Return the polarity score that TextBlob assigns to ``text``."""
    return TextBlob(text).sentiment.polarity

In [14]:
def newsAnalysis(dataDiX):
    """Score every text stored as a value of ``dataDiX``.

    Args:
        dataDiX: Mapping whose values are the texts to analyse; the keys are
            not used.

    Returns:
        Tuple ``(labels, scores)`` of two lists in the mapping's value order:
        the label from ``find_Sentiment`` and the polarity from
        ``get_sentiment`` for each text.
    """
    scored = []
    for story in dataDiX.values():
        score = get_sentiment(story)
        scored.append((find_Sentiment(score), score))

    labels = [label for label, _ in scored]
    scores = [score for _, score in scored]
    return labels, scores

In [15]:
def find_Sentiment(polarity):
    """Translate a polarity score into a label.

    Returns "Positive" for scores above zero, "Negative" for scores below
    zero and "Neutral" otherwise.
    """
    if polarity > 0:
        return "Positive"
    return "Negative" if polarity < 0 else "Neutral"

In [16]:
def calculate_overall_sentiment(article_text):
    """Average the sentiment of the non-blank lines of ``article_text``.

    The text is split on newline characters; lines that are empty or
    whitespace-only are ignored. Returns the mean of ``get_sentiment`` over
    the remaining lines, or 0 when there are none.
    """
    scores = []
    for chunk in article_text.split('\n'):
        if chunk.strip():
            scores.append(get_sentiment(chunk))

    if not scores:
        return 0  # no paragraph found

    return sum(scores) / len(scores)

In [17]:
# Summarizers that have already been built, keyed by model name, so the model
# is constructed once per kernel session instead of on every call.
_SBERT_SUMMARIZERS = {}


def _get_sbert_summarizer(model_name):
    """Return the cached SBertSummarizer for ``model_name``, building it on first use."""
    if model_name not in _SBERT_SUMMARIZERS:
        _SBERT_SUMMARIZERS[model_name] = SBertSummarizer(model_name)
    return _SBERT_SUMMARIZERS[model_name]


def summarize_using_sbert(text, ratio=0.2):
    """Produce an extractive summary of ``text`` with an SBERT summarizer.

    Args:
        text: The article text to summarise.
        ratio: Passed to the summarizer as its ``ratio`` argument.
            NOTE(review): the bert-extractive-summarizer docs describe
            ``num_sentences`` as overriding ``ratio`` when both are given, so
            this value may only matter if no sentence count is computed —
            confirm against the installed version.

    Returns:
        Whatever the summarizer call returns for ``text``; an empty string
        when ``text`` is empty or whitespace-only (nothing to summarise).
    """
    # Blank text (e.g. an article whose paragraphs could not be extracted)
    # is answered directly rather than handed to the model.
    if not text or not text.strip():
        return ""

    summarizer_model = _get_sbert_summarizer('paraphrase-MiniLM-L6-v2')

    # Let the summarizer pick the sentence count, capped at 10.
    res = summarizer_model.calculate_optimal_k(text, k_max=10)

    # Forward the caller's ratio instead of a hard-coded 0.2.
    return summarizer_model(text, ratio=ratio, num_sentences=res)

In [18]:
def chat_bot():
    """Run the interactive sports-news session on standard input/output.

    Flow:
        1. Scrape the current headlines with ``scrape_bbc_sport`` and list
           the sport categories found.
        2. Repeatedly ask for a category number (``exit`` quits), fetch that
           category's articles and print each headline with its sentiment
           label and score.
        3. Ask for a headline number (``back`` returns to the category
           prompt), then print the article text and its summary.

    Invalid numbers are reported and the category prompt is shown again.
    Returns None.
    """
    print("Welcome to the Sports News Chatbot!")
    print("Fetching the latest sports news...\n")

    # Get the latest sports news
    sports_news_df = scrape_bbc_sport()

    # Display available sports categories
    sports_categories = sports_news_df['sport'].unique()
    if len(sports_categories) == 0:
        # Nothing to choose from, so the prompt loop would be a dead end.
        print("No sports articles could be found right now. Please try again later.")
        return

    print("Available sports categories:")
    for idx, category in enumerate(sports_categories, start=1):
        print(f"{idx}. {category.capitalize()}")

    # User interaction loop
    while True:
        user_input = input("\nEnter the number of the sport category you are interested in (type 'exit' to quit): ").strip()

        if user_input.lower() == 'exit':
            print("Goodbye! Have a great day.")
            break

        # Guard only the conversion, so a ValueError raised while fetching or
        # analysing articles is not misreported as bad user input.
        try:
            selection_index = int(user_input)
        except ValueError:
            print("Invalid input. Please enter a number or 'exit'.")
            continue

        if not 1 <= selection_index <= len(sports_categories):
            print("Invalid index. Please select a valid sports category.")
            continue

        selected_category = sports_categories[selection_index - 1]
        selected_articles = retrieve_articles_by_weblinks(sports_news_df, selected_category)

        # Key the stories by row position rather than by title: duplicate
        # titles would otherwise collapse into one entry and shift every
        # later sentiment onto the wrong headline.
        stories_by_position = dict(enumerate(selected_articles['article']))

        # Perform sentiment analysis
        sentiment_labels, sentiment_values = newsAnalysis(stories_by_position)

        print("\nSelected Articles with Sentiment Analysis:")
        for idx, (title, sentiment_label, sentiment_value) in enumerate(zip(selected_articles['title'], sentiment_labels, sentiment_values), start=1):
            print(f"\n{idx}.'{title}' : {sentiment_label} | Sentiment Value: {sentiment_value:.2f}")

        headline_choice = input("\nEnter the number of the headline to read the article summary. (type 'back' to go back): ").strip()

        if headline_choice.lower() == 'back':
            continue

        try:
            headline_number = int(headline_choice)
        except ValueError:
            print("Invalid index. Please enter a valid headline number.")
            continue

        # Explicit range check: 0 or a negative number would otherwise be
        # accepted by iloc and silently pick an article from the end.
        if not 1 <= headline_number <= len(selected_articles):
            print("Invalid index. Please enter a valid headline number.")
            continue

        selected_headline = selected_articles.iloc[headline_number - 1]
        article_text = selected_headline['article']

        # Header first, so the article text appears under it.
        print(f"\nReading article for '{selected_headline['title']}':\n")

        if not article_text.strip():
            print("No article text could be extracted for this headline.")
            continue

        print(article_text)

        # Integrate sBERT for sentence summarization. The same exception
        # types as before are handled, but reported as a summarisation
        # problem rather than as an invalid index.
        try:
            article_summary = summarize_using_sbert(article_text)
        except (ValueError, IndexError) as error:
            print(f"Could not summarize this article: {error}")
            continue

        print(f"\nSummary for '{selected_headline['title']}':\n{article_summary}")

# Start the interactive chatbot session when this code runs as the main program.
if __name__ == "__main__":
    chat_bot()

Welcome to the Sports News Chatbot!
Fetching the latest sports news...

Available sports categories:
1. Football
2. Tennis
3. Formula1
4. Rugby-union
5. Snooker
6. Sport
7. Basketball
8. Rugby-league
9. African-sports
10. Winter-sports
11. Cricket
12. Boxing
13. Ice-hockey

Enter the number of the sport category you are interested in (type 'exit' to quit): 1

Selected Articles with Sentiment Analysis:

1.'Henderson's Saudi move a 'slap in the face' - Daniels' : Positive | Sentiment Value: 0.08

2.'England's pioneer who died with a tale untold' : Positive | Sentiment Value: 0.11

3.'FAI confirms Kenny's exit as Republic manager' : Positive | Sentiment Value: 0.08

4.''It could have been a tragedy' - Messi on police-fan clashes' : Negative | Sentiment Value: -0.02

5.'Argentina boss Scaloni says he may resign' : Positive | Sentiment Value: 0.12

6.'Barcelona come from behind to beat Frankfurt' : Positive | Sentiment Value: 0.03

7.'Lyon maintain perfect start in Champions League' : Posit

.gitattributes:   0%|          | 0.00/690 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.69k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]


Reading article for ''It could have been a tragedy' - Messi on police-fan clashes':


Summary for ''It could have been a tragedy' - Messi on police-fan clashes':
('Lionel Messi believes there "could have been a tragedy" during the crowd trouble that delayed Argentina\'s World Cup qualifier against Brazil. The trouble unfolded a fortnight after scuffles between fans of Brazilian side Fluminense and Argentine club Boca Juniors before the Copa Libertadores final - the South American equivalent of the Champions League final - which was also staged at the Maracana. The five-time World Cup winners have now lost three successive qualifiers to sit sixth in the South American qualifying table, eight points behind leaders Argentina and in the last spot that guarantees a place at the 2026 finals.', 3)

Enter the number of the sport category you are interested in (type 'exit' to quit): exit
Goodbye! Have a great day.
