In [None]:
# Colab-specific installations
!pip install transformers sentencepiece pymongo bs4 requests python-dotenv
!pip install "pymongo[srv]"

import datetime
import requests
import os
import torch
from bs4 import BeautifulSoup
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi
from dotenv import load_dotenv
from datetime import datetime, timedelta
from google.colab import drive

# Make Google Drive visible under /content/drive so files can be read or saved
drive.mount('/content/drive')

# Hugging Face transformers
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

# Report whether a CUDA device is visible and, if so, which one
print("GPU Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU Device Name:", torch.cuda.get_device_name(0))
else:
    print("GPU Device Name:", "No GPU")

# Read environment variables from a dotenv file kept on Google Drive
# (the file has to exist there beforehand)
load_dotenv('/content/drive/MyDrive/cred.env')

mongo_username = 'hiddenmongo_password = 'hidden

Collecting pymongo
  Downloading pymongo-4.11.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (22 kB)
Collecting bs4
  Downloading bs4-0.0.2-py2.py3-none-any.whl.metadata (411 bytes)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting dnspython<3.0.0,>=1.16.0 (from pymongo)
  Downloading dnspython-2.7.0-py3-none-any.whl.metadata (5.8 kB)
Downloading pymongo-4.11.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bs4-0.0.2-py2.py3-none-any.whl (1.2 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Downloading dnspython-2.7.0-py3-none-any.whl (313 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.6/313.6 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-dotenv, dnspython, pymongo, bs4
Successf

In [None]:
# MongoDB setup
# NOTE(review): the connection string is embedded in the notebook; confirm it
# carries no credentials before sharing.
uri = 'mongodb+srv://hidden.km1fx.mongodb.net/?retryWrites=true&w=majority&appName=buildathon'

# Creating the client is deliberately left outside the try block below, so a
# failure here stops the cell instead of being printed and ignored.
client = MongoClient(uri, server_api=ServerApi('1'))

# Ping the deployment; a failure is reported but does not stop the cell
try:
    client.admin.command('ping')
except Exception as ping_error:
    print(ping_error)
else:
    print("Pinged your deployment. You successfully connected to MongoDB!")

# Handle to the database used by the rest of the notebook
db = client['news_scraper']


ConfigurationError: The DNS query name does not exist: _mongodb._tcp.buildathon.km1fx.mongodb.net.

In [None]:

# Use an open-source model for summarization, optimized for T4 GPU
model_name = "facebook/bart-large-cnn"

# Pick the device once and reuse it everywhere. The original hard-coded CUDA,
# which crashes on a CPU-only runtime; fall back to CPU when no GPU is visible.
torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
pipeline_device = 0 if torch.cuda.is_available() else -1  # pipeline convention: -1 means CPU

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(torch_device)
summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device=pipeline_device)

# Define the custom prompt template
# NOTE(review): this text is prepended to the article and passed to the
# summarizer as ordinary input; confirm the model actually benefits from it.
prompt_template = '''Summarize the following news article in a concise paragraph, focusing on the main highlights.
Remove any text not directly related to the article content. Ensure the summary includes the publishing date if available.
Article text: {article}'''

# Instantiate the sentiment analysis pipeline.
# The model is pinned explicitly because downstream code compares against the
# 'POSITIVE' / 'NEGATIVE' labels; relying on the library's implicit default
# would let those labels change silently.
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
    device=pipeline_device,
)


In [None]:

# Function to get text from URL using BeautifulSoup
def get_text_from_url(url, timeout=10):
    """Download a page and return its visible text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host would stall the whole run.

    Returns:
        The page text with surrounding whitespace stripped, or None when the
        request fails (network error, timeout, or a non-200 status code).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as HTTP
        # failures so callers only ever have to handle a None result.
        print(f"Failed to fetch data from URL: {url}, error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data from URL: {url}, status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.get_text().strip()


In [None]:
# Terms whose presence marks a text as liquidity-related
liquidity_keywords = [
    "liquidity",
    "cash flow",
    "debt",
    "financing",
    "credit",
    "solvency",
    "funding",
    "capital",
]

# Terms that flag a critical event
critical_event_keywords = [
    "bankruptcy",
    "lawsuit",
    "regulatory action",
    "recall",
    "data breach",
    "fraud",
    "scandal",
    "accident",
    "disaster",
    "layoffs",
]

# Run-wide accumulators, filled while articles are processed
all_sentiments, all_liquidity_impacts, all_critical_events, all_decisions = [], [], [], []
total_articles_analyzed = 0

def clean_text(primary_text):
    """Summarize an article and classify the sentiment of that summary.

    Args:
        primary_text: Raw article text. May be None or empty when the
            upstream fetch failed.

    Returns:
        A ``(summary, sentiment)`` tuple, where ``sentiment`` is the first
        result returned by the sentiment pipeline. ``(None, None)`` is
        returned when there is no text to work on or when either pipeline
        raises.
    """
    try:
        # A failed fetch yields None; formatting that into the prompt would
        # summarize the literal string "None", so bail out early instead.
        if not primary_text or not primary_text.strip():
            print("Error cleaning text: no article text provided")
            return None, None

        # Format the custom prompt with the article's primary text
        formatted_prompt = prompt_template.format(article=primary_text)

        # Target roughly half the input's word count, clamped to 50..150
        input_length = len(formatted_prompt.split())
        max_length = max(50, min(150, int(input_length * 0.5)))

        # min_length is 30% of max_length but never below 30; since
        # max_length >= 50 this always stays below max_length.
        min_length = max(30, int(max_length * 0.3))

        # Use the computed bounds (previously computed but then ignored in
        # favour of hard-coded values) and truncate over-long inputs rather
        # than letting the model fail on them.
        summary = summarizer(
            formatted_prompt,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True
        )[0]['summary_text']

        # Score the summary, not the raw page text: the summary is short
        # enough for the classifier and free of page boilerplate.
        sentiment = sentiment_analyzer(summary, truncation=True)[0]

        return summary, sentiment
    except Exception as e:
        print(f"Error cleaning text: {e}")
        return None, None

# Function to forecast liquidity impact
def forecast_liquidity(summary, sentiment):
    """Classify the liquidity impact suggested by a text and its sentiment.

    The text is matched case-insensitively against ``liquidity_keywords``.
    If none of them occurs the result is "Neutral" and ``sentiment`` is not
    inspected. Otherwise the sentiment label decides between a negative or
    positive impact; any other label also gives "Neutral".
    """
    text = summary.lower()

    mentions_liquidity = False
    for term in liquidity_keywords:
        if term in text:
            mentions_liquidity = True
            break

    if not mentions_liquidity:
        return "Neutral"

    if sentiment['label'] == 'NEGATIVE':
        return "Negative Impact on Liquidity"
    if sentiment['label'] == 'POSITIVE':
        return "Positive Impact on Liquidity"
    return "Neutral"

# Function to check for critical events
def check_critical_events(summary):
    """Return the critical-event keywords that occur in ``summary``.

    Matching is a case-insensitive substring test. The result lists the
    matching entries of ``critical_event_keywords`` in that list's order,
    and is empty when nothing matches.
    """
    text = summary.lower()
    return [term for term in critical_event_keywords if term in text]

# Function for decision support
def decision_support_system(sentiment, liquidity_impact, critical_events):
    """Turn the per-article signals into a list of recommendation strings.

    Args:
        sentiment: Mapping with a 'label' and a 'score'; the score is only
            read when the label is 'NEGATIVE' or 'POSITIVE'.
        liquidity_impact: One of the impact strings; only the negative and
            positive variants add a recommendation.
        critical_events: Sequence of detected event keywords.

    Returns:
        The recommendations in the order sentiment, liquidity, critical
        events, or a single "no action" entry when none of them applies.
    """
    recommendations = []

    # Sentiment only counts when the classifier is confident (score above 0.7)
    label = sentiment['label']
    if label in ('NEGATIVE', 'POSITIVE') and sentiment['score'] > 0.7:
        if label == 'NEGATIVE':
            recommendations.append("Consider reducing exposure or monitoring closely due to negative sentiment.")
        else:
            recommendations.append("Positive outlook; potential opportunity to increase exposure.")

    # At most one liquidity note applies
    liquidity_notes = (
        ("Negative Impact on Liquidity", "Potential liquidity issues detected; reassess financial stability."),
        ("Positive Impact on Liquidity", "Improved liquidity expected; may strengthen financial position."),
    )
    for impact, note in liquidity_notes:
        if liquidity_impact == impact:
            recommendations.append(note)
            break

    # List every detected critical event in a single entry
    if critical_events:
        joined_events = ', '.join(critical_events)
        recommendations.append(f"Critical events detected: {joined_events}. Immediate action may be required.")

    if recommendations:
        return recommendations
    return ["No immediate action required; maintain current position."]

# Ask for the subject that every search strategy is built around
user_input = input("Enter your search query: ")

# Topic filter for each strategy; each one is AND-ed with the user's query below
_strategy_clauses = {
    'raw_material_costs': '("raw material" OR "supply chain" OR "input cost" OR "steel prices")',
    'laws_and_regulations': '("emission laws" OR "regulations" OR "tax" OR "climate regulation")',
    'economic_factors': '("financial report" OR "earnings" OR "profit" OR "loss")',
    'industry_events': '("auto industry" OR "market downturn" OR "recession")',
    'climate_and_sustainability': '("climate change" OR "carbon footprint" OR "sustainability initiatives" OR "EV investments")',
}

# Define search strategies: strategy name -> full query string
search_strategies = {
    strategy: f'{user_input} AND {clause}'
    for strategy, clause in _strategy_clauses.items()
}


In [None]:
# Time range: from two weeks ago up to now (naive UTC timestamps)
current_time = datetime.utcnow()
two_weeks_ago = current_time - timedelta(days=14)
# Second-resolution timestamp of the range start, e.g. 2024-01-31T08:15:00
time_str = f"{two_weeks_ago:%Y-%m-%dT%H:%M:%S}"

# Function to fetch news articles using an open RSS feed
def fetch_news(query, timeout=10):
    """Search the Google News RSS feed and return the matching items.

    Args:
        query: Search expression. It is sent as a request parameter so that
            spaces, quotes, ``&`` and similar characters are URL-encoded
            instead of corrupting the request URL.
        timeout: Seconds to wait for the feed before giving up.

    Returns:
        A list of the feed's ``<item>`` elements, or an empty list when the
        request fails or returns a non-200 status.
    """
    rss_url = "https://news.google.com/rss/search"
    params = {'q': query, 'hl': 'en-US', 'gl': 'US', 'ceid': 'US:en'}

    try:
        response = requests.get(rss_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        # Network failures are reported like HTTP failures so the caller's
        # loop simply sees "no articles" for this query.
        print(f"Failed to fetch news for query: {query} ({exc})")
        return []

    if response.status_code != 200:
        print(f"Failed to fetch news for query: {query}")
        return []

    soup = BeautifulSoup(response.content, features="xml")
    # find_all is the current name of the deprecated findAll alias
    return soup.find_all('item')

# Main execution loop: for every search strategy, fetch the matching feed
# items, analyse each article, aggregate the results and store them.
for strategy_name, query in search_strategies.items():
    articles = fetch_news(query)

    for article in articles:
        # Extract relevant information from the RSS feed
        title = article.title.text
        link = article.link.text
        pub_date = article.pubDate.text

        # Get the text from the article's URL; skip the article when nothing
        # could be downloaded instead of handing None to the summarizer.
        article_text = get_text_from_url(link)
        if not article_text:
            print(f"Skipping article '{title}' for '{strategy_name}': no text could be fetched.")
            continue

        # Summarize the text and get sentiment
        cleaned_article, sentiment = clean_text(article_text)
        if not cleaned_article:
            print(f"Failed to clean article '{title}' for '{strategy_name}'.")
            continue

        # Keyword checks run on the headline, as before.
        # NOTE(review): confirm whether the summary should be scanned as well.
        liquidity_impact = forecast_liquidity(title, sentiment)
        critical_events = check_critical_events(title)

        # Get decision support recommendations
        decisions = decision_support_system(sentiment, liquidity_impact, critical_events)

        # Document to insert into MongoDB. 'cleaned_article' now holds the
        # generated summary; it previously duplicated the title.
        document = {
            'title': title,
            'publishedAt': pub_date,
            'source': article.source.text if article.source else 'Unknown',
            'cleaned_article': cleaned_article,
            'sentiment': sentiment,
            'liquidity_impact': liquidity_impact,
            'critical_events': critical_events,
            'decisions': decisions
        }

        # Collect data for aggregation
        all_sentiments.append(sentiment)
        all_liquidity_impacts.append(liquidity_impact)
        all_critical_events.extend(critical_events)
        all_decisions.extend(decisions)
        total_articles_analyzed += 1

        # Store the document. A database error is reported and the loop moves
        # on, so one failed write does not discard the rest of the run.
        try:
            db_res = db[strategy_name].insert_one(document)
        except Exception as e:
            print(f"Failed to insert article '{title}' into MongoDB: {e}")
            continue

        if db_res.acknowledged:
            print(f"Inserted article '{title}' into '{strategy_name}' collection.")
        else:
            print(f"Failed to insert article '{title}' into MongoDB.")


In [None]:
# Summarize the whole run once every article has been processed
from collections import Counter

print("\n=== Overall Analysis ===")

# --- Sentiment: split the collected results by label and count them ---
positive_sentiments = list(filter(lambda s: s['label'] == 'POSITIVE', all_sentiments))
negative_sentiments = list(filter(lambda s: s['label'] == 'NEGATIVE', all_sentiments))
num_positive, num_negative = len(positive_sentiments), len(negative_sentiments)
total_sentiments = num_positive + num_negative

print(
    f"Total articles analyzed: {total_articles_analyzed}",
    f"Positive sentiments: {num_positive}",
    f"Negative sentiments: {num_negative}",
    sep="\n",
)

# --- Liquidity impact: how often each outcome was recorded ---
positive_liquidity = sum(impact == "Positive Impact on Liquidity" for impact in all_liquidity_impacts)
negative_liquidity = sum(impact == "Negative Impact on Liquidity" for impact in all_liquidity_impacts)
neutral_liquidity = sum(impact == "Neutral" for impact in all_liquidity_impacts)

print(
    "\n=== Liquidity Impact Summary ===",
    f"Positive Impact on Liquidity: {positive_liquidity}",
    f"Negative Impact on Liquidity: {negative_liquidity}",
    f"Neutral Liquidity Impact: {neutral_liquidity}",
    sep="\n",
)

# --- Critical events: each distinct keyword is listed once ---
unique_critical_events = set(all_critical_events)
print("\n=== Critical Events Detected ===")
if not unique_critical_events:
    print("No critical events detected.")
else:
    for event in unique_critical_events:
        print(f"- {event}")

# --- Decisions: how many times each recommendation was issued ---
decision_counts = Counter(all_decisions)
print("\n=== Decision Recommendations ===")
for decision, count in decision_counts.items():
    print(f"- {decision}: {count} occurrences")

# Gather everything into the document that is stored for this run
overall_analysis = dict(
    query=user_input,
    timestamp=datetime.utcnow(),
    total_articles_analyzed=total_articles_analyzed,
    sentiment_summary=dict(
        positive=num_positive,
        negative=num_negative,
        total=total_sentiments,
    ),
    liquidity_impact_summary=dict(
        positive=positive_liquidity,
        negative=negative_liquidity,
        neutral=neutral_liquidity,
    ),
    critical_events_detected=list(unique_critical_events),
    decision_recommendations=dict(decision_counts),
)

# Write the run summary to the 'overall_analysis' collection and report the outcome
db_res = db['overall_analysis'].insert_one(overall_analysis)

print(
    "Overall analysis successfully inserted into MongoDB."
    if db_res.acknowledged
    else "Failed to insert overall analysis into MongoDB."
)