In [None]:
!pip install flask pandas transformers nltk newsapi-python plotly pyngrok


Collecting newsapi-python
  Downloading newsapi_python-0.2.7-py2.py3-none-any.whl (7.9 kB)
Collecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok, newsapi-python
Successfully installed newsapi-python-0.2.7 pyngrok-7.1.6


In [None]:
import os


In [None]:
# Create the two folders that later cells write index.html and styles.css into;
# exist_ok keeps the cell re-runnable.
for folder in ('templates', 'static'):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Write the app.py file
app_code = """

from flask import Flask, request, render_template, jsonify
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
import nltk
from nltk.tokenize import sent_tokenize
from newsapi import NewsApiClient
import plotly.express as px
import plotly.io as pio
from pyngrok import ngrok

import os  # only needed for the environment lookup below

# Initialize the Flask application
app = Flask(__name__)

# Initialize the News API client.
# The key is taken from the NEWSAPI_KEY environment variable so it does not have to be
# written into the source; the original placeholder is kept as the fallback value.
api_key = os.environ.get('NEWSAPI_KEY', 'your_actual_newsapi_key')
newsapi = NewsApiClient(api_key=api_key)

# Download the NLTK sentence tokenizer data used by sent_tokenize.
# NOTE(review): newer NLTK releases load the 'punkt_tab' resource instead of 'punkt';
# both are requested so the app works with whichever NLTK version was installed.
nltk.download('punkt')
nltk.download('punkt_tab')

# Load FinBERT model by Prosus AI
model_name = 'ProsusAI/finbert'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
nlp = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

def get_news_for_keyword(keyword, start_date, end_date):
    '''Collect English news articles matching ``keyword`` as a quoted phrase.

    Calls ``newsapi.get_everything`` for the range ``start_date`` to ``end_date``
    sorted by relevancy and returns a list of dicts with the keys ``title``,
    ``date``, ``snippet``, ``link`` and ``keyword``. If anything raises, the error
    is printed and the articles gathered up to that point are returned.
    '''
    phrase = f'"{keyword}"'  # Exact search
    collected = []
    try:
        response = newsapi.get_everything(
            q=phrase,
            from_param=start_date,
            to=end_date,
            language='en',
            sort_by='relevancy',
        )
        for item in response['articles']:
            collected.append(dict(
                title=item['title'],
                date=item['publishedAt'],
                snippet=item['description'],
                link=item['url'],
                keyword=keyword,
            ))
    except Exception as e:
        print(f"Failed to retrieve news for {keyword}: {e}")
    return collected

def analyze_sentiment(text):
    '''Run the sentiment pipeline on each sentence of ``text``.

    Returns one pipeline result per sentence, in sentence order. A value that is
    not a ``str`` produces an empty list.
    '''
    if not isinstance(text, str):
        return []
    # Each pipeline call returns a list; only its first entry is kept.
    return [nlp(sentence)[0] for sentence in sent_tokenize(text)]

def aggregate_sentiments(sentiments):
    '''Summarise a list of per-sentence results into one dict.

    ``sentiments`` is an iterable of mappings with ``label`` and ``score``.
    The result holds, for each of positive / neutral / negative, the share of
    sentences carrying that label and the mean score of those sentences
    (``<label>_confidence``). Entries with any other label are ignored, and
    every value is 0 when no entry was counted.
    '''
    labels = ('positive', 'neutral', 'negative')
    counts = dict.fromkeys(labels, 0)
    score_sums = dict.fromkeys(labels, 0.0)

    for result in sentiments:
        label, score = result['label'], result['score']
        if label in labels:
            counts[label] += 1
            score_sums[label] += score

    total = sum(counts.values())

    summary = {}
    # Shares first, then confidences, so the key order matches the documented layout.
    for label in labels:
        summary[label] = counts[label] / total if total > 0 else 0
    for label in labels:
        hits = counts[label]
        summary[f'{label}_confidence'] = score_sums[label] / hits if hits > 0 else 0
    return summary

@app.route('/')
def index():
    # Landing page: render the 'index.html' template.
    return render_template('index.html')

def _top_articles(df, confidence_column, sign, limit=5):
    '''Return the ``limit`` rows with the largest ``confidence_column`` as dicts.

    Every record additionally gets ``score`` (its ``confidence_column`` value)
    and ``sign``, the two fields the page shows next to each article.
    '''
    records = df.nlargest(limit, confidence_column).to_dict(orient='records')
    for record in records:
        record['score'] = record[confidence_column]
        record['sign'] = sign
    return records


@app.route('/search', methods=['POST'])
def search():
    '''Search news for a keyword and return sentiment results as JSON.

    Expects a JSON object with ``keyword`` and, optionally, ``start_date`` and
    ``end_date``. Returns ``top_positive``, ``top_negative`` and ``graph_html``
    on success, ``{'error': ...}`` when no articles were found, and the same
    error shape with status 400 when the keyword is missing or blank.
    '''
    # silent=True yields None for a missing or malformed JSON body instead of aborting.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    keyword = data.get('keyword')
    if not isinstance(keyword, str) or not keyword.strip():
        return jsonify({'error': 'Please enter a keyword.'}), 400
    keyword = keyword.strip()

    # The page submits '' for a date field left blank; None means "no date filter".
    # NOTE(review): an empty string appears to be rejected by the NewsAPI client's
    # date validation, which would surface as "No news articles found." - confirm.
    start_date = data.get('start_date') or None
    end_date = data.get('end_date') or None
    articles = get_news_for_keyword(keyword, start_date, end_date)

    if not articles:
        return jsonify({
            'error': 'No news articles found.'
        })

    df = pd.DataFrame(articles)
    df['sentences_sentiment'] = df['snippet'].apply(analyze_sentiment)
    df['aggregated_sentiment'] = df['sentences_sentiment'].apply(aggregate_sentiments)

    # Get top 5 positive sentiment articles
    df['positive_confidence'] = df['aggregated_sentiment'].apply(lambda x: x['positive_confidence'])
    top_positive = _top_articles(df, 'positive_confidence', 'positive')

    # Get top 5 negative sentiment articles
    df['negative_confidence'] = df['aggregated_sentiment'].apply(lambda x: x['negative_confidence'])
    top_negative = _top_articles(df, 'negative_confidence', 'negative')

    # Generate historical sentiment graph: daily mean of each aggregated value,
    # with days that have no articles filled with 0.
    df['date'] = pd.to_datetime(df['date'])
    sentiment_over_time = df.set_index('date')['aggregated_sentiment'].apply(pd.Series).resample('D').mean().fillna(0)
    fig = px.line(sentiment_over_time, x=sentiment_over_time.index, y=['positive', 'neutral', 'negative'], title='Sentiment Over Time')
    graph_html = pio.to_html(fig, full_html=False)

    return jsonify({
        'top_positive': top_positive,
        'top_negative': top_negative,
        'graph_html': graph_html
    })

if __name__ == '__main__':
    # Set up ngrok: open a tunnel to local port 5000 and print what connect() returned.
    # NOTE(review): 5000 is assumed to be the port app.run() listens on by default - confirm.
    public_url = ngrok.connect(5000)
    print(f' * ngrok tunnel "http://127.0.0.1:5000" -> "{public_url}"')

    # Start the Flask server with its default settings.
    app.run()


"""

# Write with an explicit encoding so the generated source file does not depend on
# the platform's default text encoding.
with open('app.py', 'w', encoding='utf-8') as f:
    f.write(app_code)

In [None]:
# Write the index.html file
index_html_code = """

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Sentiment Analysis</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
</head>
<body>
    <div class="container">
        <h1>Sentiment Analysis</h1>
        <form id="search-form">
            <input type="text" id="keyword" name="keyword" placeholder="Enter company name or keyword">
            <input type="date" id="start_date" name="start_date">
            <input type="date" id="end_date" name="end_date">
            <button type="submit">Search</button>
        </form>
        <div id="results">
            <h2>Top 5 Positive Sentiment Articles</h2>
            <div id="top-positive"></div>
            <h2>Top 5 Negative Sentiment Articles</h2>
            <div id="top-negative"></div>
            <h2>Sentiment Over Time</h2>
            <div id="sentiment-graph"></div>
        </div>
    </div>
    <script>
        // Build one article card with DOM calls instead of an HTML string. Titles,
        // snippets and links come from the server's news results, so they are inserted
        // as plain text and can never be parsed as markup.
        function renderArticle(article, markColor, markEntity) {
            const card = document.createElement('div');
            card.className = 'article';

            const heading = document.createElement('h3');
            heading.textContent = (article.title || '') + ' ';
            const mark = document.createElement('span');
            mark.style.color = markColor;
            mark.innerHTML = markEntity;  // fixed entity passed by this script, not response data
            heading.appendChild(mark);
            card.appendChild(heading);

            [article.date, article.snippet, 'Sentiment Score: ' + article.score].forEach(text => {
                const line = document.createElement('p');
                line.textContent = text == null ? '' : text;
                card.appendChild(line);
            });

            const link = document.createElement('a');
            // Only http(s) addresses become clickable.
            if (typeof article.link === 'string' && /^https?:/i.test(article.link)) {
                link.href = article.link;
            }
            link.target = '_blank';
            link.rel = 'noopener noreferrer';
            link.textContent = 'Read more';
            card.appendChild(link);
            return card;
        }

        // Markup assigned through innerHTML never runs its script elements, so the
        // graph snippet from the server would stay inert. Each script node is replaced
        // by a freshly created copy, which the browser does execute.
        function renderGraph(container, graphHtml) {
            container.innerHTML = graphHtml;
            container.querySelectorAll('script').forEach(inert => {
                const live = document.createElement('script');
                Array.from(inert.attributes).forEach(attr => live.setAttribute(attr.name, attr.value));
                live.text = inert.text;
                inert.replaceWith(live);
            });
        }

        // Submit the search form as JSON to /search and render the response.
        document.getElementById('search-form').addEventListener('submit', function(e) {
            e.preventDefault();
            const payload = {
                keyword: document.getElementById('keyword').value,
                start_date: document.getElementById('start_date').value,
                end_date: document.getElementById('end_date').value
            };
            fetch('/search', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify(payload)
            })
            .then(response => response.json())
            .then(data => {
                if (data.error) {
                    alert(data.error);
                    return;
                }

                // Display top positive articles
                const topPositive = document.getElementById('top-positive');
                topPositive.innerHTML = '';
                data.top_positive.forEach(article => {
                    topPositive.appendChild(renderArticle(article, 'green', '&#x2714;'));
                });

                // Display top negative articles
                const topNegative = document.getElementById('top-negative');
                topNegative.innerHTML = '';
                data.top_negative.forEach(article => {
                    topNegative.appendChild(renderArticle(article, 'red', '&#x2716;'));
                });

                // Display sentiment graph
                renderGraph(document.getElementById('sentiment-graph'), data.graph_html);
            })
            .catch(error => console.error('Error:', error));
        });

        // Google search recommendations
        // Google search recommendations: show a clickable list of suggestions below
        // the keyword box once more than two characters have been typed.
        // NOTE(review): this is a cross-origin request made from the browser; confirm
        // that the endpoint permits it, otherwise the fetch always ends in the catch.
        document.getElementById('keyword').addEventListener('input', function() {
            const query = this.value;

            // Remove the list from the previous keystroke so lists do not pile up
            // with several elements sharing the id 'suggestions'.
            const removeDropdown = () => {
                const stale = document.getElementById('suggestions');
                if (stale) {
                    stale.remove();
                }
            };
            removeDropdown();

            if (query.length > 2) {
                // encodeURIComponent keeps characters such as '&' or '+' inside the q parameter.
                fetch('https://suggestqueries.google.com/complete/search?client=firefox&q=' + encodeURIComponent(query))
                .then(response => response.json())
                .then(data => {
                    const suggestions = data[1];
                    // A slower, older response may arrive after a newer one has rendered.
                    removeDropdown();
                    let dropdown = document.createElement('div');
                    dropdown.id = 'suggestions';
                    suggestions.forEach(suggestion => {
                        let item = document.createElement('div');
                        item.className = 'suggestion-item';
                        // Remote text is inserted as text, never as markup.
                        item.textContent = suggestion;
                        item.addEventListener('click', function() {
                            document.getElementById('keyword').value = suggestion;
                            dropdown.remove();
                        });
                        dropdown.appendChild(item);
                    });
                    document.getElementById('keyword').parentNode.appendChild(dropdown);
                })
                .catch(error => console.error('Error:', error));
            }
        });
    </script>
</body>
</html>


"""

# The page declares charset UTF-8 in its <meta> tag, so write the file as UTF-8
# explicitly rather than relying on the platform's default encoding.
with open('templates/index.html', 'w', encoding='utf-8') as f:
    f.write(index_html_code)


In [None]:
# Write the styles.css file
styles_css_code = """
body {
    font-family: Arial, sans-serif;
    background-color: #f4f4f4;
    color: #333;
}

.container {
    width: 80%;
    margin: 0 auto;
    padding: 20px;
    background-color: #fff;
    border-radius: 8px;
    box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}

h1, h2 {
    text-align: center;
}

form {
    display: flex;
    justify-content: center;
    margin-bottom: 20px;
}

input[type="text"] {
    width: 50%;
    padding: 10px;
    margin-right: 10px;
    border-radius: 4px;
    border: 1px solid #ccc;
}

button {
    padding: 10px 20px;
    border: none;
    border-radius: 4px;
    background-color: #5cb85c;
    color: #fff;
    cursor: pointer;
}

button:hover {
    background-color: #4cae4c;
}

#results {
    margin-top: 20px;
}

.article {
    margin-bottom: 20px;
    padding: 10px;
    border: 1px solid #ddd;
    border-radius: 4px;
    background-color: #f9f9f9;
}

.article h3 {
    margin-top: 0;
}

.article p {
    margin: 5px 0;
}

.article a {
    color: #337ab7;
}

.article a:hover {
    text-decoration: underline;
}
"""

# Explicit encoding so the stylesheet is written identically on every platform.
with open('static/styles.css', 'w', encoding='utf-8') as f:
    f.write(styles_css_code)


In [None]:
!ngrok authtoken 2iUw4PqBtMWS8y0U2BhlmhXVxt5_4jsngK9CVZXgXgGKx8YAM

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
import os

from flask import Flask, request, render_template, jsonify
import pandas as pd
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
import nltk
from nltk.tokenize import sent_tokenize
from newsapi import NewsApiClient
import plotly.express as px
import plotly.io as pio
from pyngrok import ngrok

# Initialize the Flask application
app = Flask(__name__)

# Initialize the News API client.
# The key is read from the NEWSAPI_KEY environment variable so that no credential is
# stored in the notebook. The key that used to be written here was exposed with the
# notebook and should be rotated.
api_key = os.environ.get('NEWSAPI_KEY')
if not api_key:
    raise RuntimeError('Set the NEWSAPI_KEY environment variable to your NewsAPI key before running this cell.')
newsapi = NewsApiClient(api_key=api_key)

# Download the NLTK sentence tokenizer data used by sent_tokenize.
# NOTE(review): newer NLTK releases load the 'punkt_tab' resource instead of 'punkt';
# both are requested so the cell works with whichever NLTK version was installed.
nltk.download('punkt')
nltk.download('punkt_tab')

# Load FinBERT model by Prosus AI
model_name = 'ProsusAI/finbert'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name)
nlp = pipeline('sentiment-analysis', model=model, tokenizer=tokenizer)

def get_news_for_keyword(keyword, start_date=None, end_date=None):
    """Fetch English news articles that match ``keyword`` as a quoted phrase.

    Args:
        keyword: Search phrase; it is wrapped in double quotes for the query.
        start_date: Optional start of the date range. ``None`` or an empty
            string means no lower bound is sent.
        end_date: Optional end of the date range, handled like ``start_date``.

    Returns:
        A list of dicts with the keys ``title``, ``date``, ``snippet``, ``link``
        and ``keyword``. If the lookup raises, the error is printed and the
        articles gathered so far (possibly none) are returned.
    """
    query = f'"{keyword}"'  # Exact search
    articles = []
    try:
        # Blank form fields arrive as '', so they are normalised to None (no filter).
        all_articles = newsapi.get_everything(q=query,
                                              from_param=start_date or None,
                                              to=end_date or None,
                                              language='en',
                                              sort_by='relevancy')
        for article in all_articles['articles']:
            news = {
                'title': article['title'],
                'date': article['publishedAt'],
                'snippet': article['description'],
                'link': article['url'],
                'keyword': keyword
            }
            articles.append(news)
    except Exception as e:
        # Best effort: report the failure and return what was collected.
        print(f"Failed to retrieve news for {keyword}: {e}")
    return articles

def analyze_sentiment(text):
    """Score every sentence of ``text`` with the sentiment pipeline.

    The text is split with ``sent_tokenize`` and each sentence is passed to
    ``nlp`` on its own; the first entry of every pipeline result is collected.
    Non-string input returns an empty list.
    """
    if not isinstance(text, str):
        return []
    per_sentence = []
    for piece in sent_tokenize(text):
        outcome = nlp(piece)
        per_sentence.append(outcome[0])
    return per_sentence

def aggregate_sentiments(sentiments):
    """Reduce per-sentence sentiment results to one summary dict.

    For each of the labels positive, neutral and negative the summary holds
    the fraction of counted sentences with that label and, under
    ``<label>_confidence``, the average score of those sentences. Results with
    any other label are skipped; with nothing counted, every value is 0.
    """
    # label -> [number of sentences, sum of their scores]
    tally = {'positive': [0, 0.0], 'neutral': [0, 0.0], 'negative': [0, 0.0]}

    for entry in sentiments:
        label = entry['label']
        score = entry['score']
        for name, bucket in tally.items():
            if label == name:
                bucket[0] += 1
                bucket[1] += score
                break

    total = sum(count for count, _ in tally.values())

    shares = {
        name: (count / total if total > 0 else 0)
        for name, (count, _) in tally.items()
    }
    confidences = {
        f'{name}_confidence': (score_sum / count if count > 0 else 0)
        for name, (count, score_sum) in tally.items()
    }
    return {**shares, **confidences}

@app.route('/')
def index():
    """Landing page: render the 'index.html' template."""
    return render_template('index.html')

def _top_articles(df, confidence_column, sign, limit=5):
    """Return the ``limit`` rows with the largest ``confidence_column`` as dicts.

    Every record additionally gets ``score`` (its ``confidence_column`` value)
    and ``sign``; the page template prints ``score`` for each article.
    """
    records = df.nlargest(limit, confidence_column).to_dict(orient='records')
    for record in records:
        record['score'] = record[confidence_column]
        record['sign'] = sign
    return records


@app.route('/search', methods=['POST'])
def search():
    """Search news for a keyword and return sentiment results as JSON.

    Expects a JSON object containing ``keyword``. Returns ``top_positive``,
    ``top_negative`` and ``graph_html`` on success, ``{'error': ...}`` when no
    articles were found, and the same error shape with status 400 when the
    keyword is missing or blank.
    """
    # silent=True yields None for a missing or malformed JSON body instead of aborting.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    keyword = data.get('keyword')
    if not isinstance(keyword, str) or not keyword.strip():
        return jsonify({'error': 'Please enter a keyword.'}), 400

    # NOTE(review): the page also submits start_date / end_date, but the lookup is
    # called with the keyword only, as before - confirm whether the range should apply.
    articles = get_news_for_keyword(keyword.strip())

    if not articles:
        return jsonify({
            'error': 'No news articles found.'
        })

    df = pd.DataFrame(articles)
    df['sentences_sentiment'] = df['snippet'].apply(analyze_sentiment)
    df['aggregated_sentiment'] = df['sentences_sentiment'].apply(aggregate_sentiments)

    # Get top 5 positive sentiment articles
    df['positive_confidence'] = df['aggregated_sentiment'].apply(lambda x: x['positive_confidence'])
    top_positive = _top_articles(df, 'positive_confidence', 'positive')

    # Get top 5 negative sentiment articles
    df['negative_confidence'] = df['aggregated_sentiment'].apply(lambda x: x['negative_confidence'])
    top_negative = _top_articles(df, 'negative_confidence', 'negative')

    # Generate historical sentiment graph: daily mean of each aggregated value,
    # with days that have no articles filled with 0.
    df['date'] = pd.to_datetime(df['date'])
    sentiment_over_time = df.set_index('date')['aggregated_sentiment'].apply(pd.Series).resample('D').mean().fillna(0)
    fig = px.line(sentiment_over_time, x=sentiment_over_time.index, y=['positive', 'neutral', 'negative'], title='Sentiment Over Time')
    graph_html = pio.to_html(fig, full_html=False)

    return jsonify({
        'top_positive': top_positive,
        'top_negative': top_negative,
        'graph_html': graph_html
    })

if __name__ == '__main__':
    # Set up ngrok: open a tunnel to local port 5000 (the port the recorded server log
    # shows Flask running on) and print what connect() returned.
    public_url = ngrok.connect(5000)
    print(f' * ngrok tunnel "http://127.0.0.1:5000" -> "{public_url}"')

    # Start the Flask server with its default settings.
    app.run()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

 * ngrok tunnel "http://127.0.0.1:5000" -> "NgrokTunnel: "https://7a28-34-72-148-191.ngrok-free.app" -> "http://localhost:5000""
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [10/Jul/2024 08:34:27] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [10/Jul/2024 08:34:28] "GET /static/styles.css HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [10/Jul/2024 08:34:29] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [10/Jul/2024 08:37:34] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [10/Jul/2024 08:37:35] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [10/Jul/2024 08:37:36] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [10/Jul/2024 08:37:51] "POST /search HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [10/Jul/2024 08:37:51] "POST /search HTTP/1.1" 200 -
