In [4]:
import psycopg2
from psycopg2 import connect
from dotenv import load_dotenv
from os import environ
import pandas as pd
import textblob
from textblob import TextBlob
import plotly.graph_objects as go
import numpy as np
import streamlit
import json
from requests import get
import html

from plotly.offline import init_notebook_mode, iplot

In [5]:
def get_db_connection():
    """Returns a database connection."""
    load_dotenv()
    return psycopg2.connect(
        host=environ["DB_HOST"],
        port=environ["DB_PORT"],
        database=environ["DB_NAME"],
        user=environ["DB_USER"],
        password=environ["DB_PASSWORD"]
        )


In [6]:
def load_stories_data() -> pd.DataFrame:
    """Loads re with greatest score change over last 24hrs from RDS.
    Returns them as a Dataframe object."""
    query = """
        SELECT stories.story_id, 
                stories.title, 
                stories.author,
                stories.story_url,
                records.comments, 
                records.record_id, 
                stories.topic_id,
                records.score
        FROM records
        LEFT JOIN stories ON records.story_id = stories.story_id
        WHERE record_time >= NOW() - INTERVAL '1 hours'
        ;
        """
    return pd.read_sql(query, con=get_db_connection())

In [7]:
load_dotenv()
df = load_stories_data()
init_notebook_mode(connected=True)     



pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.



## Retrieve comment string


In [8]:
BASE_URL = "https://hacker-news.firebaseio.com/v0/item/"

def get_comment_ids(story: int) -> list:
    """Returns the id of comments from given story on Hacker News."""
    story_info = get(BASE_URL + f"{story}.json", timeout=30).json()
    comment_ids = story_info.get("kids")
    return comment_ids


def get_comments(comment: int) -> str:
    """Returns the text from a given comment."""

    comment_info = get(BASE_URL + f"{comment}.json", timeout=30).json()
    comment = comment_info.get("text")
    return comment

### Sentiment analysis for a chosen story

In [13]:
def get_parent_comment_ids(story: int) -> list:
    """Returns the id of comments from given story on Hacker News."""
    story_info = get(BASE_URL + f"{story}.json", timeout=30).json()
    comment_ids = story_info.get("kids")
    return comment_ids


def get_comment_text(comment: int) -> str:
    """Returns the text from a given comment."""

    comment_info = get(BASE_URL + f"{comment}.json", timeout=30).json()
    comment = comment_info.get("text")
    return comment


def get_comment_list(story: int):
    comments = get_parent_comment_ids(story)
    comment_list = [get_comment_text(comment) for comment in comments]
    return comment_list

def get_story_sentiment(story_id: str):
    comment_list = get_comment_list(int(story_id))
    sentiment_list = []
    for comment in comment_list:
        html.unescape(str(comment))
        blob = TextBlob(str(comment))

        for sentence in blob.sentences:
            sentiment = sentence.sentiment.polarity
        sentiment_list.append({'comment': comment,
        'sentiment': sentiment})
    sentiment_mean = []
    for i in range(0,20):
        sentiment_mean.append(sentiment_list[i]['sentiment'])
    average = sum(sentiment_mean)/len(sentiment_mean)
    return average


sentiment = get_story_sentiment(38865518)
print(sentiment)

[{'comment': 'Powerlight technologies(formerly LaserMotive) has demonstrated power over fiber tech that can transmit hundreds of watts[0][1]. The intended application of this tech was for powering underwater remotely operated vehicles(ROV). The same amount of power could be transmitted with a thinner fiberoptic cable than a copper cable, so it would encumber the ROV less. Although other niche applications like powering electronics in regions with EMP or near MRI&#x27;s were suggested. Powerlight&#x27;s linkedin currently shows them powering an inflatable christmas decoration with power over fiber.<p>Some of the people at Powerlight found this tech to be ironic because Powerlight was originally founded as a wireless power beaming company. However, some of their customers asked if they could transmit power via wires, so that&#x27;s what they did.<p>[0]<a href="https:&#x2F;&#x2F;powerlighttech.com&#x2F;power-over-fiber&#x2F;" rel="nofollow">https:&#x2F;&#x2F;powerlighttech.com&#x2F;power-

In [15]:
sentiment_of_overall_response = get_story_sentiment()

def categorise_sentiment():
    if -sentiment_value: int1 <= va< -0.6:
        category_color = "#FF0000"
        category_label = "Very Negative 😠"
    elif -0.6 <= value < -0.3:
        category_color = "#FFA500"
        category_label = "Negative 🙁"
    elif -0.3 <= value < 0.3:
        category_color = "#FFFF00"
        category_label = "Neutral 😐"
    elif 0.3 <= value < 0.6:
        category_color = "#8BFF00"
        category_label = "Positive 🙂"
    else:
        category_color = "#00FF00"
        category_label = "Very Positive 😄"
        
    return {'colour': category_color, 'label': category_label}

SyntaxError: invalid syntax (3920988875.py, line 2)

In [60]:
def generate_sentiment_gauge(value: int):
    category =categorisecategorise_sentiment(value)

    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=value,
        title={'text': f"Overall Sentiment: {category.get('label')}"},
        gauge={'axis': {'range': [-1, 1]},
            'bar': {'color': category.get('colour')},
            }))
    
    return fig

value = get_story_sentiment('38865518')
fig = generate_sentiment_gauge(value)
fig

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

## Comment's per hour speedometer
https://stackoverflow.com/questions/64462790/how-to-plot-plotly-gauge-charts-next-to-each-other-with-python

In [56]:
# select all records for last 3 hours
# where story_id = int(input_story_id)
# order by record time
# desc limit 2


def load_last_two_records(input_story_id: str) -> pd.DataFrame:
    """Loads records from the last 2 hours for a chosen story.
    Returns them as a Dataframe object."""
    query = f"""
        SELECT
            records.story_id,
            MAX(records.comments) - MIN(records.comments) AS comments_count_change,
            stories.title,
            records.record_time
            FROM records
            JOIN stories ON records.story_id = stories.story_id
            WHERE records.story_id = {int(input_story_id)} 
            GROUP BY records.story_id, stories.title, records.record_time
            ORDER BY records.record_time
            DESC LIMIT 2
        ;
        """
    return pd.read_sql(query, con=get_db_connection())

In [57]:
# new guage with hourly increase in the middle
data = load_last_two_records('38865518')
data
# change_load_last_two_records_for_story_id


pandas only supports SQLAlchemy connectable (engine/connection) or database string URI or sqlite3 DBAPI2 connection. Other DBAPI2 objects are not tested. Please consider using SQLAlchemy.



Unnamed: 0,story_id,comments_count_change,title,record_time
0,38865518,0,Power over fiber,2024-01-07 10:03:10.114368
1,38865518,0,Power over fiber,2024-01-07 09:03:15.092878


In [None]:
value = get_story_sentiment(38865518)
value = -0.8

# Determine the category and corresponding color
if -1 <= value < -0.6:
    category_color = "#FF0000"
    category_label = "Very Negative"
elif -0.6 <= value < -0.3:
    category_color = "#FFA500"
    category_label = "Negative"
elif -0.3 <= value < 0.3:
    category_color = "#FFFF00"
    category_label = "Neutral"
elif 0.3 <= value < 0.6:
    category_color = "#8BFF00"
    category_label = "Positive"
else:
    category_color = "#00FF00"
    category_label = "Very Positive"

fig = go.Figure(go.Indicator(
    mode="gauge+number",
    value=value,
    title={'text': f"Overall Sentiment: {category_label}"},
    gauge={'axis': {'range': [-1, 1]},
           'bar': {'color': category_color},
           }
))

fig

## Find other possible visualisations.

Comments/Topic
- What comments generate the most discussion. / Talkie points / Spill the tea.