In [244]:
"""
Defines a DynamoDB table containing Reddit comment data and methods to interact with that table.
"""
from decimal import Decimal
from botocore.exceptions import ClientError
from boto3.dynamodb.conditions import Key

class Comment:
    """
    Encapsulates a DynamoDB table of comment data.
    """

    def __init__(self,
                 dyn_resource):
        """
        Args:
            dyn_resource: A Boto3 DynamoDB resource.
        """

        self.dyn_resource = dyn_resource
        # Table variable is set during call to exists.
        self.table = None


    def exists(self, table_name: str) -> bool:
        """
        Determines whether or not a table exsits. If the table exists, stores it as instance
        variable defining table to be used.

        Args:
            table_name: The name of the table to check.

        Returns:
            True when the table exists, False otherwise.
        """

        try:
            table = self.dyn_resource.Table(table_name)
            table.load()
            exists = True
        except ClientError as err:
            if err.response["Error"]["Code"] == "ResourceNotFoundException":
                exists = False
            else:
                print(f"Couldnt check for existence: {err.response['Error']['Code']}, \
                      {err.response['Error']['Message']}")


        self.table = table

        return exists



    def add_comment(self, data: dict):
        """
        Adds a comment record to the table.
        
        Args:
            data: Json data containing comment information
        """

        try:
            self.table.put_item(
                Item={
                    'match_ID_timestamp': data['match_keywords'] + '_' + str(data['timestamp']),
                    'sentiment_id': data['label'],
                    'sentiment_score': Decimal(data['score']),
                    'id': data['id'],
                    'name': data['name'],
                    'author': data['author'],
                    'body': data['body'],
                    'upvotes': data['upvotes'],
                    'downvotes': data['downvotes'],
                    'timestamp': int(data['timestamp']),
                }
            )

        except ClientError as err:
            print(f"Couldnt add comment to table: {err.response['Error']['Code']}, \
                      {err.response['Error']['Message']}")
            

    def query_comments(self, team_name):
        """
        Queries for comments with a specific match id and date key.

        Args:
        match_id_date: The match id and date to query
        
        Returns:
        The list of comments with the specific match id and date.
        """

        try:
            response = self.table.query(KeyConditionExpression=Key("team_name")
                                        .eq(team_name))
        except ClientError as err:
            print(f"Couldnt query for comments with {match_id_date}: \
                  {err.response['Error']['Code']}, {err.response['Error']['Message']}")
        else:
            return response["Items"]


Connect to the database and query a sample set of data

In [245]:
import boto3

dyn_resource = boto3.resource('dynamodb')
comment_table = Comment(dyn_resource=dyn_resource)
comment_table.exists('soccer_comment_data')

True

In [246]:
raw_data = comment_table.query_comments(team_name='liverpool')

In [247]:
import pandas as pd

df = pd.DataFrame(raw_data)

df.head(5)

Unnamed: 0,sentiment_id,upvotes,team_name,downvotes,sentiment_score,subreddit,timestamp,id,comment_id_timestamp,name,author,body
0,neutral,1,liverpool,0,0.9116109013557434,LiverpoolFC,1723678134,li5mpvx,li5mpvx1723678134,t1_li5mpvx,themanebeat,"Rush played for 6 seasons in the ""prem era"""
1,positive,1,liverpool,0,0.6161769032478333,LiverpoolFC,1723678161,li5msju,li5msju1723678161,t1_li5msju,IggiePopp,Suarez. It wasn't pretty but it was relentless...
2,positive,1,liverpool,0,0.6157285571098327,LiverpoolFC,1723678175,li5mtyk,li5mtyk1723678175,t1_li5mtyk,Vikilinho,Yes! Straight forward answer. We came third la...
3,neutral,1,liverpool,0,0.8521590828895568,soccer,1723678192,li5mvpu,li5mvpu1723678192,t1_li5mvpu,techaansi,Saving for what exactly?
4,negative,1,liverpool,0,0.4987541437149047,LiverpoolFC,1723678457,li5nmaf,li5nmaf1723678457,t1_li5nmaf,CIAinformer2,That comment is recovering from a lot of downv...


In [256]:
import datetime

df['date'] = (pd.to_datetime(df['timestamp'].astype(int), unit='s').dt.tz_localize('UTC')
                      .dt.tz_convert('US/Pacific').dt.floor('Min').dt.tz_localize(None))

df_count = (df.groupby(['date', 'sentiment_id'], as_index=False)['id'].count()
            .rename(columns={'id': 'count'}))

df

Unnamed: 0,sentiment_id,upvotes,team_name,downvotes,sentiment_score,subreddit,timestamp,id,comment_id_timestamp,name,author,body,date
0,neutral,1,liverpool,0,0,LiverpoolFC,1723678134,li5mpvx,li5mpvx1723678134,t1_li5mpvx,themanebeat,"Rush played for 6 seasons in the ""prem era""",2024-08-14 16:28:00
1,positive,1,liverpool,0,1,LiverpoolFC,1723678161,li5msju,li5msju1723678161,t1_li5msju,IggiePopp,Suarez. It wasn't pretty but it was relentless...,2024-08-14 16:29:00
2,positive,1,liverpool,0,1,LiverpoolFC,1723678175,li5mtyk,li5mtyk1723678175,t1_li5mtyk,Vikilinho,Yes! Straight forward answer. We came third la...,2024-08-14 16:29:00
3,neutral,1,liverpool,0,0,soccer,1723678192,li5mvpu,li5mvpu1723678192,t1_li5mvpu,techaansi,Saving for what exactly?,2024-08-14 16:29:00
4,negative,1,liverpool,0,-1,LiverpoolFC,1723678457,li5nmaf,li5nmaf1723678457,t1_li5nmaf,CIAinformer2,That comment is recovering from a lot of downv...,2024-08-14 16:34:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,neutral,1,liverpool,0,0,LiverpoolFC,1723763827,libkaar,libkaar1723763827,t1_libkaar,Specific-Record2866,It’ll be Szobo or Harvey. They’re both fightin...,2024-08-15 16:17:00
361,negative,1,liverpool,0,-1,LiverpoolFC,1723763830,libkamt,libkamt1723763830,t1_libkamt,WonderfulBlackberry9,Half the sub would like to think otherwise,2024-08-15 16:17:00
362,neutral,1,liverpool,0,0,LiverpoolFC,1723763836,libkb7a,libkb7a1723763836,t1_libkb7a,LiverpoolFC-ModTeam,Removed: No Reposts and Don't Post Derivatives...,2024-08-15 16:17:00
363,neutral,1,liverpool,0,0,LiverpoolFC,1723763852,libkcr0,libkcr01723763852,t1_libkcr0,b13_git2,[source ](https://www.instagram.com/reel/C-sx4...,2024-08-15 16:17:00


In [257]:
df_prop = df.groupby('date', as_index=False)['sentiment_id'].value_counts(normalize=True)

Line plot

In [258]:
import plotly.express as px

fig = px.line(df_count,
                  x='date',
                  y='count',
                  color='sentiment_id',
                  template='simple_white',
                  color_discrete_map={'positive': 'red',
                                      'neutral': 'grey',
                                      'negative': 'blue'})

fig.show()

Pie chart:

In [259]:
df_pie = pd.DataFrame(df['sentiment_id'].value_counts(normalize=True)).reset_index()

df_pie

Unnamed: 0,sentiment_id,proportion
0,neutral,0.484932
1,negative,0.320548
2,positive,0.194521


In [267]:
fig = px.pie(df_pie,
             values='proportion',
             names='sentiment_id',
             template='simple_white',
             color='sentiment_id',
             color_discrete_map = {'positive': '#2ca02c', 
                                        'negative': '#d62728', 
                                        'neutral': '#7f7f7f'})

fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(showlegend=False)

fig.show()

Line Plot with Sentiment Score

In [263]:
df['sentiment_score'] = df['sentiment_id'].map({'positive': 1, 'negative': -1, 'neutral': 0})

df_count_2 = (df.groupby(['date'], as_index=False)['sentiment_score'].sum())

In [264]:
fig = px.line(df_count_2,
            x='date',
            y='sentiment_score',
            template='simple_white',
            )
fig.show()