In [123]:
"""
Defines a DynamoDB table containing Reddit comment data and methods to interact with that table.
"""
from decimal import Decimal
from botocore.exceptions import ClientError
from boto3.dynamodb.conditions import Key

class Comment:
    """
    Encapsulates a DynamoDB table of comment data.

    The table handle is not resolved at construction time: call `exists` with a
    table name first; `add_comment` and `query_comments` then operate on the table
    that call stored on the instance.
    """

    def __init__(self,
                 dyn_resource):
        """
        Args:
            dyn_resource: A Boto3 DynamoDB resource.
        """

        self.dyn_resource = dyn_resource
        # Table variable is set during call to exists.
        self.table = None


    def exists(self, table_name: str) -> bool:
        """
        Determines whether or not a table exists. If the table exists, stores it as the
        instance variable defining the table to be used by the other methods.

        When the table is not found, the previously stored table (if any) is left
        untouched.

        Args:
            table_name: The name of the table to check.

        Returns:
            True when the table exists, False otherwise.

        Raises:
            ClientError: If the lookup fails for any reason other than the table not
                being found. The error is printed and then re-raised, because in
                that case it is unknown whether the table exists.
        """

        try:
            table = self.dyn_resource.Table(table_name)
            # load() is what actually contacts the service for the table.
            table.load()
        except ClientError as err:
            if err.response["Error"]["Code"] == "ResourceNotFoundException":
                return False

            print(f"Couldnt check for existence: {err.response['Error']['Code']}, "
                  f"{err.response['Error']['Message']}")
            raise

        # Only a table that loaded successfully becomes the active table.
        self.table = table

        return True



    def add_comment(self, data: dict):
        """
        Adds a comment record to the table.

        A ClientError raised by the write is printed and swallowed, so a failed write
        does not interrupt the caller.

        Args:
            data: Json data containing comment information. The keys read are
                'match_keywords', 'timestamp', 'label', 'score', 'id', 'name',
                'author', 'body', 'upvotes' and 'downvotes'.
        """

        try:
            self.table.put_item(
                Item={
                    # NOTE(review): the rows displayed later in this notebook carry
                    # `match_id_date` and `comment_id_timestamp` columns rather than
                    # this attribute name — confirm it matches the table's key schema.
                    'match_ID_timestamp': data['match_keywords'] + '_' + str(data['timestamp']),
                    'sentiment_id': data['label'],
                    # NOTE(review): if `score` arrives as a float, Decimal(float) keeps
                    # the float's full binary expansion; left as-is so stored values
                    # do not change.
                    'sentiment_score': Decimal(data['score']),
                    'id': data['id'],
                    'name': data['name'],
                    'author': data['author'],
                    'body': data['body'],
                    'upvotes': data['upvotes'],
                    'downvotes': data['downvotes'],
                    'timestamp': int(data['timestamp']),
                }
            )

        except ClientError as err:
            print(f"Couldnt add comment to table: {err.response['Error']['Code']}, "
                  f"{err.response['Error']['Message']}")


    def query_comments(self, team_name):
        """
        Queries for comments whose `match_id_date` key equals the given value.

        All result pages are fetched: the query is repeated with the returned
        `LastEvaluatedKey` until the response no longer contains one.

        Args:
            team_name: The value compared against the `match_id_date` key.
                NOTE(review): the parameter name is kept for existing callers, but
                it does not describe the key it is matched against — confirm what
                callers are expected to pass.

        Returns:
            The list of items with the given key value, or None when the query
            failed (the error is printed).
        """

        query_kwargs = {"KeyConditionExpression": Key("match_id_date").eq(team_name)}
        items = []

        try:
            while True:
                response = self.table.query(**query_kwargs)
                items.extend(response["Items"])

                # A LastEvaluatedKey in the response means more pages remain.
                last_key = response.get("LastEvaluatedKey")
                if not last_key:
                    break
                query_kwargs["ExclusiveStartKey"] = last_key
        except ClientError as err:
            print(f"Couldnt query for comments with {team_name}: "
                  f"{err.response['Error']['Code']}, {err.response['Error']['Message']}")
            return None

        return items


In [125]:
import boto3

# Create the DynamoDB resource and wrap it in the Comment helper defined above.
dyn_resource = boto3.resource('dynamodb')
comment_table = Comment(dyn_resource=dyn_resource)
# exists() is also what stores the table on the wrapper (see the comment in
# Comment.__init__), so this call has to run before any add/query call.
comment_table.exists('soccer_comment_data')

True

In [126]:
# NOTE(review): the saved output of this cell is a TypeError rejecting `team_name`, yet
# the query_comments signature defined in this notebook does take `team_name` — the
# output looks stale; re-run from a fresh kernel.
# NOTE(review): query_comments filters on the `match_id_date` key, whose values in the
# rows shown further down look like '1_2024-08-13' — confirm 'liverpool' is the
# intended argument.
raw_data = comment_table.query_comments(team_name='liverpool')

TypeError: Comment.query_comments() got an unexpected keyword argument 'team_name'

In [91]:
import pandas as pd

# Load the items returned by the query into a DataFrame for analysis.
df = pd.DataFrame(raw_data)

In [92]:
# Interpret `timestamp` as epoch seconds and truncate to the minute, so comments can
# later be grouped per minute.
df['date'] = (
    pd.to_datetime(df['timestamp'].astype(int), unit='s')
    .dt.floor('Min')
)

df

Unnamed: 0,sentiment_id,upvotes,downvotes,sentiment_score,timestamp,id,comment_id_timestamp,name,match_id_date,author,body,date
0,positive,1,0,0.665999114513397216796875,1723586699,lhz95jq,lhz95jq1723586699,t1_lhz95jq,1_2024-08-13,Relevant_Western3464,Interesting.,2024-08-13 22:04:00
1,negative,1,0,0.8972609043121337890625,1723586734,lhz996i,lhz996i1723586734,t1_lhz996i,1_2024-08-13,edi12334,2 goals conceded in 3 minutes too but apparent...,2024-08-13 22:05:00
2,neutral,1,0,0.692617475986480712890625,1723586736,lhz99eb,lhz99eb1723586736,t1_lhz99eb,1_2024-08-13,Nut-King-Call,Right now Martin Braithwaite is playing in Cop...,2024-08-13 22:05:00
3,neutral,1,0,0.62728989124298095703125,1723586743,lhz9a3t,lhz9a3t1723586743,t1_lhz9a3t,1_2024-08-13,Careful-Snow,Running a bit late there Jimothy,2024-08-13 22:05:00
4,negative,1,0,0.81241357326507568359375,1723586743,lhz9a56,lhz9a561723586743,t1_lhz9a56,1_2024-08-13,foolofatook46,Jesus how did he survive on that??,2024-08-13 22:05:00
...,...,...,...,...,...,...,...,...,...,...,...,...
57,neutral,1,0,0.9076864719390869140625,1723592037,lhznqoc,lhznqoc1723592037,t1_lhznqoc,1_2024-08-13,AnnieIWillKnow,He started against Scotland at the Euros,2024-08-13 23:33:00
58,negative,1,0,0.649271667003631591796875,1723592039,lhznqtw,lhznqtw1723592039,t1_lhznqtw,1_2024-08-13,haiderhtz,"""city at the top"" narrative with united being ...",2024-08-13 23:33:00
59,neutral,1,0,0.7144222259521484375,1723592075,lhznubs,lhznubs1723592075,t1_lhznubs,1_2024-08-13,jersey-city-park,Interesting exclusion,2024-08-13 23:34:00
60,negative,1,0,0.701813042163848876953125,1723592077,lhznuko,lhznuko1723592077,t1_lhznuko,1_2024-08-13,Howyoulikemenoow,"It’s not a gamble, it’s incredibly stupid and ...",2024-08-13 23:34:00


In [112]:
# Count of non-null comment ids for every (date, sentiment_id) pair, as a flat frame
# with the counted column renamed to `count`.
df_count = (
    df.groupby(['date', 'sentiment_id'], as_index=False)['id']
    .count()
    .rename(columns={'id': 'count'})
)

In [116]:
# Relative frequency of each sentiment label within every `date` group.
# NOTE(review): the array saved as this cell's output cannot come from a plain
# assignment — it looks stale; re-run to refresh.
df_prop = (
    df.groupby('date', as_index=False)['sentiment_id']
    .value_counts(normalize=True)
)

array([1, 6, 4, 1, 3, 5, 8, 1, 4, 3, 2, 3, 1, 1, 1, 7, 8, 3])

In [107]:
# NOTE(review): the saved output carries an all-empty `proportion` column that the
# df_count definition in this notebook does not create — presumably left over from an
# earlier run; re-run from a fresh kernel to refresh it.
df_count

Unnamed: 0,date,sentiment_id,count,proportion
0,2024-08-13 22:04:00,positive,1,
1,2024-08-13 22:05:00,negative,3,
2,2024-08-13 22:05:00,neutral,2,
3,2024-08-13 22:05:00,positive,1,
4,2024-08-13 22:07:00,negative,1,
5,2024-08-13 22:07:00,neutral,2,
6,2024-08-13 22:07:00,positive,1,
7,2024-08-13 22:10:00,neutral,1,
8,2024-08-13 22:11:00,neutral,2,
9,2024-08-13 22:11:00,positive,1,


In [119]:
import plotly.express as px

# Line chart of the per-date comment counts, one line per sentiment label.
# A title and readable axis/legend labels are set so the figure stands on its own.
fig = px.line(
    df_count,
    x='date',
    y='count',
    color='sentiment_id',
    template='simple_white',
    title='Comment count over time by sentiment',
    labels={'date': 'Time', 'count': 'Comments', 'sentiment_id': 'Sentiment'},
    color_discrete_map={'positive': 'red',
                        'neutral': 'grey',
                        'negative': 'blue'},
)

fig.show()

In [54]:
# NOTE(review): `data` is not assigned in any cell visible in this notebook, so this
# cell relies on leftover kernel state. The displayed value looks like base64-encoded
# JSON for a test comment — confirm, then either define it in a cell or drop this one.
data

'eyJtYXRjaF9rZXl3b3JkcyI6ICJnb2FsIiwgInRpbWVzdGFtcCI6ICIxNjI3ODQ2MjYyIiwgImlkIjogIjEyMzQ1IiwgIm5hbWUiOiAiVGVzdCBDb21tZW50IiwgImF1dGhvciI6ICJ0ZXN0X2F1dGhvciIsICJib2R5IjogIlRoaXMgaXMgYSB0ZXN0IGNvbW1lbnQiLCAidXB2b3RlcyI6ICIxMCIsICJkb3dudm90ZXMiOiAiMSJ9'

In [93]:
import datetime

# Current local date and time (naive, no tzinfo), shown as the cell's output.
datetime.datetime.now()

datetime.datetime(2024, 8, 13, 17, 14, 30, 193542)