In [4]:
import asyncio, boto3, pandas as pd, os, sys, pprint
# Shared pretty-printer for inspecting nested DynamoDB responses.
pp = pprint.PrettyPrinter(
    indent=2,
    compact=True,
    width=80,
)

# Table queried by this notebook and the low-level DynamoDB client used to reach it.
DAILY_UPLOADS_TABLE = "DailyUploadsTable-dev"
ddb = boto3.client(
    "dynamodb",
    region_name="ap-south-1",
)

In [5]:
from datetime import datetime
import ddb_helpers


class GatherUrls:
    """Gathers eligible video posts for one subreddit on one date and converts
    them to and from DynamoDB attribute-value items.

    An item is keyed by the date (``PK``) and the subreddit (``SK``); the
    eligible posts are stored under ``posts`` as a list (``L``) of maps (``M``).
    """

    # Fields copied from every eligible reddit post into the stored item.
    post_keys_to_keep = [
        "title",
        "url",
        "upvote_ratio",
        "ups",
        "author",
        "name",
        "total_awards_received",
    ]

    def __init__(self, subreddit) -> None:
        """Start an empty collection for ``subreddit``, dated with today's local date.

        Args:
            subreddit (str): Name of the subreddit, used as the sort key.
        """
        self.subreddit = subreddit
        self.date = str(datetime.today().date())  ## Of the format yyyy-mm-dd
        self.total_duration = 0  # Sum of the durations of all eligible posts.
        self.urls = []
        self.latest_post = None  # Last post seen by parse_posts, eligible or not.
        self.eligible_posts = []

    def key(self) -> dict:
        """Returns a dictionary with date as PK, subreddit as SK.

        Returns:
            Dict: Containing serialized subreddit and date
        """
        return {
            "PK": GatherUrls.__serialize_date(self.date),
            "SK": GatherUrls.__serialize_subreddit(self.subreddit),
        }

    def serialize_to_item(self):
        """Serializes member variable data of this object for the access pattern:
        date-Partition Key
        subreddit- Sort Key

        Returns:
            Dict: Ready to be used by boto3 to insert item into DynamoDB.
        """
        item = self.key()
        item["posts"] = GatherUrls.__serialize_posts(self.eligible_posts)
        return item

    @staticmethod
    def __removed_post_is_worthy(post):
        """Decide whether a post should be kept despite having been removed.

        A post that was not removed always passes. A removed post passes only
        when it has more than 5 comments and a score above 10.

        Args:
            post (dict): Post data from the reddit API.

        Returns:
            bool: True when the post should be kept.
        """
        if post["removed_by"] or post["removal_reason"]:
            return post["num_comments"] > 5 and post["score"] > 10

        return True

    @staticmethod
    def __is_eligible(post):
        """Check that a post is a non-NSFW, non-stickied video with some engagement.

        Engagement means at least one award, or at least one upvote together
        with at least one comment.

        Args:
            post (dict): Post data from the reddit API.

        Returns:
            bool: True when the post qualifies.
        """
        if not post["is_video"] or post["over_18"] or post["stickied"]:
            return False

        if post["total_awards_received"] > 0:
            return True

        return post["ups"] > 0 and post["num_comments"] > 0

    @staticmethod
    def __video_duration(post):
        """Return the reddit video duration of ``post`` as an int, or 0 if absent.

        ``media`` or its ``reddit_video`` entry may be missing or None; reading
        them defensively keeps one incomplete post from aborting the whole parse.
        """
        media = post.get("media") or {}
        reddit_video = media.get("reddit_video") or {}
        return int(reddit_video.get("duration") or 0)

    def parse_posts(self, posts):
        """Parse a reddit listing and collect the eligible posts.

        Every eligible post is reduced to ``post_keys_to_keep`` and appended to
        ``self.eligible_posts``; its video duration is added to
        ``self.total_duration``. The last parsed post (eligible or not) is
        stored in ``self.latest_post``.

        Args:
            posts (dict): Listing from the reddit API, with the posts under
                ``posts["data"]["children"]``.
        """
        for child in posts["data"]["children"]:
            post = child["data"]
            self.latest_post = post

            if not (
                GatherUrls.__is_eligible(post)
                and GatherUrls.__removed_post_is_worthy(post)
            ):
                continue

            self.eligible_posts.append(
                {key: post[key] for key in GatherUrls.post_keys_to_keep}
            )
            self.total_duration += GatherUrls.__video_duration(post)

    @staticmethod
    def __serialize_posts(posts):
        """Wrap the serialized posts in a DynamoDB list (``L``) attribute."""
        return {"L": [GatherUrls.__serialize_post(post) for post in posts]}

    @staticmethod
    def deserialize_from_item(serialized_item):
        """Convert a ``get_item`` response into a plain dictionary.

        Args:
            serialized_item (dict): Response from boto3 ``get_item``. The
                attributes are read from its ``Item`` entry.

        Returns:
            Dict: Attribute name mapped to the value produced by
            ``ddb_helpers.deserialize_piece_of_item``. Empty when the response
            has no ``Item`` (DynamoDB omits it when no item matches the key).
        """
        # A lookup that matched nothing carries no "Item" key at all.
        attributes = (serialized_item or {}).get("Item") or {}

        deserialized_item = {}
        for key, value in attributes.items():
            # Each attribute value is a {type descriptor: payload} mapping.
            for _key, _value in value.items():
                deserialized_item[key] = ddb_helpers.deserialize_piece_of_item(
                    _key, _value
                )

        return deserialized_item

    @staticmethod
    def __serialize_post(post):
        """Serialize one post into a DynamoDB map (``M``) attribute.

        Only ``post_keys_to_keep`` are written. The type descriptor of each
        field comes from ``ddb_helpers.get_datatype`` and the value is stored
        as its string form.
        """
        return {
            "M": {
                key: {ddb_helpers.get_datatype(post[key]): str(post[key])}
                for key in GatherUrls.post_keys_to_keep
            }
        }

    @staticmethod
    def __serialize_subreddit(subreddit):
        """Wrap the subreddit name as a DynamoDB string (``S``) attribute."""
        return {"S": subreddit}

    @staticmethod
    def __serialize_date(date):
        """Wrap the date string as a DynamoDB string (``S``) attribute."""
        return {"S": date}

    @staticmethod
    def deserialize_PK_SK_count(item):
        """Strip the type descriptors from a flat DynamoDB item.

        Values are returned exactly as stored in the item; no type conversion
        is applied.

        Args:
            item (dict): Attribute name mapped to a {type descriptor: value} dict.

        Returns:
            Dict: Attribute name mapped to the raw stored value.
        """
        deserialized_item = {}
        for key, value in item.items():
            for _value in value.values():
                deserialized_item[key] = _value
        return deserialized_item



In [8]:
# Fetch the stored uploads for one subreddit. The key pairs the collector's
# date (set to today when it is constructed) with the subreddit name.
subreddit = "funny"
gather_urls = GatherUrls(subreddit=subreddit)

item = ddb.get_item(
    TableName=DAILY_UPLOADS_TABLE,
    Key=gather_urls.key(),
)



In [11]:
# NOTE(review): pprint and pp are already set up in an earlier cell; this repeat
# looks redundant and pp is not used below — consider dropping it.
import pprint

pp = pprint.PrettyPrinter(
    indent=2,
    compact=True,
    width=80,
)

# Show the first stored post in its raw DynamoDB attribute-value form.
item["Item"]["posts"]["L"][0]

{'M': {'upvote_ratio': {'N': '0.84'},
  'author': {'S': 'rightcoastguy'},
  'name': {'S': 't3_p3tc8p'},
  'ups': {'N': '46431'},
  'title': {'S': 'I prototype stupid ideas and today I made a zip off bathing suit with towel pant legs.'},
  'total_awards_received': {'N': '67'},
  'url': {'S': 'https://v.redd.it/4nex55lbp6h71'}}}

In [15]:
# Convert the raw get_item response into plain Python values and display it.
deserialized_item = GatherUrls.deserialize_from_item(item)
deserialized_item

{'posts': [{'upvote_ratio': 0.84,
   'author': 'rightcoastguy',
   'name': 't3_p3tc8p',
   'ups': 46431,
   'title': 'I prototype stupid ideas and today I made a zip off bathing suit with towel pant legs.',
   'total_awards_received': 67,
   'url': 'https://v.redd.it/4nex55lbp6h71'},
  {'upvote_ratio': 0.91,
   'author': 'RamsesThePigeon',
   'name': 't3_p3meu8',
   'ups': 17413,
   'title': "If you've ever wondered how upvotes were originally manufactured, this ‘50s-era commercial has you covered.",
   'total_awards_received': 189,
   'url': 'https://v.redd.it/rk72gxpd35h71'},
  {'upvote_ratio': 0.89,
   'author': 'bigfootjustice',
   'name': 't3_p3mk4q',
   'ups': 6116,
   'title': 'Ostrich Heaven',
   'total_awards_received': 18,
   'url': 'https://v.redd.it/yf1xq41p45h71'},
  {'upvote_ratio': 0.9,
   'author': 'ThatAverageJo',
   'name': 't3_p3nood',
   'ups': 5815,
   'title': 'My Response To People Saying My Mustache And Beard Make Me Look Like A Villain',
   'total_awards_receiv