# Data Visualization with Plotly

## Day 1

In [1]:
# Import modules - Standard Library
from collections import Counter
from datetime import datetime, timedelta
from pprint import pprint as pp
import re

In [2]:
# Import modules - Third-Party
import feedparser
import plotly
import plotly.graph_objs as go

In [3]:
# Constants
BLOG_CATEGORIES = {
    'articles': 'article',
    'codechallenge': 'challenge',
    'guest': 'guest',
    'special': 'special',
    'twitter': 'twitter'
}
BLOG_FEED = feedparser.parse(
    url_file_stream_or_string='https://pybit.es/feed'
)
DATE_STRING = 'Thu, 30 Jun 2022 06:56:11 +0000'
URL_STRING = 'https://pybit.es/articles/the-importance-of-disconnecting-as-a-developer/'

In [4]:
# Assign blog entries to a variable
entries = BLOG_FEED['entries']

In [32]:
# Display the number of entries
print(len(entries))

10


In [31]:
# Display the first blog entry
entries[0]

{'title': 'The why and how of networking to boost your Python career',
 'title_detail': {'type': 'text/plain',
  'language': None,
  'base': 'https://pybit.es/feed/',
  'value': 'The why and how of networking to boost your Python career'},
 'links': [{'rel': 'alternate',
   'type': 'text/html',
   'href': 'https://pybit.es/articles/pp77-the-why-and-how-of-networking-to-boost-your-python-career/'}],
 'link': 'https://pybit.es/articles/pp77-the-why-and-how-of-networking-to-boost-your-python-career/',
 'authors': [{'name': 'PyBites Team'}],
 'author': 'PyBites Team',
 'author_detail': {'name': 'PyBites Team'},
 'published': 'Wed, 06 Jul 2022 15:19:21 +0000',
 'published_parsed': time.struct_time(tm_year=2022, tm_mon=7, tm_mday=6, tm_hour=15, tm_min=19, tm_sec=21, tm_wday=2, tm_yday=187, tm_isdst=0),
 'tags': [{'term': 'Podcast', 'scheme': None, 'label': None},
  {'term': 'Career', 'scheme': None, 'label': None},
  {'term': 'career', 'scheme': None, 'label': None},
  {'term': 'communicatio

### Parsed Feed Notes

- The `published` key uses a string value for a date:
    `'Thu, 30 Jun 2022 06:56:11 +0000'`
- The `published_parsed` key uses a `time.struct` object to store a structured date:
    `time.struct_time(tm_year=2022, tm_mon=6, tm_mday=30, tm_hour=6, tm_min=56, tm_sec=11, tm_wday=3, tm_yday=181, tm_isdst=0)`

#### Working with date/time objects is easiest with `datetime.datetime` objects

In [48]:
# Create a helper function to convert strings to datetime objects
def convert_to_datetime(
    date_string: str = DATE_STRING
) -> datetime:
    """ Convert a string to a datetime object.
    
        Args:
            date_string (str):
                A string representing a date, extracted from
                entries['published'].  Default is DATE_STRING.

        Returns:
            date_time (datetime):
                A datetime object representing the converted date_string.
    """

    # Remove the timezone offset from date_string
    date = date_string.split(
        sep='+'  # Split the string to a list object on the '+' character
    )[0].strip()  # Keep the first index (everything to the left of the '+') and remove the trailing whitespace

    # Create a datetime.datetime object from the date string
    date_time =  datetime.strptime(date, '%a, %d %b %Y %H:%M:%S')

    # Convert the datetime object to a year-month string
    date_time = f'{date_time.year}-{date_time.month}'

    return date_time


# Convert a string to a datetime object to a string
date_time = convert_to_datetime()

In [35]:
# Add 5 days to the date_time value
date_time + timedelta(
    days=5
)

datetime.datetime(2022, 7, 5, 6, 56, 11)

In [36]:
# Extract an entry's category from its `link` key
def get_category(
    link: str = URL_STRING
) -> str:
    """ Extract an entry's category from its `link` key.
    
    Args:
        link (str):
            A link with a category embedded in the URL, extracted from
            entries['link'].  Default is URL_STRING.

        Returns:
            category (str):
                Category value, extracted from the link.
    """

    # Extract the category from the link with re.sub
    category_match = re.sub(
        pattern=r'https?://pybit.es/([a-z]+)/.*',
        repl=r'\1',
        string=link
    )

    # Attempt to match category_match to a key in BLOG_CATEGORIES, default is 'article'
    category = BLOG_CATEGORIES.get(category_match, 'article')

    return category

category = get_category()
category

'article'

---

## Day 2

### Plotting RSS Feed Entries with `Plotly`

In [37]:
# Create list of RSS feed entry's published dates
published_dates = [
   convert_to_datetime(
    date_string=entry.published
    ) for entry in entries
]


In [38]:
# Display published_dates
published_dates

[datetime.datetime(2022, 7, 6, 15, 19, 21),
 datetime.datetime(2022, 7, 6, 15, 12, 17),
 datetime.datetime(2022, 6, 30, 6, 56, 11),
 datetime.datetime(2022, 6, 29, 15, 27, 38),
 datetime.datetime(2022, 6, 28, 16, 25, 41),
 datetime.datetime(2022, 6, 23, 14, 50, 48),
 datetime.datetime(2022, 6, 17, 14, 30, 58),
 datetime.datetime(2022, 6, 17, 13, 56, 28),
 datetime.datetime(2022, 6, 14, 14, 41, 28),
 datetime.datetime(2022, 6, 10, 14, 13, 26)]

In [39]:
# Display the posts by month
posts_by_month = Counter(published_dates)

In [40]:
posts_by_month

Counter({datetime.datetime(2022, 7, 6, 15, 19, 21): 1,
         datetime.datetime(2022, 7, 6, 15, 12, 17): 1,
         datetime.datetime(2022, 6, 30, 6, 56, 11): 1,
         datetime.datetime(2022, 6, 29, 15, 27, 38): 1,
         datetime.datetime(2022, 6, 28, 16, 25, 41): 1,
         datetime.datetime(2022, 6, 23, 14, 50, 48): 1,
         datetime.datetime(2022, 6, 17, 14, 30, 58): 1,
         datetime.datetime(2022, 6, 17, 13, 56, 28): 1,
         datetime.datetime(2022, 6, 14, 14, 41, 28): 1,
         datetime.datetime(2022, 6, 10, 14, 13, 26): 1})