In [65]:
import json
import os
import io
from pathlib import Path
from typing import Dict, List, Union

import pandas as pd  # type: ignore
import requests

# Address handed to process_data(); get_data() parses the response body as JSON.
URL: str = "https://bites-data.s3.us-east-2.amazonaws.com/MonthlySales.csv"
# Aggregations summary_report() applies when the caller does not pass its own.
STATS: List[str] = ["sum", "mean", "max"]
# Cache file used by get_data(): honours $TMP and falls back to /tmp.
# Note that get_data() stores JSON in this file despite the .csv suffix.
TMP: Path = Path(os.environ.get("TMP", "/tmp"), "MonthlySales.csv")


def get_data(url: str, timeout: float = 30.0) -> Dict[str, str]:
    """Return the JSON document found at ``url``, caching it on disk in ``TMP``.

    If the cache file ``TMP`` exists and holds valid JSON, that content is
    returned and no network request is made. Otherwise the document is
    downloaded, parsed as JSON, written to ``TMP`` and returned.

    Note:
        The cache is not keyed on ``url``: once ``TMP`` exists, its content is
        returned whatever ``url`` is passed.

    Args:
        url (str): The URL where the data is located.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Defaults to 30.0.

    Returns:
        Dict[str, str]: The dictionary extracted from the data

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
        json.JSONDecodeError: If the downloaded body is not valid JSON.
    """
    if TMP.exists():
        try:
            return json.loads(TMP.read_text())
        except (json.JSONDecodeError, UnicodeDecodeError):
            # The cache is unreadable (e.g. truncated or not JSON). Fall
            # through to download a fresh copy, which overwrites the file.
            pass

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    data = json.loads(response.text)
    with TMP.open("w") as tmp:
        json.dump(data, tmp)
    return data


def process_data(url: str) -> pd.DataFrame:
    """Download the sales CSV referenced by the metadata at ``url`` and prepare it.

    The metadata dictionary returned by ``get_data(url)`` must contain a
    ``"download_url"`` entry pointing at the CSV. The CSV must have a
    ``month`` column holding parseable dates.

    Args:
        url (str): The URL where the data is located.

    Returns:
        pd.DataFrame: The CSV content with these date-derived columns:
            * ``year_mon`` - the original ``month`` value parsed as a datetime
            * ``year``     - calendar year of that date
            * ``month``    - calendar month number of that date; this
              overwrites the original ``month`` column

    Raises:
        requests.HTTPError: If the CSV download answers with an error status.
        KeyError: If the metadata has no ``"download_url"`` entry.
    """
    data = get_data(url)

    # Bound the wait so a stalled connection cannot hang the caller forever.
    response = requests.get(data["download_url"], timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as CSV.
    response.raise_for_status()

    df = pd.read_csv(io.StringIO(response.content.decode("utf-8")))

    # Parse the dates once and derive every column from the same result.
    parsed = pd.to_datetime(df["month"])
    df["year_mon"] = parsed
    df["year"] = parsed.dt.year
    df["month"] = parsed.dt.month

    return df

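# Uncomment the following to view/test the reports. In a notebook, run the
# cells that define summary_report and yearly_report first.
# if __name__ == "__main__":
#     data = process_data(URL)
#     summary_report(data)
#     for year in data["year"].unique():
#         yearly_report(data, year)

#     # 2020 is outside the years shown in the summary output (2013-2016),
#     # so this call is expected to raise ValueError.
#     yearly_report(data, 2020)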

# Build the sales DataFrame once; the report cells below reuse it.
df = process_data(URL)

In [66]:
def summary_report(df: pd.DataFrame, stats: Union[List[str], None] = STATS) -> None:
    """Print per-year aggregate statistics of the ``sales`` column.

    Rows are grouped by the ``year`` column and each requested statistic is
    computed over ``sales`` for every group.

    Args:
        df (pd.DataFrame): DataFrame with ``year`` and ``sales`` columns.
        stats (List[str], optional): List of summaries to aggregate.
            Defaults to STATS; passing ``None`` also selects STATS.

    Returns:
        None (prints to standard output)

        Example:
                    sum          mean        max
        year
        2013  484247.51  40353.959167   81777.35
        2014  470532.51  39211.042500   75972.56
        2015  608473.83  50706.152500   97237.42
        2016  733947.03  61162.252500  118447.83
    """
    # The signature allows None, but pandas rejects agg(None), so map it to
    # the default aggregations.
    if stats is None:
        stats = STATS
    print(df.groupby("year")["sales"].agg(stats))

In [67]:
# Print the per-year sales summary using the default STATS aggregations.
summary_report(df)

            sum          mean        max
year                                    
2013  484247.51  40353.959167   81777.35
2014  470532.51  39211.042500   75972.56
2015  608473.83  50706.152500   97237.42
2016  733947.03  61162.252500  118447.83


In [88]:
def yearly_report(df: pd.DataFrame, year: int) -> None:
    """Print the monthly sales totals for a single year.

    The output is the year, an empty line, and then a table of ``sales``
    summed per ``month`` for the rows whose ``year`` matches.

    Args:
        df (pd.DataFrame): DataFrame with ``year``, ``month`` and ``sales``
            columns.
        year (int): The year to generate the report for

    Raises:
        ValueError: If ``year`` does not occur in ``df``. The message reads
        "The year YEAR is not included in the report!"

    Returns:
        None (prints to standard output)

        Example:
        2013

                  sales
        month
        1      14236.90
        2       4519.89
        3      55691.01
        4      28295.35
        5      23648.29
        6      34595.13
        7      33946.39
        8      27909.47
        9      81777.35
        10     31453.39
        11     78628.72
        12     69545.62
    """
    # Guard clause: refuse years that are absent from the data.
    if year not in df["year"].unique():
        raise ValueError(f'The year {year} is not included in the report!')

    # Keep only the requested year's rows, then total sales for each month.
    monthly_totals = (
        df.loc[df["year"] == year, ["month", "sales"]]
        .groupby("month", as_index=True)
        .agg({"sales": "sum"})
    )
    print(f'{year}\n\n{monthly_totals}')

In [89]:
# Print the month-by-month sales totals for 2014.
yearly_report(df, 2014)

2014
          sales
month          
1      18174.08
2      11951.41
3      38726.25
4      34195.21
5      30131.69
6      24797.29
7      28765.33
8      36898.33
9      64595.92
10     31404.92
11     75972.56
12     74919.52
