# Example project for the final project of the EEN060/EEN065 courses

## Debugging the implementation

This file is meant to help students debug their code.

Copy and paste the contents of your functions into the cells below to run them independently of the application.

Tips:
1. Use `SHIFT + TAB` to unindent lines.
2. Replace application logging calls (e.g. `current_app.logger.info`) with `print`
3. Do not forget to update the imports below

In [10]:
# built-in imports
from datetime import date, datetime
import collections
import pickle

# external imports
from flask import current_app
import requests

# internal imports
from codeapp import db
from codeapp.models import Show

### 1. get_data_list() function

In [11]:
def _none_if_nan(value: str | None) -> str | None:
    """Return ``value`` unless it is ``None`` or its string form is ``"nan"``."""
    if value is None or str(value) == "nan":
        return None
    return value


def _parse_date_added(item: dict[str, str]) -> date | None:
    """Parse ``item["date_added"]`` (``"%B %d, %Y"``); ``None`` if it cannot be parsed."""
    try:
        return datetime.strptime(item["date_added"], "%B %d, %Y").date()
    except (KeyError, TypeError, ValueError):
        # KeyError: field absent; TypeError: value is not a string;
        # ValueError: string does not match the expected format.
        return None


def get_data_list() -> list[Show]:
    """
    Return the dataset as a list of ``Show`` objects.

    If the ``dataset_list`` key already exists in the database, the pickled items
    stored there are loaded and returned. Otherwise the dataset is downloaded,
    each entry is converted into a ``Show``, the objects are pickled and appended
    to the ``dataset_list`` list in the database, and the new list is returned.

    Returns:
        The list of ``Show`` objects, in the order of the stored/downloaded data.

    Raises:
        requests.RequestException: if the download fails or the server answers
            with an HTTP error status.
        KeyError: if a downloaded entry lacks one of the required fields.
        ValueError: if ``release_year`` of an entry cannot be converted to ``int``.
    """

    ##### check if dataset already exists, and if so, return the existing dataset  #####
    # db.delete("dataset_list")  # uncomment if you want to force deletion
    if db.exists("dataset_list") > 0:  # checks if the `dataset_list` key already exists
        print(
            "Dataset already downloaded. "
            f"{db.llen('dataset_list')} items in the database"
        )
        raw_dataset: list[bytes] = db.lrange("dataset_list", 0, -1)  # get list from DB
        # NOTE(review): pickle.loads must only see trusted bytes; this presumes the
        # list is written exclusively by this function -- confirm for your setup.
        dataset_stored: list[Show] = [pickle.loads(raw_item) for raw_item in raw_dataset]
        print(
            f"Downloaded {len(dataset_stored)} items from the database."
        )
        return dataset_stored

    ################# dataset has not been downloaded, downloading now #################
    print("Downloading dataset.")
    url: str = "https://onu1.s2.chalmers.se/datasets/amazon_prime_titles.json"
    response = requests.get(url, timeout=200)
    # fail early on 4xx/5xx instead of trying to decode an error page as JSON
    response.raise_for_status()
    print("Finished downloading dataset.")

    ########################## saving dataset to the database ##########################
    data: list[dict[str, str]] = response.json()

    # Convert every entry BEFORE touching the database, so that a malformed entry
    # cannot leave a partially filled list behind that later calls would mistake
    # for the complete dataset.
    dataset_base: list[Show] = [
        Show(
            id=item["show_id"],
            type=item["type"],
            title=item["title"],
            director=_none_if_nan(item["director"]),
            cast=_none_if_nan(item["cast"]),
            country=_none_if_nan(item["country"]),
            date_added=_parse_date_added(item),
            release_year=int(item["release_year"]),
            rating=_none_if_nan(item["rating"]),
            duration=item["duration"],
            listed_in=item["listed_in"],
            description=item["description"],
        )
        for item in data
    ]

    # push all objects to the database list in one command; the call is skipped
    # for an empty dataset because it needs at least one value
    if dataset_base:
        db.rpush("dataset_list", *(pickle.dumps(show) for show in dataset_base))
    print(f"Processed {len(dataset_base)} items.")

    return dataset_base

In [12]:
import random

# Uncomment the next line to delete the stored dataset and force a new download.
# db.delete("dataset_list")

# Fetch the dataset and show its size.
data = get_data_list()
print(f"Got back {len(data)} data points.")
print("Here are a few:")
# random.sample raises ValueError when k exceeds the population size, so the
# sample is capped at the number of items actually returned.
for item in random.sample(data, k=min(10, len(data))):
    print(item)

Dataset already downloaded. 9668 items in the database
Downloaded 9668 items from the database.
Got back 9668 data points.
Here are a few:
Show(id='s4712', type='Movie', title='The Somme', director='Carl Hindmarch', cast='Laurence Kennedy, Tilda Swinton, Ed Stoppard', country=None, date_added=None, release_year=2005, duration='89 min', listed_in='Documentary', description="The Somme was a modern battle of such prehistoric brutality that its horror is hard to comprehend. Never have so many soldiers been killed in one day's fighting: 25,000 dead, most in the first hour. This film is based on letters and diaries of soldiers who witnessed the battle first hand and wrote down what they saw and felt.", rating='16')
Show(id='s236', type='Movie', title='UFOTV Presents: The Mystery of the Sphinx - New Scientific Evidence - Expanded Directors Cut', director='Bill Cote', cast='JohnAnthonyWest.Dr.RobertM.Schoch', country=None, date_added=None, release_year=2016, duration='93 min', listed_in='Speci

### 2. calculate_statistics() function

In [13]:
def calculate_statistics(dataset: list[Show]) -> dict[int, int]:
    """
    Count how many items of the dataset were released in each year.

    Args:
        dataset: the items to analyse; only the ``release_year`` attribute of
            each item is read.

    Returns:
        A mapping from release year to the number of items with that year, with
        keys in order of first appearance in ``dataset``. The mapping is a
        ``collections.Counter``: looking up a year that is not present yields
        ``0`` and, unlike a ``defaultdict``, does not add that year as a key.
    """
    # Counter does the tallying and keeps first-seen insertion order
    counter: collections.Counter[int] = collections.Counter(
        item.release_year for item in dataset
    )

    print(f"The statistics have {len(counter)} items.")

    return counter

In [14]:
# Fetch the dataset; report the size of `dataset` itself rather than of a
# variable left over from another cell.
dataset = get_data_list()
print(f"Got back {len(dataset)} data points.")

# Compute the per-year counts and list them.
stats = calculate_statistics(dataset)
print(f"Got back {len(stats)} statistics.")
print("Here they are:")
for key, value in stats.items():
    print(key, "=", value)

Dataset already downloaded. 9668 items in the database
Downloaded 9668 items from the database.
Got back 9668 data points.
The statistics have 100 items.
Got back 100 statistics.
Here they are:
2014 = 391
2018 = 623
2017 = 562
1989 = 45
2016 = 521
1994 = 63
2020 = 962
2019 = 929
2008 = 171
2001 = 82
1941 = 22
1991 = 45
2005 = 112
2015 = 378
2011 = 252
2013 = 289
1949 = 14
2007 = 176
2002 = 92
1955 = 9
1959 = 18
1983 = 17
2009 = 149
2012 = 252
2010 = 186
1986 = 40
1988 = 44
1920 = 3
1936 = 29
1992 = 56
2021 = 1442
1993 = 57
2006 = 138
1948 = 15
1946 = 21
1944 = 12
1935 = 15
1985 = 28
1937 = 24
1970 = 25
1945 = 14
1939 = 21
1996 = 62
1997 = 59
1974 = 24
1938 = 15
1978 = 23
2004 = 103
1943 = 21
1975 = 27
1960 = 12
1934 = 12
1940 = 16
1961 = 11
2003 = 81
2000 = 81
1967 = 17
1995 = 72
1951 = 18
1932 = 8
1999 = 64
1963 = 20
1969 = 16
1952 = 16
1947 = 22
1929 = 4
1990 = 40
1925 = 8
1968 = 19
1987 = 33
1942 = 16
1979 = 16
1980 = 25
1981 = 22
1976 = 12
1966 = 10
1973 = 17
1956 = 17
1972 = 20
19