In [1]:
from typing import List

import pandas as pd

CONFIRMED_CASES_URL = f"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data" \
                      f"/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv "

"""
When downloading data it's better to do it in a global scope instead of a function.
This speeds up the tests significantly
"""
confirmed_cases = pd.read_csv(CONFIRMED_CASES_URL, error_bad_lines=False)

In [2]:
df = confirmed_cases

In [93]:
def poland_cases_by_date(day: int, month: int, year: int = 2020) -> int:
    
    """
    Returns confirmed infection cases for country 'Poland' given a date.
    Ex.
    >>> poland_cases_by_date(7, 3, 2020)
    5
    >>> poland_cases_by_date(11, 3)
    31
    :param year: 4 digit integer representation of the year to get the cases for, defaults to 2020
    :param day: Day of month to get the cases for as an integer indexed from 1
    :param month: Month to get the cases for as an integer indexed from 1
    :return: Number of cases on a given date as an integer
    """
    
    # Your code goes here (remove pass)]
    
    if (day<22 and month) == 1 or (year != 2020) or (day <=0) or (month<=0):
        raise ValueError()

    year_short = str(year)[2:]
    
    date = f"{month}/{day}/{year_short}"
    
    result = df.loc[df["Country/Region"]=="Poland"][date].values[0]
    
    print(result)
    
    return result

poland_cases_by_date(7, 3, 2020)
    
poland_cases_by_date(11, 3)

poland_cases_by_date(15, 3, 2020)

5
31
119


119

In [94]:
def top5_countries_by_date(day: int, month: int, year: int = 2020) -> List[str]:
    
    """
    Returns the top 5 infected countries given a date (confirmed cases).
    Ex.
    >>> top5_countries_by_date(27, 2, 2020)
    ['China', 'Korea, South', 'Cruise Ship', 'Italy', 'Iran']
    >>> top5_countries_by_date(12, 3)
    ['China', 'Italy', 'Iran', 'Korea, South', 'France']
    :param day: 4 digit integer representation of the year to get the countries for, defaults to 2020
    :param month: Day of month to get the countries for as an integer indexed from 1
    :param year: Month to get the countries for as an integer indexed from 1
    :return: A list of strings with the names of the coutires
    """

    # Your code goes here (remove pass)
    
    if (day<22 and month) == 1 or (year != 2020) or (day <=0) or (month<=0):
        raise ValueError()
        
    year_short = str(year)[2:]
    
    # Używając fstring - f" "
    
    date = f"{month}/{day}/{year_short}"
    
    # .groupby(by=["Country/Region"]).sum() - sumowanie wszystkich wierszy z tą samą w wartością w kolumnie "Country/Region"
    
    result = list(df.groupby(by=["Country/Region"]).sum().sort_values(by=date, ascending=False).head(5).index)
    
    # Tabelka pd.df jest dwuwymiarowa. list(df) - zwaraca listę kolumn. Przekonwertowanie tabelki na listę się nie uda.
    # Konwertowane są więc na listę indeksy tabelki, czyli nazwy krajów w tym przypadku
    
    print(result)
    
    return result
    
top5_countries_by_date(27, 2, 2020)
top5_countries_by_date(12, 3)
top5_countries_by_date(15, 3, 2020)

['China', 'Korea, South', 'Cruise Ship', 'Italy', 'Iran']
['China', 'Italy', 'Iran', 'Korea, South', 'France']
['China', 'Italy', 'Iran', 'Korea, South', 'Spain']


['China', 'Italy', 'Iran', 'Korea, South', 'Spain']

In [101]:
def no_new_cases_count(day: int, month: int, year: int = 2020) -> int:
    """
    Returns the number of countries/regions where the infection count in a given day was the same as the previous day.
    Ex.
    >>> no_new_cases_count(11, 2, 2020)
    35
    >>> no_new_cases_count(3, 3)
    57
    :param day: 4 digit integer representation of the year to get the cases for, defaults to 2020
    :param month: Day of month to get the countries for as an integer indexed from 1
    :param year: Month to get the countries for as an integer indexed from 1
    :return: Number of countries/regions where the count has not changed in a day
    """
    
    # Your code goes here (remove pass)

    if (day<22 and month == 1) or (year != 2020) or (day <=0) or (month<=0):
        raise ValueError()
        
    year_short = str(year)[2:]
    
    date = f"{month}/{day}/{year_short}"
    
    date_prev_day = f"{month}/{day - 1}/{year_short}"

    if (month == 3) and (day == 1):
        month_feb = 2
        day_feb = 29
        date_prev_day = f"{month_feb}/{day_feb}/{year_short}"
        
    if (month == 2) and (day == 1):
        month_jan = 1
        day_jan = 31
        date_prev_day = f"{month_jan}/{day_jan}/{year_short}"    
        
    result = df.loc[(df[date] - df[date_prev_day]) != 0].shape[0]
    
    print (date)
    print (date_prev_day)
    print (result)
    
    return result
    
no_new_cases_count(11, 2, 2020)
no_new_cases_count(3, 3)
no_new_cases_count(1, 3, 2020)
no_new_cases_count(1, 2)
    


2/11/20
2/10/20
35
3/3/20
3/2/20
57
3/1/20
2/29/20
46
2/1/20
1/31/20
41


41