# Coding Discussion 02

### (0) Data Preparation

In [1]:
## Read in the data
import csv
with open('us-states.csv') as file:
    state_covid_data = []
    for row in csv.reader(file):
        state_covid_data.append(row)
        
len(state_covid_data) # total rows in data

10080

In [2]:
state_covid_data[:5] # data head

[['date', 'state', 'fips', 'cases', 'deaths'],
 ['2020-01-21', 'Washington', '53', '1', '0'],
 ['2020-01-22', 'Washington', '53', '1', '0'],
 ['2020-01-23', 'Washington', '53', '1', '0'],
 ['2020-01-24', 'Illinois', '17', '1', '0']]

### (1) Count up the number of unique dates in the data.

In [3]:
dates = [] # empty list of dates
for row in state_covid_data[1:]: # bc of headers, data starts at row 1
    dates.append(row[0]) # add each date to list of dates
dates.sort() # sort list of dates, in case they were not sorted in original data

print(f'Total Observations: {len(dates)}') # total dates in list, with duplicates
print(f'Unique Dates: {len(set(dates))} ({dates[0]} to {dates[-1]})') # use set to find all unique dates in list

Total Observations: 10079
Unique Dates: 225 (2020-01-21 to 2020-09-01)


### (2) Find the first date in which the District of Columbia recorded a case.

see (3)

### (3) Write a function that takes in a state name as input (e.g. "Wisconsin") and outputs the date of its first case.

In [4]:
def state_exists(state_name):
    """
    the method 'state_exists' accepts a state_name as an argument,
    returns True if the state name appears in data set, otherwise returns False
    """
    for row in state_covid_data[1:]: # bc of headers, data starts at row 1
        if row[1] == state_name: # check if a row has the provided state_name 
            return True
    print(f"State of '{state_name}' does not exist in data set. Please try again.") # print error statement
    return False

def first_case(state_name):
    """
    the method 'first_case' accepts a state_name as an argument,
    and returns the date of the first reported covid case in that state
    """
    if not state_exists(state_name): return # ensure that state_name exists in data set
    dates = [] # empty list of dates
    for row in state_covid_data[1:]: # bc of headers, data starts at row 1
        if row[1] == state_name: # check if the row has the correct state_name 
            dates.append(row[0]) # if so, add the date to our list
    dates.sort() # sort list of dates, in case they were not sorted in original data
    return dates[0] # return first date in list

In [5]:
first_case('Canada')

State of 'Canada' does not exist in data set. Please try again.


In [6]:
first_case('District of Columbia')

'2020-03-07'

In [7]:
first_case('Alabama')

'2020-03-13'

### (4) Write a function that takes in a state name as input (e.g. "Wisconsin") and outputs the date when the number of reported cases within the state exceeded 1000.

In [8]:
def locate_date_1000(state_name):
    """
    the method 'locate_date_1000' accepts a state_name as an argument,
    and returns the first date with >1000 reported covid case in that state
    """
    if not state_exists(state_name): return # ensure that state_name exists in data set
    dates = [] # empty list of dates
    for row in state_covid_data[1:]: # bc of headers, data starts at row 1
        if row[1] == state_name and int(row[3]) > 1000: # check if the row has the correct state_name and if cases are greater than 1000
            dates.append(row[0]) # if so, add date to our list
    dates.sort() # sort list of dates, in case they were not sorted in original data
    if len(dates) == 0: 
        return "No dates with >1000 cases" # make sure at least one date was found with >1000 cases
    else: 
        return dates[0] # return first date in list

In [9]:
locate_date_1000('District of Columbia')

'2020-04-06'

### (5) Write a function that outputs the date of the first case in every state

In [10]:
def first_case_all():
    """
    the method 'first_case_all' returns a dictionary with states as keys,
    and values of the date of the first reported covid case in that state
    """
    all_dates_in_state = {} # empty dictionary to contain all dates with covid cases in each state
    first_date_in_state = {} # empty dictionary to contain the FIRST date of a covid case in each state
    for row in state_covid_data[1:]: # bc of headers, data starts at row 1
        date = row[0] # get date from row
        state = row[1] # get state name from row
        if state in all_dates_in_state.keys(): 
            all_dates_in_state[state].append(date) # if state is already in dictionary, append date to date list
        else:
            all_dates_in_state[state] = [date] # if state not in dictionary, initialize list with date
            
    for state in all_dates_in_state.keys():
        all_dates_in_state[state].sort() # sort date list, in case they were not sorted in original data
        first_date_in_state[state] = all_dates_in_state[state][0] # extract first date and add to other dictionary
    
    return first_date_in_state # return dictionary with state and date of first covid case

In [11]:
first_case_all()

{'Washington': '2020-01-21',
 'Illinois': '2020-01-24',
 'California': '2020-01-25',
 'Arizona': '2020-01-26',
 'Massachusetts': '2020-02-01',
 'Wisconsin': '2020-02-05',
 'Texas': '2020-02-12',
 'Nebraska': '2020-02-17',
 'Utah': '2020-02-25',
 'Oregon': '2020-02-28',
 'Florida': '2020-03-01',
 'New York': '2020-03-01',
 'Rhode Island': '2020-03-01',
 'Georgia': '2020-03-02',
 'New Hampshire': '2020-03-02',
 'North Carolina': '2020-03-03',
 'New Jersey': '2020-03-04',
 'Colorado': '2020-03-05',
 'Maryland': '2020-03-05',
 'Nevada': '2020-03-05',
 'Tennessee': '2020-03-05',
 'Hawaii': '2020-03-06',
 'Indiana': '2020-03-06',
 'Kentucky': '2020-03-06',
 'Minnesota': '2020-03-06',
 'Oklahoma': '2020-03-06',
 'Pennsylvania': '2020-03-06',
 'South Carolina': '2020-03-06',
 'District of Columbia': '2020-03-07',
 'Kansas': '2020-03-07',
 'Missouri': '2020-03-07',
 'Vermont': '2020-03-07',
 'Virginia': '2020-03-07',
 'Connecticut': '2020-03-08',
 'Iowa': '2020-03-08',
 'Louisiana': '2020-03-09

### (6) Write a function that outputs the maximum daily cases (with date) in every state

In [12]:
def max_cases_all():
    """
    the method 'max_cases_all' returns a dictionary with states as keys,
    and value of a list with maximum daily covid cases, and the date which it occured
    """
    max_daily_cases_in_state = {} # empty dictionary to contain the maximum daily cases (and date) in every state
    for row in state_covid_data[1:]: # bc of headers, data starts at row 1
        date = row[0] # get date from row
        state = row[1] # get state name from row
        cases = int(row[3]) # get daily case count from row, as an integer
        if not state in max_daily_cases_in_state.keys(): # if state not in dict, create list with cases and date
            max_daily_cases_in_state[state] = [cases, date] 
        elif cases > max_daily_cases_in_state[state][0]: # check if new date has more cases than previous maximum
            max_daily_cases_in_state[state] = [cases, date] # if so, replace mwith new maximum and date
            
    return max_daily_cases_in_state # return dictionary with state and date of first covid case

In [13]:
max_cases_all()

{'Washington': [78062, '2020-09-01'],
 'Illinois': [239115, '2020-09-01'],
 'California': [716628, '2020-09-01'],
 'Arizona': [202375, '2020-09-01'],
 'Massachusetts': [128888, '2020-09-01'],
 'Wisconsin': [81688, '2020-09-01'],
 'Texas': [645534, '2020-09-01'],
 'Nebraska': [34574, '2020-09-01'],
 'Utah': [52505, '2020-09-01'],
 'Oregon': [26958, '2020-09-01'],
 'Florida': [631032, '2020-09-01'],
 'New York': [440237, '2020-09-01'],
 'Rhode Island': [22002, '2020-09-01'],
 'Georgia': [256544, '2020-09-01'],
 'New Hampshire': [7297, '2020-09-01'],
 'North Carolina': [169966, '2020-09-01'],
 'New Jersey': [194070, '2020-09-01'],
 'Colorado': [58118, '2020-09-01'],
 'Maryland': [109353, '2020-09-01'],
 'Nevada': [69708, '2020-09-01'],
 'Tennessee': [153274, '2020-09-01'],
 'Hawaii': [8628, '2020-09-01'],
 'Indiana': [96988, '2020-09-01'],
 'Kentucky': [51762, '2020-09-01'],
 'Minnesota': [76404, '2020-09-01'],
 'Oklahoma': [59399, '2020-09-01'],
 'Pennsylvania': [139591, '2020-09-01'],
 