# Imports

In [15]:
# Import packages
import csv
import numpy as np

## Read in the data (we would provide this)
with open('us-states.csv') as file:
        state_covid_data = []
        for row in csv.reader(file):
            state_covid_data.append(row)

len(state_covid_data)

10080

# Exploration

In [2]:
# First 5 entries
state_covid_data[:5]

[['date', 'state', 'fips', 'cases', 'deaths'],
 ['2020-01-21', 'Washington', '53', '1', '0'],
 ['2020-01-22', 'Washington', '53', '1', '0'],
 ['2020-01-23', 'Washington', '53', '1', '0'],
 ['2020-01-24', 'Illinois', '17', '1', '0']]

In [4]:
# First row
state_covid_data[:][1]

['2020-01-21', 'Washington', '53', '1', '0']

In [5]:
# Also returns the first row!
state_covid_data[1][:]

['2020-01-21', 'Washington', '53', '1', '0']

In [8]:
# Returns the first 5 entries in the first column, not including the variable names
[x[0] for x in state_covid_data[1:]][:5]

['2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24', '2020-01-24']

# Questions

## (1) Count up the number of unique dates in the data

In [11]:
# Creates a list of only the date variable
dates = [x[0] for x in state_covid_data[1:]]
dates[:5]

['2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24', '2020-01-24']

In [20]:
# Determines and prints the number of unique dates using numpy
print("Number of unique dates: " + str(len(np.unique(dates))))

Number of unique dates: 225


In [21]:
# Determines the number of unique dates using a loop
# Inspired by code found at https://www.geeksforgeeks.org/python-get-unique-values-list/

# Unique dates list
unique_dates = []

# Iterates through the dates list and appends 
# those not in the unique dates list to the unique dates list
for date in dates:
    if date not in unique_dates:
        unique_dates.append(date)

# Finds and prints the length of the unique dates list
print("Number of unique dates: " + str(len(unique_dates)))

Number of unique dates: 225


## (2) Find the first date in which the District of Columbia recorded a case

In [22]:
# Creates a list of states
states = [x[1] for x in state_covid_data[1:]]
states[:5]

['Washington', 'Washington', 'Washington', 'Illinois', 'Washington']

In [24]:
# Used to determine how the name for the District of Columbia is recorded
np.unique(states)

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District of Columbia',
       'Florida', 'Georgia', 'Guam', 'Hawaii', 'Idaho', 'Illinois',
       'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine',
       'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
       'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
       'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
       'North Carolina', 'North Dakota', 'Northern Mariana Islands',
       'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Puerto Rico',
       'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
       'Texas', 'Utah', 'Vermont', 'Virgin Islands', 'Virginia',
       'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'],
      dtype='<U24')

In [26]:
# Finds the index of the first occurance of "District of Columbia" in the states list
firstDCCaseIndex = states.index("District of Columbia")
firstDCCaseIndex

353

In [27]:
# Returns the date of the first DC case
dates[firstDCCaseIndex]

'2020-03-07'

**Answer:** The first case in DC was recorded on March 7th, 2020. 

## (3) Write a function that takes in a *state name* as input (e.g. "Michigan") and outputs the date of its first case

In [32]:
def first_case(state_name):
    '''
    A function which takes in a state name and
    returns the date of that state's first case
    
    state_name: Name of the state in question as
    a string
    '''
    
    # Finds the index of the first case for the given state
    index = states.index(state_name)
    
    # Determines the date of the first case
    first_case_date = dates[index]
    
    # Returns the first case's date for the selected state
    return(first_case_date)

In [33]:
first_case("Michigan")

'2020-03-10'

In [34]:
first_case("Wisconsin")

'2020-02-05'

## (Optional) Bonus

In [53]:
# Creates the cases list required for the function below
cases = [x[2] for x in state_covid_data[1:]]

# Changes the cases list to all integers
i = 0
for case in cases:
    cases[i] = int(case)
    i += 1

In [54]:
def locate_date_1000(state_name):
    '''
    
    '''
    
    # Sets up an indexing variable
    index = 0
    
    # Sets up a total cases variable
    total_cases = 0
    
    # Loops through the states list
    for state in states:

        # Determines if this is the correct state
        # and if the number of cases has exceeded 1000
        if (state == state_name):
            
            # Adds the daily case load to the total cases
            total_cases += cases[index]
            
            # Checks to see if the state has surpased 1000 cases
            if (total_cases > 1000):
            
                # Returns the date the given state surpasses 1000 cases
                return dates[index]
        
        # Iterates the indexing variable
        index += 1
    

In [55]:
locate_date_1000("Michigan")

'2020-04-17'