# Statistics for COVID

- Install matplotlib: pip3 install matplotlib
- Install scipy: pip3 install scipy


In [8]:
import numpy as np
import matplotlib.pyplot as plt
import csv
import datetime
import json
from scipy import stats
from urllib.request import urlopen

In [9]:
covid_data_dir='/home/student/data/COVID-19/'
covid_data_daily_reports_dir=covid_data_dir + '/csse_covid_19_data/csse_covid_19_daily_reports/'
covid_data_daily_file=covid_data_daily_reports_dir + '/06-07-2020.csv'


In [10]:
class DailyEntry:
    def __init__(self, fips, admin2, province_state, country_region,
                 last_update, lat, long, confirmed, deaths, 
                 recovered, active, combined_key,
                 incident_rate, case_fatality_ratio):
        self.fips = fips
        self.admin2 = admin2
        self.province_state = province_state
        self.country_region = country_region
        self.last_update = last_update
        self.lat = lat
        self.long = long
        self.confirmed = confirmed
        self.deaths = deaths
        self.recovered = recovered
        self.active = active
        self.combined_key = combined_key
        self.incident_rate = incident_rate
        self.case_fatality_ratio = case_fatality_ratio
    fips: int
    admin2: str
    province_state: str
    country_region: str
    last_update: str
    lat: str
    long: str
    confirmed: int
    deaths: int
    recovered: int
    active: int
    combined_key: str
    incident_rate: float
    case_fatality_ratio: float

## Load CSV Data
- Data is in the column oriented list
- Data is in row of daily_entries

In [11]:
columns = {}
daily_entries = []

with open(covid_data_daily_file, 'r') as csvfile:
    reader = csv.reader(csvfile)
    headers = next(reader, None)
    
    for h in headers:
        columns[h] = []
       
    for row in reader:
        # last_update = datetime.datetime.strptime(row[4], '%Y-%m-%d %H:%M:%S')
        last_update = row[4]
        if not row[12]: row[12] = '0.0'
        if not row [13]: row[13] = '0.0'
        daily_entry = DailyEntry(row[0], row[1], row[2], row[3], last_update, row[5], row[6],
                                    row[7], row[8], row[9], row[10], row[11], 
                                    float(row[12]), float(row[13]))
        daily_entries.append(daily_entry)
        for h, v in zip(headers, row):
            columns[h].append(v)

## Convert to JSON format for daily entries

In [12]:

class MyEncoder(JSONEncoder):
    def default(self, o):
        return o.__dict__    

dump = json.dumps(daily_entries[0], cls=MyEncoder, indent=2)
print(dump)


{
  "fips": "45001",
  "admin2": "Abbeville",
  "province_state": "South Carolina",
  "country_region": "US",
  "last_update": "2020-06-08 03:33:22",
  "lat": "34.22333378",
  "long": "-82.46170658",
  "confirmed": "51",
  "deaths": "0",
  "recovered": "0",
  "active": "51",
  "combined_key": "Abbeville, South Carolina, US",
  "incident_rate": 207.9341134260203,
  "case_fatality_ratio": 0.0
}


In [13]:
daily_confirmed_by_country = map(lambda x: (x.country_region, x.confirmed), daily_entries)

In [14]:
from itertools import groupby
mapping = map(lambda x: (x.country_region, x.confirmed), daily_entries)
sorted_mapping = sorted(mapping)

grouper = groupby(mapping, lambda x: x[0])
result = map(lambda l: (l[0], reduce(lambda x, y: x + y, map(lambda p:p[1], l[1]))), grouper)
dir(result)

['__class__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__']

In [15]:
print('Sorted by Country Region, Province State and City:\n')
for daily_entry in daily_entries:
    if daily_entry.province_state:
        print(f'{daily_entry.country_region}, {daily_entry.province_state}, {daily_entry.admin2}')


Sorted by Country Region, Province State and City:

US, South Carolina, Abbeville
US, Louisiana, Acadia
US, Virginia, Accomack
US, Idaho, Ada
US, Iowa, Adair
US, Kentucky, Adair
US, Missouri, Adair
US, Oklahoma, Adair
US, Colorado, Adams
US, Idaho, Adams
US, Illinois, Adams
US, Indiana, Adams
US, Iowa, Adams
US, Mississippi, Adams
US, Nebraska, Adams
US, Ohio, Adams
US, Pennsylvania, Adams
US, Washington, Adams
US, Wisconsin, Adams
US, Vermont, Addison
US, South Carolina, Aiken
US, Minnesota, Aitkin
US, Florida, Alachua
US, North Carolina, Alamance
US, California, Alameda
US, Colorado, Alamosa
US, New York, Albany
US, Wyoming, Albany
US, Virginia, Albemarle
US, Michigan, Alcona
US, Mississippi, Alcorn
US, Illinois, Alexander
US, North Carolina, Alexander
US, Virginia, Alexandria
US, Oklahoma, Alfalfa
US, Iowa, Allamakee
US, Michigan, Allegan
US, Maryland, Allegany
US, New York, Allegany
US, North Carolina, Alleghany
US, Virginia, Alleghany
US, Pennsylvania, Allegheny
US, Indiana, Allen

In [16]:
flatten = list(x for x in daily_entries)
print(flatten)

[<__main__.DailyEntry object at 0x7f81993761d0>, <__main__.DailyEntry object at 0x7f8199376390>, <__main__.DailyEntry object at 0x7f8199376518>, <__main__.DailyEntry object at 0x7f8199376668>, <__main__.DailyEntry object at 0x7f81993767f0>, <__main__.DailyEntry object at 0x7f8199376940>, <__main__.DailyEntry object at 0x7f8199376d68>, <__main__.DailyEntry object at 0x7f8199376ef0>, <__main__.DailyEntry object at 0x7f8199376fd0>, <__main__.DailyEntry object at 0x7f81fc41c080>, <__main__.DailyEntry object at 0x7f8198fbe128>, <__main__.DailyEntry object at 0x7f8198fbe240>, <__main__.DailyEntry object at 0x7f8198fbed30>, <__main__.DailyEntry object at 0x7f8198fbeeb8>, <__main__.DailyEntry object at 0x7f8198f4b0b8>, <__main__.DailyEntry object at 0x7f8198f4b240>, <__main__.DailyEntry object at 0x7f8198f4b390>, <__main__.DailyEntry object at 0x7f8198f4b518>, <__main__.DailyEntry object at 0x7f8198f4b630>, <__main__.DailyEntry object at 0x7f8198f4b7b8>, <__main__.DailyEntry object at 0x7f8198

In [18]:
# daily_confirmed_by_country = flatten.flatMap(lambda x: x).map(lambda x: x.country_region, x.confirmed).reduce(lambda a, b: a + b)

In [19]:
daily_by_country.keys()

NameError: name 'daily_by_country' is not defined

In [None]:
type(daily_by_country)

In [None]:
for key in daily_by_country:
    print(key, daily_by_country[key].confirmed)
    # print(key)

# Statistics

In [67]:

from random import seed
from random import randint
from datetime import datetime

random.seed(datetime.now())

total = 0
reds_eq_6 = 0

# Draw without replacement.

for _ in range (100):    
    total_reds = 12
    total_blacks = 8

    reds = 0
    blacks = 0
    for _ in range(10):
        x = randint(1, total_reds + total_blacks)
        if x < total_reds:
            reds += 1
            total_reds -= 1
        else:
            blacks += 1
            total_blacks -=1
#     print(reds, blacks, total_reds, total_blacks)
    total += reds
    if reds == 6:
        reds_eq_6 += 1
print(total/100)
print('times 6 reds: ', reds_eq_6)

5.41
times 6 reds:  38


In [39]:
def experiment_without_replacement(num_draws, num_reds, num_black)