In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

# Lecture 12

## Review of joining tables

In [None]:
# build the price table one column at a time: one row per product
prices = (
    Table()
    .with_column("Products", make_array("Kiwis", "Onions", "Tomatos"))
    .with_column("Price", make_array(6, 3, 7))
)

prices

In [None]:
# build the quantity table one column at a time: one row per product
quantities = (
    Table()
    .with_column("Products", make_array("Kiwis", "Onions", "Brococoli"))
    .with_column("Quantity", make_array(10, 6, 5))
)

quantities

In [None]:
# join the tables on the shared "Products" column; only products that appear
# in both tables ("Kiwis" and "Onions") show up in the result
prices.join("Products", quantities)


In [None]:
# now rebuild the prices table so that "Kiwis" is listed twice, with two different prices
prices = (
    Table()
    .with_column("Products", make_array("Kiwis", "Onions", "Tomatos", "Kiwis"))
    .with_column("Price", make_array(6, 3, 7, 10))
)

prices


In [None]:
# what happens when we join tables with duplicate rows?
# ("Kiwis" is listed twice in the modified prices table)
prices.join("Products", quantities)

## Maps

We can create maps with the `datascience` package using the `Circle.map_table(location_table)` or `Marker.map_table(location_table)` methods. We pass these methods a `location_table` that has the following columns:

- Column 0: latitudes
- Column 1: longitudes
- Column 2: labels 
- Column 3: colors
- Column 4: sizes  (only for the Circle Marker)

In [None]:
# a location table for Yale; every column is given as a single value
# (presumably producing a one-row table -- display yale_table to confirm)
yale_table = Table().with_columns(
    "lat", 41.3163,
    "long", -72.9223,
    "labels", "Yale",
    "areas", 10000,
    "colors", "Blue"
)


In [None]:
# plotting the location as a circle; "areas" is the size column that only circles use
Circle.map_table(yale_table)


In [None]:
# using a marker instead; drop "areas" first because the size column
# only applies to circles
Marker.map_table(yale_table.drop("areas"))


## Example of maps and joining tables: Covid-19 cases in New England

Let's illustrate the use of maps with the New York Times Covid-19 database. Recall that this is 
a county-level database of confirmed cases and deaths, updated daily,
compiled from state and local governments and health departments across the United States.

The Times has created many visualizations that are effective communications of important information about the pandemic. Here we will construct some simple circle maps.

The data are publicly available via GitHub: [https://github.com/nytimes/covid-19-data](https://github.com/nytimes/covid-19-data).  


In [None]:
# read the NYT county-level Covid-19 data straight from GitHub (needs network access)
covid_table = Table.read_table("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")
covid_table

Now we load in a table of geographical data for counties.

In [None]:
# read the county geographic data from GitHub (needs network access)
county_geo = Table.read_table("https://raw.githubusercontent.com/jdlafferty/covid-19/master/data/geo-counties.csv") 
county_geo

### Process the recent data

Let's get the data from New England states from the most recent date.

In [None]:
# the latest date in the data (assumes the dates are strings such as "2020-03-01"
# whose alphabetical order matches chronological order -- TODO confirm in covid_table)
most_recent_date = max(covid_table.column("date"))

# the six New England states
states = ['Connecticut', 'Massachusetts', 'Vermont', 'New Hampshire', 'Rhode Island', 'Maine']

# keep only the rows for the most recent date, then only the New England states
recent_data = covid_table.where('date', most_recent_date)
recent_state_data = recent_data.where('state', are.contained_in(states))
recent_state_data

[FIPS](https://en.wikipedia.org/wiki/FIPS_county_code) is a number assigned to each county.  This will be used to join the Covid-19 data with the geo location data.

In [None]:
# match each Covid-19 row with the geographic data that has the same FIPS code
joined_data = recent_state_data.join("fips", county_geo)

# notice the extra columns ending in _2: they appear when both tables have a column with the same name
joined_data


In [None]:
# build the location table for Circle.map_table:
# latitude, longitude, label, circle area and color for each county
map_data = Table().with_columns('lat', joined_data.column('lat'), 
                           'long', joined_data.column('lon'), 
                           'labels', joined_data.column('county'),
                           # circle area is the raw case count divided by 10
                           'areas', joined_data.column('cases')/10,
                           'colors', 'red')
map_data.show(10)  # preview the first 10 rows
Circle.map_table(map_data, weight=1)

What is [wrong](https://xkcd.com/1138/) with the map you created above? How could we make it more informative? 

Note, you can read more about the mapping functions in [chapter 8.5](https://inferentialthinking.com/chapters/08/5/Bike_Sharing_in_the_Bay_Area.html) of the class textbook and in the `datascience` [package documentation](http://data8.org/datascience/maps.html). You will also have a chance for more practice with the map functions on project 1, so please get started on the project soon!


In [None]:
# load the county population data from a local CSV file
pop_county_data = Table.read_table("PopulationCounties.csv")
# a bare expression in the middle of a cell is not displayed,
# so preview the table explicitly
pop_county_data.show(10)

# match each county's Covid-19 row with its population; the FIPS code column
# is called "fips" in joined_data and "FIPS" in pop_county_data
joined_data2 = joined_data.join("fips", pop_county_data, "FIPS")

# add the number of cases divided by the 2020 population
joined_data2 = joined_data2.with_column(
    "Cases per capita",
    joined_data2.column("cases") / joined_data2.column("Population 2020")
)

# counties with the most cases per person come first
joined_data2.sort("Cases per capita", descending = True)


In [None]:
# same map as before, but the circle area now reflects cases per capita
# instead of the raw case count
map_data2 = Table().with_columns('lat', joined_data2.column('lat'), 
                           'long', joined_data2.column('lon'), 
                           'labels', joined_data2.column('county'),
                           # per-capita values are multiplied by 20000 to get the circle area
                           'areas', joined_data2.column('Cases per capita') * 20000,
                           'colors', 'red')
map_data2.show(10)  # preview the first 10 rows
Circle.map_table(map_data2, weight=1)

## Comparison ##

We can do simple mathematical and string comparisons in Python which return Boolean values.


In [None]:
# basic math comparison
3 > 1

In [None]:
# checking the type of a basic math comparison
type(3 > 1)

In [None]:
# another basic math comparison
3 < 1

In [None]:
# We can type in Boolean values ourselves
True

In [None]:
# We use == to compare whether two items are equal (not 3 = 3)
3 == 3

In [None]:
x = 14
y = 3

In [None]:
# we can compare whether a value is between two values
12 < x < 18

In [None]:
# we can also use arithmetic expressions (here x-y) inside a chained comparison
12 < x-y < 18

In [None]:
# we can use the `and` keyword to combine multiple logical statements 
x > 10 and y > 5

In [None]:
# we can also use the `or` keyword to combine multiple logical statements 
x > 10 or y > 5

In [None]:
# We can also compare strings
"my string" == "my string"

In [None]:
# Strings compare alphabetically
"cats" < "dogs"

In [None]:
# Shorter words occur earlier than longer words that have matching letters
"cat" < "catastrophe"

## Comparisons with arrays

We can do comparisons across arrays as well. 

In [None]:
pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')
pets

In [None]:
# find all the dogs
pets == 'dog'

In [None]:
# True is treated as 1 and False is treated as 0
True == 1

In [None]:
# Since True is treated as 1 and False is treated as 0, we can sum Boolean arrays
# 0 + 1 + 0 + 0 + 1 + 0
sum(make_array(False, True, False, False, True, False))

In [None]:
# counting how many dogs there are by summing the number of Trues
sum(pets == 'dog')

In [None]:
# we can alternatively use the np.count_nonzero() function to count Trues
np.count_nonzero(pets == 'dog')

In [None]:
# we can use greater-than comparisons with arrays of strings as well
pets > 'cat'

In [None]:
# counting how many pets come after 'cat' alphabetically by summing the number of Trues
sum(pets > 'cat')

## Conditional Statements 

In [None]:
def year_from_semesters(x):
    """Return the class-year label for a semester count x.

    Values of 0 or less give 'Not a valid input'; values above 8 give "NA".
    """
    # inclusive upper bounds, checked from smallest to largest
    upper_bounds = (
        (0, 'Not a valid input'),
        (2, 'First Year'),
        (4, 'Sophomore'),
        (6, 'Junior'),
        (8, 'Senior'),
    )
    for limit, label in upper_bounds:
        if x <= limit:
            return label
    # nothing matched: x is larger than every bound
    return "NA"

year_from_semesters(-15.6)

In [None]:
year_from_semesters(5)

In [None]:
year_from_semesters(9001)