In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Bikes

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# Load the trip records from trips_2.csv into a Table and display it.
trips = Table.read_table('trips_2.csv')
trips

In [None]:
# These bikes were generally used for 30 minutes or less, so keep only
# the rides whose duration is below half an hour (30 * 60 = 1800 seconds)
# and plot how those durations are distributed.
commute = trips.where('Duration', are.below(30 * 60))
commute.hist('Duration')

In [None]:
# Same histogram with 60 bins; unit='second' names the unit of the plotted column.
commute.hist('Duration', bins=60, unit='second')

In [None]:
# What is the area of this peak here? What percentage of all
# the trips in the data does this represent?

In [None]:
# Count the commute trips leaving each start station, busiest station first.
starts = (
    commute
    .group('Start Station')
    .sort('count', descending=True)
)
starts

In [None]:
# Cross-tabulate the commute trips: one column per start station, one row
# per end station; with no values/collect given, each cell is a count.
commute.pivot('Start Station', 'End Station')

In [None]:
# Show the first three rows of trips (handy for checking column positions).
trips.show(3)

In [None]:
# Keep three columns of commute, chosen by position (indices 3, 6 and 1), in that order.
# NOTE(review): presumably Start Station, End Station and Duration — confirm against trips.show(3).
duration = commute.select(3, 6, 1)
duration

In [None]:
# Group rows by each distinct pair of values in the first two columns and
# take the min of the remaining column within every pair.
shortest = duration.group([0, 1], min)
shortest

In [None]:
# Rows whose first column contains 'Civic Center BART', sorted in
# ascending order of the third column.
from_cc = shortest.where(0, are.containing('Civic Center BART')).sort(2)
from_cc

## Maps

**Please run all cells before this cell, including the previous example cells and the import cell at the top of the notebook.**

In [None]:
# Load the station records from station.csv into a Table and display it.
stations = Table.read_table('station.csv')
stations

In [None]:
# Draw one map marker per station from its 'lat', 'long' and 'name' columns.
Marker.map_table(stations.select('lat', 'long', 'name'))

In [None]:
# Restrict to stations whose landmark is 'San Francisco' and draw them
# as green circles (radius=150) instead of markers.
sf = stations.where('landmark', 'San Francisco')
Circle.map_table(sf.select('lat', 'long', 'name'), color='green', radius=150)

In [None]:
# Redisplay from_cc (built in the Bikes section) for reference.
from_cc

In [None]:
# One row per distinct landmark, with a count of the stations in each.
stations.group('landmark')

In [None]:
# Attach a color to every landmark group. with_column needs one value per
# row, so the five colors listed here presume exactly five landmark groups.
colors = stations.group('landmark').with_column(
    'color', make_array('blue', 'red', 'green', 'orange', 'purple'))
colors

In [None]:
# Join each station with the row of colors that shares its 'landmark' value.
stations.join('landmark', colors)

In [None]:
# Build the table for the map (position, label and per-landmark color)
# and draw one marker per station from it.
map_table = stations.join('landmark', colors).select('lat', 'long', 'name', 'color')
Marker.map_table(map_table)

In [None]:
# Match each station to its row in starts: the 'name' column of stations
# is joined against the 'Start Station' column of starts.
station_starts = stations.join('name', starts, 'Start Station')
station_starts

In [None]:
# Preview a table for a circle map: station position and name, the same
# color ('blue') on every row, and an area equal to the trip count * 1000.
station_starts.select('lat', 'long', 'name').with_columns(
    'color', 'blue',
    'area', station_starts.column('count') * 1000
)

In [None]:
# Draw the stations as blue circles whose 'area' column is the trip count.
# NOTE(review): 'area' is the raw count here, while the cell above that builds
# the same table multiplies the count by 1000 — confirm which is intended.
Circle.map_table(station_starts.select('lat', 'long', 'name').with_columns(
    'color', 'blue',
    'area', station_starts.column('count') 
))

## Combining Table Methods

In [None]:
# Redisplay duration (built in the Bikes section) as the starting point for the exercises below.
duration

In [None]:
# The value of the average duration of a rental.


In [None]:
# The name of the station where the most rentals ended.


In [None]:
# The number of stations for which the average duration of rentals ending at
# that station was more than 10 minutes.


## Drinks - More Table Review

In [None]:
# A small hand-built table: one row per (drink, cafe) pair with its price.
# Note that 'Espresso' appears at two cafes and 'Nefeli' sells two drinks.
drinks = Table(['Drink', 'Cafe', 'Price']).with_rows([
    ['Milk Tea', 'Tea One', 4],
    ['Espresso', 'Nefeli',  2],
    ['Coffee',    'Nefeli', 3],
    ['Espresso', "Abe's",   2]
])
drinks

In [None]:
# A second hand-built table: one row per coupon, giving the percentage off
# and the location it applies to ('Tea One' has two coupons).
discounts = Table().with_columns(
    'Coupon % off', make_array(5, 50, 25),
    'Location', make_array('Tea One', 'Nefeli', 'Tea One')
)
discounts

## Comparisons and Booleans

In [None]:
# Read the census CSV straight from its URL, keep the sex, age and
# 2010/2015 population-estimate columns, then shorten the two estimate
# labels to just the year.
pop = 'http://www2.census.gov/programs-surveys/popest/datasets/2010-2015/national/asrh/nc-est2015-agesex-res.csv'
full_census_table = Table.read_table(pop)
partial = full_census_table.select('SEX', 'AGE', 'POPESTIMATE2010', 'POPESTIMATE2015')
us_pop = (
    partial
    .relabeled(2, '2010')
    .relabeled(3, '2015')
)
us_pop

In [None]:
# Rows of us_pop whose AGE value equals 70.
us_pop.where('AGE', 70)

In [None]:
# Element-wise ratio of the 2015 column to the 2010 column, one value per row.
us_pop.column('2015') / us_pop.column('2010') 

In [None]:
# Let's look at one more table and use a more advanced form of where on it:
# redisplay duration, which the next cells filter and average.
duration

In [None]:
# The average duration of all trips in the table (no filtering by station)
np.average(duration.column(2))

In [None]:
# The average duration of trips that started and ended at the same station.
# The == comparison is evaluated per row; where is passed the resulting
# array of bools, and the third column of the filtered table is averaged.
np.average(duration.where(duration.column(0) == duration.column(1)).column(2))

In [None]:
# The average duration of trips that started and ended at different stations.
# Same pattern as the same-station case, with != producing the array of bools.
np.average(duration.where(duration.column(0) != duration.column(1)).column(2))

## Comparison and bool values

In [None]:
# comparing numbers 


In [None]:
# What about strings?


In [None]:
# Let's try writing a function



## Combining Bool Values

In [None]:
# Two small integers used by the boolean expressions in the cells below.
x, y = 3, 4

In [None]:
# With x = 3 and y = 4 both comparisons are False, so the `or` is False.
x < 2 or y > 5

In [None]:
# With x = 3 and y = 4 the second comparison is True, so the `or` is True.
x < 2 or y > 3

In [None]:
# With x = 3 and y = 4 the first comparison is False, so the `and` is False.
x < 2 and y > 3

### Aggregating bool values

In [None]:
# In arithmetic, True counts as 1 (and False as 0), so this evaluates to 2.
True + True

In [None]:
# NOTE(review): `ages` is not assigned in any cell visible in this notebook —
# confirm it is defined before this point, otherwise a fresh kernel raises NameError here.
ages

In [None]:
# The second column of ages as an array.
# NOTE(review): presumably a column of bool values, given this section's topic — confirm.
ages.column(1)

In [None]:
# Add up the entries of that column; if they are bools, each True counts
# as 1, so the total is the number of True entries.
sum(ages.column(1))

### Predicates

In [None]:
# create a predicate for "still young" 

## Choice

In [None]:
# Pick one of the two labels at random. No seed is set, so re-running
# the cell can give a different result each time.
two_groups = make_array('treatment', 'control')
np.random.choice(two_groups)