In [1]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Ranges ##

In [2]:
# With a single argument, arange counts from 0 up to (but not including) the stop value.
np.arange(5)

array([0, 1, 2, 3, 4])

In [3]:
# Two arguments are (start, stop): start is included, stop is excluded.
np.arange(7, 25)

array([ 7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
       24])

In [4]:
# Third argument is the step. 25 is the stop value, so it is NOT part of the result.
np.arange(5, 25, 10)

array([ 5, 15])

In [5]:
# Moving the stop just past 25 lets 25 itself appear in the result.
np.arange(5, 26, 10)

array([ 5, 15, 25])

In [6]:
# A floating-point stop also works, but it makes every element of the result a float
# (note the trailing dots in the output).
np.arange(5, 25.01, 10)

array([ 5., 15., 25.])

## Creating a Table from Scratch ##

In [7]:
# Table() called with no arguments: the starting point for building a table column by column.
Table()

In [8]:
# Collect the four street names into a single array.
streets = make_array('Crown', 'George', 'Orange', 'State')

In [9]:
# A bare name on the last line of a cell displays its value.
streets

array(['Crown', 'George', 'Orange', 'State'], dtype='<U6')

In [10]:
# with_column(label, values) produces a table with one column labeled 'Street name'.
# The result is only displayed here; it is not stored in a variable.
Table().with_column('Street name', streets)

Street name
Crown
George
Orange
State


In [11]:
# Same expression as before, but assigned to a name so the table can be reused.
southside = Table().with_column('Street name', streets)

In [12]:
# with_column creates and returns a NEW table containing the extra column.
# The result is not assigned here, so southside itself keeps only its original column.
southside.with_column('Blocks away from campus', np.arange(4))

Street name,Blocks away from campus
Crown,0
George,1
Orange,2
State,3


In [13]:
# Check: southside still has only 'Street name' because the previous result was not saved.
southside

Street name
Crown
George
Orange
State


In [14]:
# Reassign the name to the returned table so the new column is kept.
southside = southside.with_column('Blocks away from campus', np.arange(4))

In [15]:
# After the reassignment, southside shows both columns.
southside

Street name,Blocks away from campus
Crown,0
George,1
Orange,2
State,3


## Reading a Table from a File  ##

In [16]:
# Load the CSV into a table. The path is relative, so minard.csv must be in the
# notebook's working directory.
minard = Table.read_table('minard.csv')

In [17]:
# Display the loaded table; the CSV header row supplies the column labels.
minard

Longitude,Latitude,City,Direction,Survivors
32.0,54.8,Smolensk,Advance,145000
33.2,54.9,Dorogobouge,Advance,140000
34.4,55.5,Chjat,Advance,127100
37.6,55.8,Moscou,Advance,100000
34.3,55.2,Wixma,Retreat,55000
32.0,54.6,Smolensk,Retreat,24000
30.4,54.4,Orscha,Retreat,20000
26.8,54.3,Moiodexno,Retreat,12000


## Selecting data in a column ##

In [18]:
# select returns a TABLE containing just the named column.
minard.select('Survivors')

Survivors
145000
140000
127100
100000
55000
24000
20000
12000


In [19]:
# column returns the values of the named column as an ARRAY (not a table),
# which is what arithmetic is done on.
minard.column('Survivors')

array([145000, 140000, 127100, 100000,  55000,  24000,  20000,  12000])

In [20]:
# item(0) extracts the first element of the array as a single value.
minard.column('Survivors').item(0)

145000

## Extending a table with a new column ##

In [21]:
# The first row's survivor count is used as the baseline for the proportions below.
initial_count = minard.column('Survivors').item(0)
initial_count

145000

In [22]:
# Dividing an array by a single number divides every element, giving each row's
# survivors as a fraction of the initial count.
proportion_surviving = minard.column('Survivors')/initial_count
proportion_surviving

array([1.        , 0.96551724, 0.87655172, 0.68965517, 0.37931034,
       0.16551724, 0.13793103, 0.08275862])

In [23]:
# Attach the proportions as a new column and keep the result by reassigning minard.
# NOTE(review): the label says 'Percent', but the stored values are proportions
# between 0 and 1; they only look like percentages once a percent format is applied.
minard = minard.with_column('Percent surviving', proportion_surviving)

In [24]:
# Display minard with the new column; at this point it shows raw proportions.
minard

Longitude,Latitude,City,Direction,Survivors,Percent surviving
32.0,54.8,Smolensk,Advance,145000,1.0
33.2,54.9,Dorogobouge,Advance,140000,0.965517
34.4,55.5,Chjat,Advance,127100,0.876552
37.6,55.8,Moscou,Advance,100000,0.689655
34.3,55.2,Wixma,Retreat,55000,0.37931
32.0,54.6,Smolensk,Retreat,24000,0.165517
30.4,54.4,Orscha,Retreat,20000,0.137931
26.8,54.3,Moiodexno,Retreat,12000,0.0827586


In [25]:
# Display the column as percentages (e.g. 100.00%, 96.55%) using PercentFormatter.
minard.set_format('Percent surviving', PercentFormatter)

Longitude,Latitude,City,Direction,Survivors,Percent surviving
32.0,54.8,Smolensk,Advance,145000,100.00%
33.2,54.9,Dorogobouge,Advance,140000,96.55%
34.4,55.5,Chjat,Advance,127100,87.66%
37.6,55.8,Moscou,Advance,100000,68.97%
34.3,55.2,Wixma,Retreat,55000,37.93%
32.0,54.6,Smolensk,Retreat,24000,16.55%
30.4,54.4,Orscha,Retreat,20000,13.79%
26.8,54.3,Moiodexno,Retreat,12000,8.28%


## Working with Columns ##

In [26]:
# Load the movies data; the relative path means the CSV must be in the notebook's
# working directory.
movies = Table.read_table('movies_by_year_with_ticket_price.csv')

In [27]:
# Display the table's rows with show().
# NOTE(review): the recorded output lists 10 rows while movies.num_rows reports 36,
# so the saved output appears truncated. Re-run to confirm.
movies.show()

Year,Average Ticket Price,Total Gross,Number of Movies,#1 Movie
2015,8.43,11128.5,702,Star Wars: The Force Awakens
2014,8.17,10360.8,702,American Sniper
2013,8.13,10923.6,688,Catching Fire
2012,7.96,10837.4,667,The Avengers
2011,7.93,10174.3,602,Harry Potter / Deathly Hallows (P2)
2010,7.89,10565.6,536,Toy Story 3
2009,7.5,10595.5,521,Avatar
2008,7.18,9630.7,608,The Dark Knight
2007,6.88,9663.8,631,Spider-Man 3
2006,6.55,9209.5,608,Dead Man's Chest


In [28]:
# labels is an attribute (no parentheses) holding the column labels, in order.
movies.labels

('Year', 'Average Ticket Price', 'Total Gross', 'Number of Movies', '#1 Movie')

In [29]:
# num_rows is an attribute (no parentheses) giving the number of rows in the table.
movies.num_rows

36

In [30]:
# Estimated tickets sold = total gross / average ticket price, computed per row.
# The 10 ** 6 factor presumably converts 'Total Gross' from millions of dollars
# to dollars. TODO confirm the units against the data source.
number_of_tix = movies.column('Total Gross') * (10 ** 6) / movies.column('Average Ticket Price')

In [31]:
# Add the ticket estimates as a new column and keep the result by reassigning movies.
movies = movies.with_column('Number of tickets', number_of_tix)

In [32]:
# Display movies with the added 'Number of tickets' column.
movies

Year,Average Ticket Price,Total Gross,Number of Movies,#1 Movie,Number of tickets
2015,8.43,11128.5,702,Star Wars: The Force Awakens,1320110000.0
2014,8.17,10360.8,702,American Sniper,1268150000.0
2013,8.13,10923.6,688,Catching Fire,1343620000.0
2012,7.96,10837.4,667,The Avengers,1361480000.0
2011,7.93,10174.3,602,Harry Potter / Deathly Hallows (P2),1283010000.0
2010,7.89,10565.6,536,Toy Story 3,1339110000.0
2009,7.5,10595.5,521,Avatar,1412730000.0
2008,7.18,9630.7,608,The Dark Knight,1341320000.0
2007,6.88,9663.8,631,Spider-Man 3,1404620000.0
2006,6.55,9209.5,608,Dead Man's Chest,1406030000.0


In [None]:
# Format the ticket counts for display. The column is referred to by its label
# rather than by position 5, so the format keeps applying to the right column
# even if columns are later added or reordered.
movies.set_format('Number of tickets', NumberFormatter)

In [None]:
# Line plot with 'Year' on the horizontal axis and 'Number of tickets' on the vertical axis.
movies.plot('Year', 'Number of tickets')

## Rows ##

In [None]:
# Keep rows whose Year satisfies the predicate. are.between(2000, 2005) includes 2000
# but excludes 2005, so this selects the years 2000 through 2004.
movies.where('Year', are.between(2000, 2005))

In [None]:
# Keep only the rows whose '#1 Movie' is exactly 'Avatar'.
movies.where('#1 Movie', are.equal_to('Avatar'))

In [None]:
# Passing a plain value instead of a predicate is shorthand for are.equal_to(value).
movies.where('#1 Movie', 'Avatar')

In [None]:
# Keep rows whose '#1 Movie' text contains 'Harry Potter' anywhere inside it.
movies.where('#1 Movie', are.containing('Harry Potter'))

In [None]:
# Keep rows with strictly fewer than 450 movies (450 itself would not match).
movies.where('Number of Movies', are.below(450))

In [None]:
# Keep rows with Year strictly greater than 2010 (2010 itself would not match).
movies.where('Year', are.above(2010))

In [None]:
# take selects rows by index. take(3) is the single row at index 3 (the fourth row),
# NOT the first three rows.
movies.take(3)

In [None]:
# Passing an array of indices takes several rows: np.arange(4) is 0, 1, 2, 3,
# i.e. the first four rows.
movies.take(np.arange(4))