In [1]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Arrays ##

Arrays are a data structure that holds a sequence of values of the same type. For example, a sequence of all numbers, or a sequence of all strings, etc.

We can use the `make_array` function from the `datascience` package to create `ndarray` objects, which are arrays implemented by the `NumPy` package. One can perform a range of operations on these arrays in a very efficient manner.

In [None]:
# Build an array from four integer arguments; as the last expression in the cell it is displayed.
make_array(1, 2, 3, 4)

In [None]:
# Store the array under a name so later cells can reuse it, then display it.
my_array = make_array(5, 6, 7, 8)
my_array

In [None]:
# Number of items in the array.
len(my_array)

In [None]:
# Total of all the items in the array.
sum(my_array)

In [None]:
# Average computed by hand: the total of the items divided by how many there are.
sum(my_array) / len(my_array)

In [None]:
# Redisplay my_array: sum() and len() only read it, so it holds the values it was created with.
my_array

In [None]:
# Arithmetic with a single number is applied to every item; the result is a new array
# and my_array itself is not reassigned.
my_array * 2

In [None]:
# A second array with four items, the same length as my_array.
another_one = make_array(20, 30, 40, 50)

In [None]:
# Adding two arrays of equal length adds them item by item.
my_array + another_one

In [None]:
# Element-wise addition needs arrays of compatible lengths. my_array has 4 items
# and yet_another has 6, so NumPy cannot pair them up and raises a ValueError.
# The error is the point of this cell, so catch and print it: the lesson is still
# visible, but "Restart Kernel -> Run All" no longer stops here.
yet_another = make_array(1, 2, 3, 4, 5, 6)
try:
    my_array + yet_another
except ValueError as err:
    print('ValueError:', err)

In [None]:
# my_array has never been reassigned, so it still holds 5, 6, 7, 8.
my_array

In [None]:
# item(0) returns the first item; positions are counted from 0.
my_array.item(0)

## Ranges ##

Range functions allow one to create arrays of ordered sequences of numbers. We can use the `np.arange()` function to create NumPy ndarrays. 

In [2]:
# One argument: consecutive integers starting at 0 and stopping before 5.
np.arange(5)

array([0, 1, 2, 3, 4])

In [3]:
# Two arguments: start at 7 and stop before 25, so 24 is the last value.
np.arange(7, 25)

array([ 7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
       24])

In [4]:
# Three arguments: start, stop, step. Stepping by 10 from 5 gives 5 and 15;
# the stop value 25 itself is excluded.
np.arange(5, 25, 10)

array([ 5, 15])

In [5]:
# Moving the stop to 26 lets 25 into the result, because 25 is before 26.
np.arange(5, 26, 10)

array([ 5, 15, 25])

In [6]:
# Any stop just past 25 also includes 25; the float argument makes the whole result a float array.
np.arange(5, 25.01, 10)

array([ 5., 15., 25.])

## Tables ##

Tables store structured data. We can use the `datascience` package to create `Table` objects that we can perform data manipulation operations on (the `Table` object is a simplified version of a Pandas DataFrame).

Some methods we can perform on `Table` objects are:
- `tb.show(k)`: show the first k rows of the table
- `tb.select('col1', 'col2')`: select `col1` and `col2` from the table
- `tb.drop('col')`: remove `col` from the table
- `tb.sort('col')`: sort the rows in the table based on the values in `col`
- `tb.where('col', value)`: reduce the table to rows where `col` is equal to `value` 

These methods all return Table objects that have been modified based on the methods that have been called. 


Let's look at data on ice cream cones that is described in the class textbook. 

In [None]:
# Load the ice cream data. Each row represents one ice cream cone.
# 'cones.csv' is a relative path, so the file must be in the notebook's working directory.
cones = Table.read_table('cones.csv')
cones

In [None]:
# Check what kind of object Table.read_table returned.
type(cones)

In [None]:
# show(k) displays just the first k rows of the data; here, the first 2
cones.show(2)

In [None]:
# select only the Flavor column; the result is a new Table, kept under its own name
only_flavor = cones.select('Flavor')
only_flavor

In [None]:
# the original cones Table is not modified: select returned a new Table,
# so cones still has all of its columns
cones

In [None]:
# select accepts several labels: keep the Flavor and Price columns
cones.select('Flavor', 'Price')

In [None]:
# remove the Color column; drop returns a new Table, so cones itself keeps the column
no_color = cones.drop('Color')
no_color

In [None]:
# sort by price (lowest to highest is the default order)
cones.sort('Price')

In [None]:
# sort by price, highest to lowest
cones.sort('Price', descending=True)

In [None]:
# select only the chocolate cones: rows whose Flavor equals 'chocolate'
cones.where('Flavor', 'chocolate')

In [None]:
# We can chain multiple method calls. Let's drop the Color column and then sort by price, highest first
cones.drop('Color').sort('Price', descending=True)

## Example: NBA Salaries ##

Let's look at basketball (NBA) salaries from the 2015-2016 season. The data is originally from https://www.statcrunch.com/app/index.php?dataid=1843341


In [None]:
# NBA players, 2015-2016 season
# relabeled(3, 'SALARY') renames the column at index 3 (the fourth column) to 'SALARY'.
# NOTE(review): the column's original label is not visible here; confirm against nba_salaries.csv.
nba = Table.read_table('nba_salaries.csv').relabeled(3, 'SALARY')

nba

In [None]:
# Let's get Stephen Curry's data: the rows where PLAYER equals 'Stephen Curry'
nba.where('PLAYER', 'Stephen Curry')

In [None]:
# Let's get data from the New York Knicks
knicks = nba.where('TEAM', 'New York Knicks')
# show() with no row count displays every row of the table
knicks.show()

## Columns of Tables are Arrays ##

We can extract columns from a `Table` as either:

- A new `Table` with fewer columns using `tb.select()`
- An `ndarray` using `tb.column()` 

In [None]:
# select returns a Table even when only one column is requested
cones.select('Price')  # still a table

In [None]:
# Check the type of what select returns
type(cones.select('Price'))

In [None]:
# column returns the values of the column themselves, not a Table
cones.column('Price') # an array

In [None]:
# Check the type of what column returns
type(cones.column('Price'))

## Creating a Table from Scratch ##

We can also create tables from scratch by calling `Table()` to make an empty table and then adding columns to it using the `tb.with_column("col_name", ndarray)` method.

In [7]:
# A new, empty table: no columns have been added yet
Table()

In [8]:
# Four street names stored as an array of strings
streets = make_array('Crown', 'George', 'Orange', 'State')

In [9]:
# Display the array; the dtype '<U6' in the output reflects that the longest name has 6 characters
streets

array(['Crown', 'George', 'Orange', 'State'], dtype='<U6')

In [10]:
# Start from an empty table and add a 'Street name' column holding the streets array
Table().with_column('Street name', streets)

Street name
Crown
George
Orange
State


In [11]:
# Keep the one-column table under a name so more columns can be added to it
southside = Table().with_column('Street name', streets)

In [12]:
# creates a new table with the specified column added; the result is displayed
# but not assigned, so southside itself is left as it was
southside.with_column('Blocks away from campus', np.arange(4))

Street name,Blocks away from campus
Crown,0
George,1
Orange,2
State,3


In [13]:
# southside still has only the 'Street name' column
southside

Street name
Crown
George
Orange
State


In [14]:
# Assign the result back to southside so the added column is kept
southside = southside.with_column('Blocks away from campus', np.arange(4))

In [15]:
# southside now refers to the two-column table
southside

Street name,Blocks away from campus
Crown,0
George,1
Orange,2
State,3
