In [1]:
from datascience import *
import numpy as np

## Columns of Tables are Arrays ##

In [2]:
# Read the salaries file, keep only the rows whose 'season' is 2020, then drop the
# 'season' column, which is the same for every remaining row.
nba = Table.read_table('nba_salaries.csv').where('season', 2020).drop('season')
# show(5) displays only the first 5 rows.
nba.show(5)

rank,name,position,team,salary
1,Stephen Curry,G,Golden State Warriors,40231758
2,Russell Westbrook,G,Houston Rockets,38506482
3,Chris Paul,G,Oklahoma City Thunder,38506482
4,Kevin Durant,F,Houston Rockets,38199000
5,James Harden,G,Houston Rockets,38199000


In [3]:
# Number of rows in the table.
nba.num_rows

520

In [4]:
# Number of columns in the table.
nba.num_columns

5

In [5]:
# Column labels, in order, as a tuple of strings.
nba.labels

('rank', 'name', 'position', 'team', 'salary')

In [6]:
# Keep only the rows whose 'team' value equals 'Golden State Warriors'.
warriors = nba.where('team', 'Golden State Warriors')
# Display the first 5 rows.
warriors.show(5)

rank,name,position,team,salary
1,Stephen Curry,G,Golden State Warriors,40231758
28,Andrew Wiggins,F,Golden State Warriors,27504630
60,Draymond Green,F,Golden State Warriors,18539130
221,Kevon Looney,F,Golden State Warriors,4464226
329,Jordan Poole,G,Golden State Warriors,1964760


In [7]:
# relabeled produces a new table in which 'salary' is labeled '$'.
# The result is displayed here but not assigned to any name.
warriors.relabeled('salary', '$')

rank,name,position,team,$
1,Stephen Curry,G,Golden State Warriors,40231758
28,Andrew Wiggins,F,Golden State Warriors,27504630
60,Draymond Green,F,Golden State Warriors,18539130
221,Kevon Looney,F,Golden State Warriors,4464226
329,Jordan Poole,G,Golden State Warriors,1964760
335,Omari Rasulala Spellman,F,Golden State Warriors,1897800
346,Dragan Bender,F,Golden State Warriors,1678854
482,Eric Paschall,F,Golden State Warriors,898310
485,Alen Smailagic,PF,Golden State Warriors,898310
493,Damion Lee,SG,Golden State Warriors,842327


In [8]:
# `warriors` still has the 'salary' label: the relabeled table was never assigned.
warriors

rank,name,position,team,salary
1,Stephen Curry,G,Golden State Warriors,40231758
28,Andrew Wiggins,F,Golden State Warriors,27504630
60,Draymond Green,F,Golden State Warriors,18539130
221,Kevon Looney,F,Golden State Warriors,4464226
329,Jordan Poole,G,Golden State Warriors,1964760
335,Omari Rasulala Spellman,F,Golden State Warriors,1897800
346,Dragan Bender,F,Golden State Warriors,1678854
482,Eric Paschall,F,Golden State Warriors,898310
485,Alen Smailagic,PF,Golden State Warriors,898310
493,Damion Lee,SG,Golden State Warriors,842327


In [9]:
# Build the Warriors table in one chain, one step per line:
# filter to the team, drop the now-constant 'team' column, and relabel 'salary' as '$'.
# Assigning the result to `warriors` is what makes the changes stick.
warriors = (
    nba.where('team', 'Golden State Warriors')
       .drop('team')
       .relabeled('salary', '$')
)
warriors.show(3)

rank,name,position,$
1,Stephen Curry,G,40231758
28,Andrew Wiggins,F,27504630
60,Draymond Green,F,18539130


In [10]:
# select returns a table containing only the requested column(s).
warriors.select('$')

$
40231758
27504630
18539130
4464226
1964760
1897800
1678854
898310
898310
842327


In [11]:
# Confirm that select produces a Table, not an array.
type(warriors.select('$'))

datascience.tables.Table

In [12]:
# Columns can also be chosen by index (counting from 0); index 3 is the '$' column.
warriors.select(3)

$
40231758
27504630
18539130
4464226
1964760
1897800
1678854
898310
898310
842327


In [13]:
# Methods chain: select the column at index 3, then display only its first 5 rows.
warriors.select(3).show(5)

$
40231758
27504630
18539130
4464226
1964760


In [14]:
# column (unlike select) returns the column's values as an array.
warriors.column('$')

array([40231758, 27504630, 18539130,  4464226,  1964760,  1897800,
        1678854,   898310,   898310,   842327,   654469,   350189,   350189])

In [15]:
# column also accepts a column index; index 3 is the '$' column.
warriors.column(3)

array([40231758, 27504630, 18539130,  4464226,  1964760,  1897800,
        1678854,   898310,   898310,   842327,   654469,   350189,   350189])

In [16]:
# Confirm that column produces a NumPy array.
type(warriors.column('$'))

numpy.ndarray

In [17]:
# Because the column is an array, NumPy functions apply directly: average Warriors salary.
np.average(warriors.column('$'))

7713457.846153846

In [18]:
# Same recipe as for the Warriors, applied to the Phoenix Suns:
# filter to the team, drop 'team', and relabel 'salary' as '$'.
suns = (
    nba.where('team', 'Phoenix Suns')
       .drop('team')
       .relabeled('salary', '$')
)

In [19]:
# Difference in average salary: Warriors minus Suns.
np.average(warriors.column('$')) - np.average(suns.column('$'))

1642180.623931624

In [20]:
# Difference in total salary: Warriors minus Suns.
# This is negative even though the difference in averages is positive,
# because the two tables have different numbers of rows (checked in the next two cells).
np.sum(warriors.column('$')) - np.sum(suns.column('$'))

-9008038

In [21]:
# Number of rows in the Warriors table.
warriors.num_rows

13

In [22]:
# Number of rows in the Suns table.
suns.num_rows

18

## Ranges ##

In [23]:
# Build an array by listing every element explicitly.
make_array(0, 1, 2, 3, 4, 5, 6)

array([0, 1, 2, 3, 4, 5, 6])

In [24]:
# np.arange(7) builds the same array: integers from 0 up to, but not including, 7.
np.arange(7)

array([0, 1, 2, 3, 4, 5, 6])

In [25]:
# With two arguments: start at 5 and stop before 11.
np.arange(5, 11)

array([ 5,  6,  7,  8,  9, 10])

In [26]:
# The third argument is the step: count by 2 from 0, stopping before 20.
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [27]:
# The end value is excluded, so the stop must be past 20 for 20 to be included.
np.arange(0, 21, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [28]:
# The step can be fractional: from 0 up to (but not including) 1 in steps of 0.1.
np.arange(0, 1, 0.1)

array([ 0. ,  0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9])

In [29]:
# take picks rows by index; index 0 is the first row.
warriors.take(0)

rank,name,position,$
1,Stephen Curry,G,40231758


In [30]:
# Several row indices can be given at once.
warriors.take(0, 1, 2)

rank,name,position,$
1,Stephen Curry,G,40231758
28,Andrew Wiggins,F,27504630
60,Draymond Green,F,18539130


In [31]:
# An array of indices works too: np.arange(3) is [0, 1, 2], so this takes the same three rows.
warriors.take(np.arange(3))

rank,name,position,$
1,Stephen Curry,G,40231758
28,Andrew Wiggins,F,27504630
60,Draymond Green,F,18539130


In [None]:
# Sum the first five salaries of the Warriors (rows 0 through 4).
warriors_first_five = warriors.take(np.arange(5))
np.sum(warriors_first_five.column('$'))

92704504

In [None]:
# Sum the first five salaries of the Suns (rows 0 through 4).
suns_first_five = suns.take(np.arange(5))
np.sum(suns_first_five.column('$'))

76137832

(slides)

# Ways to Create a Table #

## Creating a Table from Scratch ##

In [46]:
# make_array works with strings as well as numbers.
streets = make_array('Cleveland', 'Wilder', 'Moore', 'Prior')
streets

array(['Cleveland', 'Wilder', 'Moore', 'Prior'],
      dtype='<U9')

In [47]:
# Table() creates a table with no columns: the starting point for building one from scratch.
Table()

In [48]:
# Start from an empty table and add the streets array as a column labeled 'Streets'.
merriam_park = Table().with_column('Streets', streets)
merriam_park

Streets
Cleveland
Wilder
Moore
Prior


In [49]:
# Add a second column holding 0, 1, 2, 3. The result is displayed but not assigned to a name.
merriam_park.with_column('Blocks from campus', np.arange(4))

Streets,Blocks from campus
Cleveland,0
Wilder,1
Moore,2
Prior,3


In [None]:
# Eternal reminder: with_column returned a new table, so `merriam_park` itself
# is unchanged until we assign the result back to it.
merriam_park

Streets
Cleveland
Wilder
Moore
Prior


In [51]:
# Assign the result back to `merriam_park` so the new column is kept.
merriam_park = merriam_park.with_column('Blocks from campus', np.arange(4))
merriam_park

Streets,Blocks from campus
Cleveland,0
Wilder,1
Moore,2
Prior,3


In [None]:
# You can add multiple columns at once with with_columns:
# the arguments alternate between a column label and that column's values.
merriam_park = merriam_park.with_columns(
    'Time to get there', make_array(1, 3, 5, 7),
    'City', make_array('St. Paul', 'St. Paul', 'St. Paul', 'St. Paul')
)
merriam_park

Streets,Blocks from campus,Time to get there,City
Cleveland,0,1,St. Paul
Wilder,1,3,St. Paul
Moore,2,5,St. Paul
Prior,3,7,St. Paul


In [None]:
# Use .select() to reorder columns if you want: list the column indices in the desired order.
# Columns that are not listed (here index 3, 'City') are left out of the result.
merriam_park.select(2, 1, 0)

Time to get there,Blocks from campus,Streets
1,0,Cleveland
3,1,Wilder
5,2,Moore
7,3,Prior


(slides)

## Reading a Table from a File  ##

In [53]:
# Read a CSV file into a table.
du_bois = Table.read_table('du_bois.csv')
du_bois

CLASS,ACTUAL AVERAGE,RENT,FOOD,CLOTHES,TAXES,OTHER,STATUS
100-200,139.1,0.19,0.43,0.28,0.001,0.099,POOR
200-300,249.45,0.22,0.47,0.23,0.04,0.04,POOR
300-400,335.66,0.23,0.43,0.18,0.045,0.115,FAIR
400-500,433.82,0.18,0.37,0.15,0.055,0.245,FAIR
500-750,547.0,0.13,0.31,0.17,0.05,0.34,COMFORTABLE
750-1000,880.0,0.0,0.37,0.19,0.08,0.36,COMFORTABLE
1000 and over,1125.0,0.0,0.29,0.16,0.045,0.505,WELL-TO-DO


In [54]:
# The 'ACTUAL AVERAGE' column as an array
# (presumably each class's average income in dollars; confirm against the data source).
du_bois.column('ACTUAL AVERAGE')

array([  139.1 ,   249.45,   335.66,   433.82,   547.  ,   880.  ,  1125.  ])

In [55]:
# The 'FOOD' column as an array
# (presumably the fraction of income spent on food; confirm against the data source).
du_bois.column('FOOD')

array([ 0.43,  0.47,  0.43,  0.37,  0.31,  0.37,  0.29])

In [56]:
# Arrays of the same length multiply element by element: one product per row of the table.
du_bois.column('ACTUAL AVERAGE') * du_bois.column('FOOD')

array([  59.813 ,  117.2415,  144.3338,  160.5134,  169.57  ,  325.6   ,
        326.25  ])

In [72]:
# Row-by-row product of the 'ACTUAL AVERAGE' and 'FOOD' columns.
food_dollars = du_bois.column('ACTUAL AVERAGE') * du_bois.column('FOOD')
# Assign back to `du_bois` so the table keeps the new 'Food $' column.
du_bois = du_bois.with_column('Food $', food_dollars)
du_bois

CLASS,ACTUAL AVERAGE,RENT,FOOD,CLOTHES,TAXES,OTHER,STATUS,Food $
100-200,139.1,0.19,0.43,0.28,0.001,0.099,POOR,59.813
200-300,249.45,0.22,0.47,0.23,0.04,0.04,POOR,117.241
300-400,335.66,0.23,0.43,0.18,0.045,0.115,FAIR,144.334
400-500,433.82,0.18,0.37,0.15,0.055,0.245,FAIR,160.513
500-750,547.0,0.13,0.31,0.17,0.05,0.34,COMFORTABLE,169.57
750-1000,880.0,0.0,0.37,0.19,0.08,0.36,COMFORTABLE,325.6
1000 and over,1125.0,0.0,0.29,0.16,0.045,0.505,WELL-TO-DO,326.25


In [59]:
# Largest value in the 'Food $' column.
np.max(du_bois.column('Food $'))

326.25

In [60]:
# Express each row's 'Food $' as a fraction of the largest 'Food $' value in the table
# (in the displayed result, the largest value belongs to the WELL-TO-DO row).
# The result is displayed only; `du_bois` is not reassigned.
food_spending = du_bois.column('Food $')
du_bois.with_column('Fraction of well-to-do food $', food_spending / np.max(food_spending))

CLASS,ACTUAL AVERAGE,RENT,FOOD,CLOTHES,TAXES,OTHER,STATUS,Food $,Fraction of well-to-do food $
100-200,139.1,0.19,0.43,0.28,0.001,0.099,POOR,59.813,0.183335
200-300,249.45,0.22,0.47,0.23,0.04,0.04,POOR,117.241,0.359361
300-400,335.66,0.23,0.43,0.18,0.045,0.115,FAIR,144.334,0.442402
400-500,433.82,0.18,0.37,0.15,0.055,0.245,FAIR,160.513,0.491995
500-750,547.0,0.13,0.31,0.17,0.05,0.34,COMFORTABLE,169.57,0.519755
750-1000,880.0,0.0,0.37,0.19,0.08,0.36,COMFORTABLE,325.6,0.998008
1000 and over,1125.0,0.0,0.29,0.16,0.045,0.505,WELL-TO-DO,326.25,1.0


# Practice question
Use the table functions we learned this week to find the income bracket (“class”) that spent the highest percentage of their income on rent.

In [74]:
# Sort the rows by 'RENT' from largest to smallest, take the 'CLASS' column as an array,
# and read its first element: the class with the highest rent fraction.
du_bois.sort('RENT', descending=True).column('CLASS').item(0)

'300-400'

(slides)

## Where Method

In [None]:
# We already saw .where(), e.g.
# suns = nba.where('team', 'Phoenix Suns')
# This usage is taking rows where team == 'Phoenix Suns'
# But you can also choose rows with more complicated logic by passing a "predicate", written are.[something],
# for example:

In [None]:
# Let's look at the 2025 season now.
# NOTE: this rebinds `nba`, which held the 2020 season up to this point. Every cell below
# uses 2025 data, and re-running an earlier cell after this one would use 2025 data too.
nba = Table.read_table('nba_salaries.csv').where('season', 2025).drop('season')
# are.above(x): keep the rows whose 'salary' is greater than x.
nba.where('salary', are.above(10000000))

rank,name,position,team,salary
1,Stephen Curry,G,Golden State Warriors,55761216
2,Joel Embiid,C,Philadelphia 76ers,51415938
3,Nikola Jokic,C,Denver Nuggets,51415938
4,Kevin Durant,F,Phoenix Suns,51179021
5,Bradley Beal,G,Phoenix Suns,50203930
6,Kawhi Leonard,F,LA Clippers,49350000
7,Devin Booker,G,Phoenix Suns,49205800
8,Paul George,F,Philadelphia 76ers,49205800
9,Karl-Anthony Towns,C,New York Knicks,49205800
10,Jaylen Brown,G,Boston Celtics,49205800


In [77]:
# are.between(x, y): keep the rows whose 'salary' falls between the two values
# (per the datascience docs the lower bound is included and the upper bound is not; confirm).
nba.where('salary', are.between(10000000, 20000000))

rank,name,position,team,salary
86,Myles Turner,C,Indiana Pacers,19928500
87,Jakob Poeltl,C,Toronto Raptors,19500000
88,Duncan Robinson,F,Miami Heat,19406000
89,Norman Powell,G,LA Clippers,19241379
90,Bojan Bogdanovic,SF,Brooklyn Nets,19032850
91,Keldon Johnson,F,San Antonio Spurs,19000000
92,D'Angelo Russell,G,Brooklyn Nets,18692307
93,Patrick Williams,F,Chicago Bulls,18500000
94,Collin Sexton,G,Utah Jazz,18350000
95,Josh Hart,G,New York Knicks,18144000


In [93]:
# are.containing keeps the rows whose 'team' text contains 'wolves' anywhere inside it,
# so 'Minnesota Timberwolves' matches.
wolves = nba.where('team', are.containing('wolves'))
wolves

rank,name,position,team,salary
16,Rudy Gobert,C,Minnesota Timberwolves,43827587
22,Anthony Edwards,G,Minnesota Timberwolves,42176400
45,Julius Randle,F,Minnesota Timberwolves,33073920
70,Jaden McDaniels,F,Minnesota Timberwolves,23017242
117,Naz Reid,C,Minnesota Timberwolves,13986432
144,Donte DiVincenzo,G,Minnesota Timberwolves,11445000
164,Mike Conley,G,Minnesota Timberwolves,9975962
222,Rob Dillingham,G,Minnesota Timberwolves,6262920
264,Nickeil Alexander-Walker,G,Minnesota Timberwolves,4312500
269,Bones Hyland,G,Minnesota Timberwolves,4158439


## Print and string formatting

In [87]:
# Only the value of the last expression in a cell is displayed, so the 3 never appears.
1 + 2
3 + 4

7

In [88]:
# print shows a value wherever it appears in the cell, so both results are shown.
print(1 + 2)
print(3 + 4)

3
7


In [86]:
# A string displayed as the cell's value is shown with its quotes.
'Go Wolves'

'Go Wolves'

In [89]:
# print shows the text itself, without quotes.
print('Go Wolves')

Go Wolves


In [90]:
# A name to substitute into the strings in the following cells.
state_name = 'Minnesota'

In [91]:
# Without the f prefix, the braces and the name inside them are just ordinary characters.
'{state_name} is great'

'{state_name} is great'

In [92]:
# With the f prefix, {state_name} is replaced by the value of the variable.
f'{state_name} is great'

'Minnesota is great'

In [96]:
print(f'{state_name} is great')
# Find the top earner by sorting on salary explicitly, instead of assuming that the rows of
# `wolves` are already in descending-salary order. Computing the name first also keeps the
# f-string free of nested quotes.
highest_paid_wolf = wolves.sort('salary', descending=True).column('name').item(0)
print(f'In 2025, the highest paid basketball player in {state_name} was {highest_paid_wolf}')

Minnesota is great
In 2025, the highest paid basketball player in Minnesota was Rudy Gobert
