## tuple
- fixed-length
- immutable

In [1]:
tup = 4, 5, 6
tup

(4, 5, 6)

In [2]:
(4, None, 'foo') + (6, 0) + ('bar',)

(4, None, 'foo', 6, 0, 'bar')

In [3]:
('foo', 'bar') * 2

('foo', 'bar', 'foo', 'bar')

In [4]:
(4, 3, 2) + (1, 2, 6)

(4, 3, 2, 1, 2, 6)

In [5]:
(4, 3, 2) * 2

(4, 3, 2, 4, 3, 2)

In [6]:
tup[1]

5

Unpacking tuples

In [7]:
tup = (4, 5, 6)
a, b, c = tup
b

5

Iteration

In [8]:
# Every element of seq is a 3-tuple, so it can be unpacked right in the for statement.
seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]

for a, b, c in seq:
    print(f'a={a}, b={b}, c={c}')

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


Tuple methods

In [9]:
tup = (1, 2, 2, 2, 3, 4, 5, 6)
tup.count(2)

3

In [10]:
tup[1]

2

zip

In [11]:
# zip pairs up the elements position by position; tuple() materialises the pairs.
tup1 = ('foo', 'bar')
tup2 = ('one', 'two')
tuple(zip(tup1, tup2))

(('foo', 'one'), ('bar', 'two'))

## list
- variable-length
- can be modified in place
- `list`, `[]`

Convert a tuple to a list

In [12]:
tup = ('foo', 'bar')
list(tup)

['foo', 'bar']

In [13]:
type(tup)

tuple

In [14]:
type(list(tup))

list

Adding elements

In [15]:
a_list = list(tup)
a_list

['foo', 'bar']

In [16]:
a_list.append('new')
a_list

['foo', 'bar', 'new']

In [17]:
a_list.insert(1, 'red')
a_list

['foo', 'red', 'bar', 'new']

Removing elements

In [18]:
a_list.pop(2)

'bar'

In [19]:
a_list

['foo', 'red', 'new']

In [20]:
a_list.remove('foo')
a_list

['red', 'new']

Check if a list contains a value using the `in` keyword

In [21]:
'red' in a_list

True

In [22]:
'red' not in a_list

False

Concatenating & combining lists

In [23]:
[4, None, 'bar'] + ['foo', 2, (6, 2)]

[4, None, 'bar', 'foo', 2, (6, 2)]

In [24]:
[4, None, 'bar'] * 2

[4, None, 'bar', 4, None, 'bar']

In [25]:
x = [4, None, 'bar']
x.extend(['new', 'bar']) # appends in place; faster than '+', which builds a new list
x

[4, None, 'bar', 'new', 'bar']

In [26]:
[4, 3, 2] + [1, 2, 6]

[4, 3, 2, 1, 2, 6]

In [27]:
[4, 3, 2] * 2

[4, 3, 2, 4, 3, 2]

Sorting

In [28]:
a = [7, 4, 8, 2, 5]
a.sort() # in place; the list itself is reordered
a

[2, 4, 5, 7, 8]

In [29]:
a = [7, 4, 8, 2, 5]
sorted(a) # returns a new sorted list; a itself is left unchanged

[2, 4, 5, 7, 8]

Slicing

In [30]:
seq = [7, 1, 3, 6, 2, 8, 9, 12, 16]

In [31]:
seq[1:3]

[1, 3]

In [32]:
seq[1:]

[1, 3, 6, 2, 8, 9, 12, 16]

In [33]:
seq[:3]

[7, 1, 3]

In [34]:
seq[-3:-1]

[9, 12]

In [35]:
seq[-3:]

[9, 12, 16]

In [36]:
seq[:-2]

[7, 1, 3, 6, 2, 8, 9]

In [37]:
seq[::-1]

[16, 12, 9, 8, 2, 6, 3, 1, 7]

In [38]:
seq[::2]

[7, 3, 2, 9, 16]

Iteration

In [39]:
for i in seq:
    print(i)

7
1
3
6
2
8
9
12
16


In [40]:
for i, c in enumerate(seq):
    print(i, c)

0 7
1 1
2 3
3 6
4 2
5 8
6 9
7 12
8 16


list comprehension

`[expr for val in collection if condition]`

In [41]:
strings = ['f', 'bar', 'car', 'dove']
[string.upper() for string in strings]

['F', 'BAR', 'CAR', 'DOVE']

In [42]:
[string.upper() for string in strings if len(string) > 2]

['BAR', 'CAR', 'DOVE']

Zip

In [43]:
seq1 = ['foo', 'bar']
seq2 = ['one', 'two']
list(zip(seq1, seq2))

[('foo', 'one'), ('bar', 'two')]

In [44]:
# Walk several sequences in lockstep; enumerate adds the running position.
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print(f'{i}: {a}, {b}')

0: foo, one
1: bar, two


In [45]:
# unzip: zip(*pairs) regroups the pairs into one tuple per position
# NOTE(review): ('Schilling', 'Curt') looks like (last, first) order, unlike the first pair - confirm the sample data
pitchers = [('Nolan', 'Ryan'), ('Schilling', 'Curt')]
first_name, last_name = zip(*pitchers)
print('first_name:', first_name)
print('last_name:', last_name)

first_name: ('Nolan', 'Schilling')
last_name: ('Ryan', 'Curt')


Reversed

In [46]:
list(reversed(range(10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

## dict

In [47]:
d1 = {'a': 1, 'b': 2}
d1

{'a': 1, 'b': 2}

In [48]:
d1['b']

2

In [49]:
'a' in d1

True

Add a new key

In [50]:
d1[7] = 'seven'
d1

{'a': 1, 'b': 2, 7: 'seven'}

Update value & add new key

In [51]:
d1.update({'b':'new b value', 'c':12})
d1

{'a': 1, 'b': 'new b value', 7: 'seven', 'c': 12}

Delete key

In [52]:
d1[5] = 'some value'
d1['dummy'] = 'another value'
d1

{'a': 1,
 'b': 'new b value',
 7: 'seven',
 'c': 12,
 5: 'some value',
 'dummy': 'another value'}

In [53]:
del d1[5]
d1

{'a': 1, 'b': 'new b value', 7: 'seven', 'c': 12, 'dummy': 'another value'}

In [54]:
ret = d1.pop('dummy')
print('ret:', ret)
print(d1)

ret: another value
{'a': 1, 'b': 'new b value', 7: 'seven', 'c': 12}


Creating dicts from sequences

In [55]:
# Build the dict one pair at a time. The name `mapping` is used instead of
# `map` so the built-in map() stays usable for the rest of the session.
mapping = {}
for key, value in zip(range(5), reversed(range(5))):
    mapping[key] = value

mapping

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

In [56]:
dict(zip(range(5), reversed(range(5))))

{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}

Default values

In [57]:
words = ['apple', 'bar', 'book', 'foo']
by_letter = {}

# Group the words by first letter, creating a bucket the first time a letter is seen.
for word in words:
    letter = word[0]
    if letter in by_letter:
        by_letter[letter].append(word)
    else:
        by_letter[letter] = [word]

by_letter

{'a': ['apple'], 'b': ['bar', 'book'], 'f': ['foo']}

In [58]:
words = ['apple', 'bar', 'book', 'foo']
by_letter_2 = {}

# setdefault returns the list already stored for the letter, or stores and returns a new empty one.
for word in words:
    by_letter_2.setdefault(word[0], []).append(word)

by_letter_2

{'a': ['apple'], 'b': ['bar', 'book'], 'f': ['foo']}

Iteration

In [59]:
# Country -> capital lookup. It is named `capitals` rather than `europe`
# because the data is not limited to European countries (Australia is included).
capitals = {'Spain': 'Madrid',
            'France': 'Paris',
            'Germany': 'Berlin',
            'Norway': 'Oslo',
            'Italy': 'Rome',
            'Poland': 'Warsaw',
            'Australia': 'Canberra'}

# .items() yields (key, value) pairs in insertion order.
for country, city in capitals.items():
    print('The capital of {0} is {1}.'.format(country, city))

The capital of Spain is Madrid.
The capital of France is Paris.
The capital of Germany is Berlin.
The capital of Norway is Oslo.
The capital of Italy is Rome.
The capital of Poland is Warsaw.
The capital of Australia is Canberra.


dict comprehension

`{key-expr : value-expr for value in collection if condition}`

In [60]:
words = ['apple', 'banana', 'candy']
{word[0]:word for word in words}

{'a': 'apple', 'b': 'banana', 'c': 'candy'}

## set
- unordered collection of unique elements
- `set()`
- `{1, 2, 3}` (literal with elements; an empty `{}` creates a dict, not a set)

In [61]:
set([2, 2, 2, 3, 3, 1, 1])

{1, 2, 3}

In [62]:
{2, 2, 2, 3, 1, 1, 3}

{1, 2, 3}

Set operations

In [63]:
a = {1, 2, 3}
b = {2, 3, 4, 5, 6}

In [64]:
# union
print(a.union(b))
print(a | b)

{1, 2, 3, 4, 5, 6}
{1, 2, 3, 4, 5, 6}


In [65]:
# intersection
print(a.intersection(b))
print(a & b)

{2, 3}
{2, 3}


In [66]:
# subset
a_set = {1, 2, 3, 4, 5}
{1, 2, 3}.issubset(a_set)

True

In [67]:
a_set.issuperset({1, 2, 3})

True

set comprehension

`{expr for value in collection if condition}`

In [68]:
strings = ['f', 'bar', 'car', 'dove']
{len(string) for string in strings}

{1, 3, 4}

## numpy

In [69]:
import numpy as np
# 2x3 array of samples from the standard normal distribution.
# No seed is set in this cell, so the values presumably differ on every run.
rdm_ary = np.random.randn(2, 3)
rdm_ary

array([[0.00989472, 0.13852316, 1.99090893],
       [0.93793587, 0.65680151, 1.93083945]])

In [70]:
rdm_ary * 10

array([[ 0.09894718,  1.38523157, 19.90908933],
       [ 9.37935872,  6.56801512, 19.30839446]])

In [71]:
rdm_ary.ndim

2

In [72]:
rdm_ary.shape

(2, 3)

In [73]:
# zero
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [74]:
# arange
np.arange(5)

array([0, 1, 2, 3, 4])

### dtype

In [75]:
np.array([1, 2, 3], dtype=np.float64)

array([1., 2., 3.])

In [76]:
np.array([1, 2, 3], dtype=np.int32)

array([1, 2, 3], dtype=int32)

### arithmetic

In [77]:
arr = np.array([[1, 2, 3], [4, 5, 6]])
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [78]:
arr - arr

array([[0, 0, 0],
       [0, 0, 0]])

In [79]:
arr * arr

array([[ 1,  4,  9],
       [16, 25, 36]])

### boolean arrays

In [80]:
bools = np.array([False, True, True])
bools

array([False,  True,  True])

In [81]:
bools.any()

True

In [82]:
bools.all()

False

### sorting

In [83]:
arr = np.random.randn(10)
arr

array([ 1.61267712, -0.12920603, -0.03927641,  0.67087274, -0.95567344,
       -0.09618218,  0.28395417, -0.39972865, -0.69805524, -1.11392764])

In [84]:
arr.sort() # in place
arr

array([-1.11392764, -0.95567344, -0.69805524, -0.39972865, -0.12920603,
       -0.09618218, -0.03927641,  0.28395417,  0.67087274,  1.61267712])

### unique

In [85]:
names = np.array(['Tom', 'Jerry', 'Amy', 'Jerry'])
names

array(['Tom', 'Jerry', 'Amy', 'Jerry'], dtype='<U5')

In [86]:
np.unique(names)

array(['Amy', 'Jerry', 'Tom'], dtype='<U5')

In [87]:
sorted(set(names))

['Amy', 'Jerry', 'Tom']

## pandas series

Creation

In [88]:
import pandas as pd
obj = pd.Series([4, -7, 2])
obj

0    4
1   -7
2    2
dtype: int64

In [89]:
mdata = {'Ohio': 35000, 'Texas': 70000, 'Utah': 5000}
obj2 = pd.Series(mdata)
obj2

Ohio     35000
Texas    70000
Utah      5000
dtype: int64

In [90]:
# Passing index= selects entries of mdata by label. 'Califonia' has no entry
# in mdata, so its value is NaN and the dtype becomes float64.
# NOTE(review): 'Califonia' is presumably a misspelling of 'California'; the same
# label appears in later outputs, so fix it and re-run those cells together.
states = ['Califonia', 'Ohio', 'Texas', 'Utah']
obj3 = pd.Series(mdata, index=states)
obj3

Califonia        NaN
Ohio         35000.0
Texas        70000.0
Utah          5000.0
dtype: float64

arithmetic

In [91]:
obj[[1]] * 2

1   -14
dtype: int64

In [92]:
obj * 2

0     8
1   -14
2     4
dtype: int64

In [93]:
obj2 + obj3

Califonia         NaN
Ohio          70000.0
Texas        140000.0
Utah          10000.0
dtype: float64

`in` (tests membership in the index labels, not in the values)

In [94]:
-7 in obj

False

In [95]:
0 in obj

True

Check for missing values (`NaN`)

In [96]:
pd.isnull(obj3)

Califonia     True
Ohio         False
Texas        False
Utah         False
dtype: bool

In [97]:
pd.notnull(obj3)

Califonia    False
Ohio          True
Texas         True
Utah          True
dtype: bool

### drop

In [98]:
new_obj = obj.drop(1)
new_obj

0    4
2    2
dtype: int64

### indexing, selection, filtering

In [99]:
obj = pd.Series([4, -7, 2], index=['a', 'b', 'c'])
obj

a    4
b   -7
c    2
dtype: int64

In [100]:
obj['b']

-7

In [101]:
# Select by position explicitly: with a non-integer index, plain obj[1] relies
# on a deprecated fallback that treats integer keys as positions.
obj.iloc[1]

-7

In [102]:
# A list of positions returns a Series; .iloc makes the positional intent
# explicit instead of relying on the deprecated integer-key fallback of obj[[1]].
obj.iloc[[1]]

b   -7
dtype: int64

In [103]:
obj[1:2]

b   -7
dtype: int64

In [104]:
obj[0:]

a    4
b   -7
c    2
dtype: int64

In [105]:
obj[:2]

a    4
b   -7
dtype: int64

In [106]:
# slicing with labels behaves differently than normal Python slicing in that the end-point is inclusive
obj['b':'c']

b   -7
c    2
dtype: int64

In [107]:
# Reorder by position; .iloc is used because integer keys in obj[[...]] on a
# string-labelled index depend on a deprecated positional fallback.
obj.iloc[[1, 2, 0]]

b   -7
c    2
a    4
dtype: int64

## pandas dataframe

In [108]:
data = {'state': ['Ohio', 'Ohio', 'Nevada', 'Nevada'],
        'Year':[2000, 2001, 2000, 2001],
        'pop':[1.5, 1.7, 2.4, 2.9]}
df = pd.DataFrame(data)
df

Unnamed: 0,state,Year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Nevada,2000,2.4
3,Nevada,2001,2.9


In [109]:
df[['state']]

Unnamed: 0,state
0,Ohio
1,Ohio
2,Nevada
3,Nevada


### drop

In [110]:
df.drop(2) # row

Unnamed: 0,state,Year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
3,Nevada,2001,2.9


In [111]:
df.drop('pop', axis='columns')

Unnamed: 0,state,Year
0,Ohio,2000
1,Ohio,2001
2,Nevada,2000
3,Nevada,2001


In [112]:
df.drop(columns='pop')

Unnamed: 0,state,Year
0,Ohio,2000
1,Ohio,2001
2,Nevada,2000
3,Nevada,2001


### indexing, selection, filtering

In [113]:
df = pd.DataFrame(np.arange(16).reshape(4, 4),
                  index=['Ohio', 'Colorado', 'Utah', 'New York'],
                  columns=['one', 'two', 'three', 'four'])
df

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [114]:
df['two']

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int64

In [115]:
df[['two']]

Unnamed: 0,two
Ohio,1
Colorado,5
Utah,9
New York,13


In [116]:
df[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


`loc` & `iloc`

In [117]:
df.loc['Ohio']

one      0
two      1
three    2
four     3
Name: Ohio, dtype: int64

In [118]:
df.loc['Ohio', 'two']

1

In [119]:
df.loc[:'Utah', 'two']

Ohio        1
Colorado    5
Utah        9
Name: two, dtype: int64

In [120]:
df.iloc[0]

one      0
two      1
three    2
four     3
Name: Ohio, dtype: int64

In [121]:
df.iloc[0, 1]

1

In [122]:
df.iloc[:, :2]

Unnamed: 0,one,two
Ohio,0,1
Colorado,4,5
Utah,8,9
New York,12,13
