# Python for Data Analysis - Workbook

### Preliminaries

In [5]:
import numpy as np
import pandas as pd

In [6]:
df = pd.read_csv('../Memorial Day DS Project/us_only_v1.0.csv')

## Built-in Data Structures, Functions, and Files

### Data Structures and sequences

#### Tuples

In [8]:
# a tup is a fixed length, immutable sequence of python obj's. They can be created like this:

tup = 4,5,6

In [9]:
# This one is nested
nested_tup = (4,5,6),(7,8)

In [11]:
# Any sequence or iterator (more on that later) can be converted to a tuple like this:

tuple([4,4,2])
tuple('string')

('s', 't', 'r', 'i', 'n', 'g')

In [12]:
# Just like any C, C++, or aa array, tuples can be accessed with thte [] notation
tup[0]

4

In [15]:
# You can concatenate tuples using the '+' symbol to produce longer tuples
tup + nested_tup

(4, 5, 6, (4, 5, 6), (7, 8))

In [16]:
# But using the '*' character ('*' means multiply) instead of the '+' character has the
# effect of creating that many copies of the tuple

tup * 4

(4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 5, 6)

In [17]:
# Replicate a swap function as in e.g. C

tup = [4,5,6]
a, b = 1, 2

In [21]:
# Common use for variable unpacking is iterating over sequences or lists

seq = [1,2,3], [4,5,6], [7,8,9]

for a,b,c in seq:
    print('a={0} b={1}, c={2}'.format(a,b,c))

a=1 b=2, c=3
a=4 b=5, c=6
a=7 b=8, c=9


In [22]:
# Count the number of occurences of a particular value in a list

a = (1,2,2,2,3,4,2)
a.count(2)

4

#### Lists

In [23]:
# List is variable length and contents can be modified in-place
# Defined using square brackets
# Equivalent to Javascript Array

In [24]:
# List function is commonly used in data processing as a way to create an iterator expression

gen = range(10)

In [25]:
gen

range(0, 10)

In [27]:
# Now we have a list from 0 - 9 (10 values)

list(gen)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [34]:
ex_list = ['foo', 'bar', 'baz']

In [35]:
# Can append or insert elements
ex_list.append('bow')

In [36]:
# Note: inserting/popping elements in a list is computationally expensive
ex_list.insert(0, 'dog')
ex_list

['dog', 'foo', 'bar', 'baz', 'bow']

In [37]:
ex_list.pop(0)
ex_list

['foo', 'bar', 'baz', 'bow']

In [38]:
# Or remove by value
ex_list.remove('baz')
ex_list

['foo', 'bar', 'bow']

In [42]:
# Check to see if a value is included in a specific list
'foo' in ex_list

True

In [43]:
'foo' not in ex_list

False

In [45]:
# Concatenation (expensive)

ex_list2 = ['dog', 'cat', 'mouse']
ex_list + ex_list2

['foo', 'bar', 'bow', 'dog', 'cat', 'mouse']

In [47]:
# Extension (cheaper)

ex_list.extend(ex_list2)
ex_list

['foo', 'bar', 'bow', 'dog', 'cat', 'mouse', 'dog', 'cat', 'mouse']

In [49]:
# Sorting lists

a = [3,4,1]
a.sort()
a

[1, 3, 4]

In [51]:
# Can pass in different keys, or functions that produce a value to search the objects

ex_list.sort(key=len)
ex_list

['foo', 'bar', 'bow', 'dog', 'cat', 'dog', 'cat', 'mouse', 'mouse']

##### Binary Search

In [62]:
# Binary search is implemented through the native "bisect" module

import bisect
c = [1,2,2,2,3,4,7]

In [63]:
bisect.bisect(c,2)

4

In [64]:
bisect.bisect(c,7)

7

In [65]:
# Inserting an element
# Note...list has to be sorted first!

bisect.insort(c,6)
c

[1, 2, 2, 2, 3, 4, 6, 7]

##### Slicing a List

In [66]:
# Slicing - basic form is start:stop to the indexing operator

seq = [7,2,3,7,5,6,0,1]
seq[1:5]

[2, 3, 7, 5]

In [67]:
# Can also assign with a sequence

seq[3:4] = [7,8]
seq

[7, 2, 3, 7, 8, 5, 6, 0, 1]

In [68]:
# Use negative indices to slice the sequence relative to the end

seq[-4:]

[5, 6, 0, 1]

In [69]:
seq[4:]

[8, 5, 6, 0, 1]

In [72]:
# Step size can be defined after a second colon, as below
seq[1:8:2]

[2, 7, 5, 0]

In [73]:
# Here's a handy way to reverse a list

seq[::-1]

[1, 0, 6, 5, 8, 7, 3, 2, 7]

##### Built in Sequence Functions

In [79]:
# Keep track of index of current item when enumerating over a list/dict

# Hard Way:

collection = [1,2,3]

i = 0
print('Hard Way\n')
for item in collection:
    print(item)
    i += 1

# Easy Way

print('\nEasy Way\n')
for i, value in enumerate(collection):
    print(item)

Hard Way

1
2
3

Easy Way

3
3
3


In [128]:
# Example - create some dict that is a mapping between two lists

some_list = ['foo', 'bar', 'baz']
mapping = {}

for i,v in enumerate(some_list):
    mapping[i] = v
    
print(mapping)

# Really easy way to make a dictionary
print(dict(mapping))

{0: 'foo', 1: 'bar', 2: 'baz'}
{0: 'foo', 1: 'bar', 2: 'baz'}


In [85]:
# Sorting

sorted([5,2,1,12,2])

[1, 2, 2, 5, 12]

In [86]:
sorted('horse race')

[' ', 'a', 'c', 'e', 'e', 'h', 'o', 'r', 'r', 's']

In [89]:
# Zipping

seq1 = ['foo', 'bar', 'baz']
seq2 = ['dog', 'cat', 'three']

zipped = zip(seq1, seq2)
list(zipped)

[('foo', 'dog'), ('bar', 'cat'), ('baz', 'three')]

In [92]:
# Combining zip and enumerate for ultimate pythonism


seq1 = ['foo', 'bar', 'baz']
seq2 = ['dog', 'cat', 'three']

for i, (a,b) in enumerate(zip(seq1, seq2)):
    print('{0}: {1}, {2}'.format(i, a, b))

0: foo, dog
1: bar, cat
2: baz, three


In [108]:
# Unzip in a hacky way using '*'

pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'),('Schilling', 'Curt')]
first_names, last_names = zip(*pitchers)
first_names

('Nolan', 'Roger', 'Schilling')

In [112]:
# The above uses the '*' operator to 'unpack' the list. See below example

a,*b = pitchers
print(a)
print(b)

('Nolan', 'Ryan')
[('Roger', 'Clemens'), ('Schilling', 'Curt')]


#### Dicts (Also called key-value pairs or hashmaps)

In [122]:
ex_dict = {'a': 'some value', 'b': [1, 2, 3, 4]}

In [123]:
# Access dict item
ex_dict['a']

'some value'

In [124]:
# Replace item
ex_dict['b'] = [5, 6, 7, 8]
print(ex_dict)

{'a': 'some value', 'b': [5, 6, 7, 8]}


In [125]:
# Add item
ex_dict['fish'] = 'red'
print(ex_dict)

{'a': 'some value', 'b': [5, 6, 7, 8], 'fish': 'red'}


In [126]:
# Check for a key
'b' in ex_dict

True

In [127]:
# Two ways to delete: del which deletes the k/v or pop which returns the value and deletes key

del ex_dict['b']
print(ex_dict.pop('a'))

some value
