# Useful Links and Resources


Python references:

* Jupyter blog https://blog.jupyter.org/
* Jupyter lab extensions: https://github.com/topics/jupyterlab-extension


# Basics

Basic Data types: 

* Integers
* Floating point values
* Strings ``` "my string"``` (immutable, iterable, ordered, unicode in Python3)
* Lists ```["one",2]```(mutable, itrable, ordered, heterogenous)
* Tuples ```("one",2)``` (immutable, iterable, ordered, heterogenous, labelable) 
* Dictionaries ```{"k1": val1, "k2": "val2"}``` (mutable, iterable, unordered, string keys, heterogenous vals)  


In [145]:
# variables
x = "string variable"  # assign a variable
del x  # delete a variable

# print function
print("Solution is:", 42, end="\n")  # end argument is optional

# string interpolation
print("I'm going to inject %s (string) here, and %4.2f (float) here." % ("one",2.13)) # old style
print("Its fleece was {x} as {y}.".format(x="white", y="snow"))  # format
age = 44
height = 175
weight = 70
print(f"So, you're {age} old, {height} tall and {weight} heavy.")  # f-string


Solution is: 42
I'm going to inject one (string) here, and 2.13 (float) here.
Its fleece was white as snow.
So, you're 44 old, 175 tall and 70 heavy.


In [147]:
# dictionaries
d = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5}  # creating a dict object
d = dict(zip(['a','b','c','d','e'], [1,2,3,4,5]))  # creating a dict with zip function
for k,v in d.items():  # iterating over dict; d.items() returns (k,v) tuples in d
    print(k, end=": ")
    print(v)

a: 1
b: 2
c: 3
d: 4
e: 5


In [152]:
print(list(range(0, 10, 1)))  # create list 0, 1, ..., 0

# if-elif-else statements
x = 6
if x%2 == 0 and x in list(range(0,10)):   # note the use of the keyword in
    print("2 is even and in [0,...,9]")
elif x%2 == 0:
    print("x is even")
else:
    print("x is odd and is either negative or >= 10")

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
2 is even and in [0,...,9]


In [157]:
# For loops
my_list = zip(list(range(10)), 'a' * 10)
for a,_ in my_list:  # tuple unpbacking; using _ for unused vals
    print(a, end=" ")

for i,letter in enumerate('abcde'):  # enumerate() returns (index,val) tuples
    print("\nThe letter at index {} is {}".format(i, letter), end="")

0 1 2 3 4 5 6 7 8 9 
The letter at index 0 is a
The letter at index 1 is b
The letter at index 2 is c
The letter at index 3 is d
The letter at index 4 is e

In [165]:
import random
lst = list(range(10))
random.shuffle(lst)  # in-place shuffle
print(lst)
for _ in range(10):
    num = random.randint(0,3)  # random integer between 0 and 2 (inclusive)
    print(num, end=", ")

[4, 7, 6, 8, 9, 2, 0, 3, 5, 1]
1, 3, 0, 2, 3, 1, 0, 1, 1, 2, 

In [173]:
# list comprehensions
[num**2 for num in range(1,5)]  # yields [1, 4, 9, 16]
[num**2 for num in range(1,5) if num%2==0]  # yields [4, 16]
celsius_lst = [0, 10, 20.1, 34.5]
fahrenheit_lst = [((9/5)*temp + 32) for temp in celsius_lst]
print(fahrenheit)
lst = [(x,y) for x in range(1,4) for y in range(1,4)]  # cross product
print(lst)

[32.0, 50.0, 68.18, 94.1]
[(1, 1), (1, 2), (1, 3), (2, 1), (2, 2), (2, 3), (3, 1), (3, 2), (3, 3)]


In [178]:
# functions
def say_hello(name = "John Doe"):  # note default parameter value
    '''DOCSTRING'''
    print("Hello " + name.title())  # string.title() captializes first letter of every word
    return name

say_hello("guy lebanon")
say_hello()

Hello Guy Lebanon
Hello John Doe


'John Doe'

In [183]:
# filter: removes elements from a container based on boolean function
a = filter(lambda x: x%2==0, [1,2,3,4])  # lambda function
print(a)  # note lazy evaluation
print(list(a))

# map: applies a function to elements in container
b = map(lambda num: num**2, range(0,10))
print(b)  # note lazy evaluation
print(list(b))

<filter object at 0x1195745c0>
[2, 4]
<map object at 0x119574898>
[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


# Tests

In [193]:
import unittest

def sieve(n):
    pass

class SieveTest(unittest.TestCase):  
    '''Test suite for sieve() implementing the prime number sieve algorithm'''
    
    def test_1(self):
        self.assertEqual(sieve(1),[])

    def test_2(self):
        self.assertEqual(sieve(2),[2])

    def test_3(self):
        self.assertEqual(sieve(3),[2, 3])

    def test_4(self):
        self.assertEqual(sieve(4),[2, 3])

    def test_10(self):
        self.assertEqual(sieve(10),[2, 3, 5, 7])

    def test_100(self):
        self.assertEqual(sieve(100),[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, \
            31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97])

if __name__ == '__main__':
    unittest.main()

# OOP

In [194]:
class Circle():
    pi = 3.14  # static variable - same for all Circle objects
    def __init__(self, rad = 1):
        self.radius = rad
    def get_area(self):
        return Circle.pi * self.radius **2
    
my_circle = Circle(1)
print(my_circle.get_area())

3.14


In [197]:
# dunder methods

def __getitem__(self, n):  # support subscripting and iteration
    return(self.attr1[n])
def __repr__(self):  # support string representation
    return(self.attr1 + " " + self.attr2)
def __str__(self):  # support printing to end customers
    return(self.attr1 + " " + self.attr2)
def __len__(self):  # support len function and iteration
    return len(self.attr1)
def __del__(self):  # support deleting the object
    pass


In [210]:
# inheritance

class Animal():
    def __init__(self):
        print("Animal Created")
    def who_am_i(self):
        print("I am an animal")
    def eat(self):
        print("I am eating")

class Dog(Animal):
    def __init__(self):
        super().__init__()
        print("Dog created")
    def who_am_i(self):
        super().who_am_i()
        print("I am also a dog.")
    def eat(self):
        print("I am chewing a bone")
        
fido = Dog()
fido.who_am_i()
fido.eat()

Animal Created
Dog created
I am an animal
I am also a dog.
I am chewing a bone


# Exceptions

In [215]:
try:                        
    f = open("test_file","w")
    f.write("test line")
except OSError: # catch specific error
    pass
except:  # catch all exceptions
    pass
else: # execute if exception is not raised
    pass
finally:
    pass  # execute last regardless of exception


# Shell Interaction

In [223]:
import subprocess

comp = subprocess.run('echo $HOME', shell = True, stdout = subprocess.PIPE)
print(comp.stdout)

b'/Users/lebanon\n'


# Numpy

In [262]:
import numpy as np

my_arr = np.array([[1,2,3],[4,5,6]])  # creating a numpy array
print(my_arr)
print(my_arr.shape)  # tuple expressing shape
print(my_arr.dtype)  # data type

[[1 2 3]
 [4 5 6]]
(2, 3)
int64


In [234]:
np.arange(0,10)  # similar to range()
np.random.rand(3,4)
np.linspace(0,1,5)  # linearly separated range of numbers
my_arr = np.zeros((2,5,2))  # 3-D array
my_arr = my_arr.reshape((4,5))  # reshape given array 
print(my_arr)

[[0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0.]]


In [264]:
my_arr = np.arange(25).reshape(5,5)
my_arr < 10  # 5x5 boolean array
my_arr[my_arr < 10] = 10
print(my_arr)

[[10 10 10 10 10]
 [10 10 10 10 10]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


In [265]:
print(my_arr[3])  # fourth row
print(my_arr[3][3])  # fourth row fourth column
print(my_arr[3,3])  # fourth row fourth column
print("max is: " + str(my_arr.max()))
print("occurs at: " + str(my_arr.argmax()) + " index")

[15 16 17 18 19]
18
18
max is: 24
occurs at: 24 index


In [272]:
my_arr[:2,:2] = 3  # broadcast value 3 to a subarray specified by slicing
print(my_arr)

[[ 3  3 10 10 10]
 [ 3  3 10 10 10]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


In [274]:
slice_of_array = my_arr[:2,:2]
slice_of_array[:] = 9  # modifies original array since slicing creates new reference, not copy
print(my_arr)
new_arr = my_arr.copy()  # copy array

[[ 9  9 10 10 10]
 [ 9  9 10 10 10]
 [10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]]


In [277]:
print(my_arr.sum())  # sum of array
sum(my_arr)  # sum of array columns


351


array([63, 66, 71, 74, 77])

# Pandas

In [281]:
import numpy as np
import pandas as pd
from numpy.random import randn
df = pd.DataFrame(randn(5,4), ['A','B','C','D','E'], ['W','X','Y','Z'])
print(df.shape)
df

(5, 4)


Unnamed: 0,W,X,Y,Z
A,-0.257178,-0.700406,0.368985,-1.534784
B,-0.22853,0.865259,0.726362,-0.394829
C,0.797078,-0.08774,-1.869681,0.86993
D,0.541646,-0.935262,0.174845,0.621015
E,1.104319,1.167444,0.957255,0.17154


In [280]:
df[['W','Z']]  # grab columns by labels
df.loc['C']  # grab a row by label
df.iloc[2]  # grab a row by index
df.loc[['A','C'],['W','Z']] # grab some rows and columns

Unnamed: 0,W,Z
A,0.482105,-0.252642
C,-1.022937,0.877061


In [282]:
df['ZZ'] = df['W'] + df['Y']  # create a new column
df.drop('E', inplace = True)  # drop a row
df.drop('X', axis = 1, inplace = True)  # drop a column

In [283]:
df[df > 0] = 0  # similar to numpy arrays

In [286]:
df[(df['W']<0) & (df['Y'] < -0.2)]  # use & since we have multiple boolean values
df

Unnamed: 0,W,Y,Z,ZZ
A,-0.257178,0.0,-1.534784,0.0
B,-0.22853,0.0,-0.394829,0.0
C,0.0,-1.869681,0.0,-1.072603
D,0.0,0.0,0.0,0.0


In [137]:
df.reset_index(inplace=True)  # remove row labels
df

Unnamed: 0,level_0,index,W,Y,Z,ZZ
0,0,A,0.0,-0.747542,0.0,0.0
1,1,B,0.0,-0.090479,0.0,0.0
2,2,C,-0.884802,-0.747678,-0.895382,-1.63248
3,3,D,-0.339508,-0.349222,0.0,-0.68873


In [287]:
df['States'] = ['NY', 'OR', 'CA', 'TX']  # add a new column
df.set_index('States', inplace=True)  # create new row labels
df

Unnamed: 0_level_0,W,Y,Z,ZZ
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NY,-0.257178,0.0,-1.534784,0.0
OR,-0.22853,0.0,-0.394829,0.0
CA,0.0,-1.869681,0.0,-1.072603
TX,0.0,0.0,0.0,0.0


In [313]:
# missing data
d = {'A': [1,2,np.nan], 'B': [5, np.nan, np.nan], 'C': [1,2,3]}
df = pd.DataFrame(d)
print(df.dropna(thresh=2))  # drop any rows with 2 or more missing values
print(df.dropna(axis=1))  # drop any columns with some missing values
df.fillna(value=-1)  # replace na with -1


     A    B  C
0  1.0  5.0  1
1  2.0  NaN  2
   C
0  1
1  2
2  3


Unnamed: 0,A,B,C
0,1.0,5.0,1
1,2.0,-1.0,2
2,-1.0,-1.0,3


In [315]:
# group by
data = {'Company':['GOOG','GOOG','MSFT','MSFT','FB','FB'],
       'Person':['Sam','Charlie','Amy','Vanessa','Carl','Sarah'],
       'Sales':[200,120,340,124,243,350]}
df = pd.DataFrame(data)
df

Unnamed: 0,Company,Person,Sales
0,GOOG,Sam,200
1,GOOG,Charlie,120
2,MSFT,Amy,340
3,MSFT,Vanessa,124
4,FB,Carl,243
5,FB,Sarah,350


In [317]:
byComp = df.groupby('Company')  # group by company 
byComp.mean()  # aggregate by mean (drops non-numeric columns)

Unnamed: 0_level_0,Sales
Company,Unnamed: 1_level_1
FB,296.5
GOOG,160.0
MSFT,232.0


In [318]:
byComp.describe()

Unnamed: 0_level_0,Sales,Sales,Sales,Sales,Sales,Sales,Sales,Sales
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
Company,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
FB,2.0,296.5,75.660426,243.0,269.75,296.5,323.25,350.0
GOOG,2.0,160.0,56.568542,120.0,140.0,160.0,180.0,200.0
MSFT,2.0,232.0,152.735065,124.0,178.0,232.0,286.0,340.0


In [333]:
pd.concat([df, df], axis = 0)  # concatanate rows
pd.concat([df, df], axis = 1)  # concatanate columns
pd.merge(df, df, how = "inner", on = "Person")  # join of columns
# alternative method for join of columns: df1.join(df2, how = "outer")

Unnamed: 0,Company_x,Person,Sales_x,Company_y,Sales_y
0,GOOG,Sam,200,GOOG,200
1,GOOG,Charlie,120,GOOG,120
2,MSFT,Amy,340,MSFT,340
3,MSFT,Vanessa,124,MSFT,124
4,FB,Carl,243,FB,243
5,FB,Sarah,350,FB,350
