# Python reviews for data analysis


In [None]:
import numpy as np
import pandas as pd

## Before we start

In [None]:
## error types:
'''
syntax errors: compile-time errors
runtime errors: "crash"
logical errors: compiles and runs, but is wrong
'''

## several output methods to make the output more readable
# print single output
print("Carpe")
print("diem")
print() # add a blank line

#print on same line
print("Carpe", end="")
print("diem")
print()

#print multiple items
a = 3
b = 4
c = ((a**2) + (b**2))**0.5
print("side a =", a) # add text for the output results
print("side b =", b)
print("hypotenuse c =", c)

## Data and expressions

In [None]:
## builtin types
print(type(2))           # int
print(type(2.2))         # float
print(type("2.2"))       # str  (string)
print(type(True))     # bool (boolean)

# These are the most common types; others include list, range, tuple, set, dict, function, and module.

In [None]:
## builtin constants
print("Some builtin constants:")
print(True)
print(False)
print(None)
print()

print("And some more constants in the math module:")
import math
print(math.pi)
print(math.e)

In [None]:
## builtin functions
print("Type conversion functions:")
print(bool(0))   # convert to boolean (True or False)
print(float(42)) # convert to a floating point number
print(int(2.8))  # convert to an integer (int)

print()
print("And some basic math functions:")
print(abs(-5))   # absolute value
print(max(2,3))  # return the max value
print(min(2,3))  # return the min value
print(pow(2,3))  # raise to the given power (pow(x,y) == x**y)
print(round(2.354, 1)) # round with the given number of digits

## ATTENTION: round() sends exact ties (x.5) to the nearest EVEN integer ("banker's rounding")
print()
print(round(0.5)) # This evaluates to 0 - what!
print(round(1.5)) # And this will be 2 - so confusing!

### builtin mathematical operations
![image.png](attachment:image.png)

In [None]:
## things to note about the builtin operations
# integer division (//) always rounds down, i.e. toward negative infinity
print("5 // 3 = ", 5//3)
print("-5 //3 = ", -5//3)

# the result of % takes the sign of the divisor, so a negative value modulo a positive one is never negative
print()
print("5 % 3 = ", 5%3)
print("-5 % 3 = ", -5%3)

# floating-point numbers are only approximations, so compare them with a tolerance (e.g. math.isclose) instead of ==
print()
print(0.1 + 0.1 == 0.2)
print(0.1 + 0.1 + 0.1 == 0.3) #use a helper function
print(0.1 + 0.1 +0.1)

## functions

In [None]:
def f(x, y=2):  # y has a default value, so callers may leave it out
    '''
    Raise x to the power y (y defaults to 2, i.e. squaring).
    A docstring like this one is where a function's documentation goes.
    '''
    # return hands the value back and ends the function; print would only display it
    return x ** y
print(f(2))
print(f(2, 4))
print()


In [None]:
# local and global variables
def f(x):
    '''Print the value received as x, then return that value plus 5.'''
    print("In f, x =", x)
    x += 5   # rebinds only the local name x; for the ints used here the caller's variable is unchanged
    return x

def g(x):
    '''Return f(2*x) + f(3*x); f runs on the doubled value first, then on the tripled one.'''
    doubled = f(x * 2)
    tripled = f(x * 3)
    return doubled + tripled
print(g(2))

In [None]:
# use global to change a global variable
g = 100

def f(x):
    '''Add 1 to the global variable g, then return x plus the updated g.'''
    # If we modify a global variable, we must declare it as global.
    # Otherwise, Python will assume it is a local variable.
    global g
    g += 1
    return x + g

print(f(5)) # 106
print(f(5)) # 107
print(g)    # 102, the value stored in g is changed!

## conditionals
- if statement
- if-else statement
- if-else expression
- if-elif-else statement

In [None]:
## if statement
def abs1(n):
    '''Return the absolute value of n using a single if statement.'''
    if n < 0:
        return -n  # negative input: flip the sign
    return n
print(abs1(-2))
print(abs1(2))
print()

## if-else statement
def abs5(n):
    '''Return the absolute value of n using an if-else statement.'''
    if (n >= 0):
        return n
    else:          # for illustration only, don't use unnecessary else
        return -n
# exercise abs5 itself, so the if-else version is the one being demonstrated
print(abs5(-2))
print(abs5(2))
print()

## if-else expression
def abs7(n):
    '''Return the absolute value of n using an if-else (conditional) expression.'''
    return n if (n >= 0) else -n
# exercise abs7 itself, so the conditional-expression version is the one being demonstrated
print(abs7(-2))
print(abs7(2))
print()

## if-elif-else statement
def getGrade(score):
    '''Map a numeric score to a letter grade: 90+ is A, 80+ B, 70+ C, 60+ D, anything lower F.'''
    if score >= 90:
        return "A"
    elif score >= 80:
        return "B"
    elif score >= 70:  # an if statement can contain several elif branches
        return "C"
    elif score >= 60:
        return "D"
    else:
        return "F"

print("103 -->", getGrade(103))
print(" 88 -->", getGrade(88))
print(" 70 -->", getGrade(70))
print(" 61 -->", getGrade(61))
print(" 22 -->", getGrade(22))

## Loops
- for loops
- while loops
- break and continue

In [None]:
## a for loop is the preferred way to loop over a fixed range
n = 5
for row in range(n):
    for col in range(n+3):
        print("*", end="")
    print()

In [None]:
## use while loop when there is an indeterminate number of iterations
def leftmostDigit(n):
    '''Return the leading (most significant) digit of the integer n, ignoring its sign.'''
    remaining = abs(n)
    # the digit count is not known up front, so drop the last digit until only one is left
    while remaining >= 10:
        remaining //= 10
    return remaining

print(leftmostDigit(72658489290098) == 7)

In [None]:
## continue and break in the loop
for n in range(200):
    if (n % 3 == 0):
        continue # continue skips rest of this loop, so 0, 3, 6 is not printed here
    elif (n == 8):
        break # break skips rest of entire loop, so any integer bigger than 7 is not printed
    print(n, end=" ")
print()

## Strings
- string literals and escapes
- string constants and operations
- string methods
- string formatting

In [None]:
## basic string literals and how to escape them

print('single-quotes')
print("double-quotes") # triple quotes are also availabel
print("Two types of quotes are useful when you want to 'cite something'")
print("abc\ndef") # \n starts a newline
print("abc\tdef") # \t give a tab

print()
# ways to escape special characters
print("Double-quote: \"")
print("Backslash: \\")
print("These items are tab-delimited, 3-per-line:")
print("abc\tdef\tg\nhi\tj\\\tk\n---")

In [None]:
## string constants are useful in data processing
import string
print(string.ascii_letters)   # abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
print(string.ascii_lowercase) # abcdefghijklmnopqrstuvwxyz
print("-----------")
print(string.ascii_uppercase) # ABCDEFGHIJKLMNOPQRSTUVWXYZ
print(string.digits)          # 0123456789
print("-----------")
print(string.punctuation)     # '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
print(string.printable)       # digits + letters + punctuation + whitespace
print("-----------")
print(string.whitespace)      # space + tab + linefeed + return + ...

In [None]:
## basic string operations

print("abc" + "def") # string + operator
print("ring" in "strings") # string in operator
print()


## string index is very similar to a list
s = "abcdef"
print(s[2]) # indexing a single character
print(s[-1]) # also support negative indexes
print(s[1:3]) # slicing a range of characters
print(s[1:7:2]) # slicing with a step parameter
print(s[::-1]) # reversing a string with slicing

In [None]:
## loop with or without indexes
for i in range(len(s)):
    print(i, s[i])
    
for c in s:
    print(c)

In [None]:
## string methods for determining what kind of characters a string contains
def p(test):
    '''Print "True" or "False" for test, padded to a fixed-width column, without a newline.'''
    if test:
        cell = "True     "
    else:
        cell = "False    "
    print(cell, end="")
def printRow(s):
    '''Print one table row: the string s followed by the result of each character-class test.'''
    print(" " + s + "  ", end="")
    # column order: isalnum, isalpha, isdigit, islower, isspace, isupper
    checks = (s.isalnum, s.isalpha, s.isdigit, s.islower, s.isspace, s.isupper)
    for check in checks:
        p(check())
    print()
def printTable():
    '''Print a header line, then one printRow() line for each sample string.'''
    header = "  s   isalnum  isalpha  isdigit  islower  isspace  isupper"
    samples = "ABCD,ABcd,abcd,ab12,1234,    ,AB?!".split(",")
    print(header)
    for sample in samples:
        printRow(sample)
printTable()

In [None]:
## string edits
print("This is nice. Yes!".lower())
print("So is this? Sure!!".upper())
print("   Strip removes leading and trailing whitespace only    ".strip())
print("This is nice.  Really nice.".replace("nice", "sweet"))
print("This is nice.  Really nice.".replace("nice", "sweet", 1)) # count = 1

print("----------------")
s = "This is so so fun!"
t = s.replace("so ", "")
print(t)
print(s) # note that s is unmodified (strings are immutable!)

In [None]:
## substring search
print("This is a history test".count(" is ")) # 1, only the standalone word has a space on both sides
print("This IS a history test".count("is")) # 2, caps are different
print("-------")
print("Dogs and cats!".startswith("Do"))    # True
print("Dogs and cats!".startswith("Don't")) # False
print("-------")
print("Dogs and cats!".endswith("!"))       # True
print("Dogs and cats!".endswith("rats!"))   # False
print("-------")
print("Dogs and cats!".find("and"))         # 5, returns the first index
print("Dogs and cats!".find("or"))          # If not found, it returns -1.
print("-------")
print("Dogs and cats!".index("and"))        # 5, the starting index
print("Dogs and cats!".index("or"))         # crash! index raises ValueError when the substring is not found

In [None]:
## formatting string with placeholders
breed = "beagle" # %s for string
print("Did you see a %s?" % breed)

dogs = 42 
print("There are %d dogs." % dogs)

grade = 87.385
print("Your current grade is %f!" % grade)

grade = 87.385 # format a float with %.[precision]f
print("Your current grade is %0.2f!" % grade)

dogs = 42
cats = 18
exclamation = "Wow"
print("There are %d dogs and %d cats. %s!!!" % (dogs, cats, exclamation)) # be in the same order

# 1D lists and tuples
- list indexing, slicing, aliases
- list methods to manipulate lists
- looping over a list works the same as for a string
- tuples and others

In [None]:
## basic properties of list
a = [ 2, 3, 5, 2 ]
print("a = ", a)
print("len =", len(a))
print("min =", min(a))
print("max =", max(a))
print("sum =", sum(a))

## indexing and slicing is similar to string

## the original and its aliases change at the same time
a = [ 2, 3, 5, 7 ] # Create a list
b = a # Create an alias to the list
a[0] = 42
b[1] = 99 # Both are changed! We now have two references (aliases) to the SAME list
print(a)
print(b)

In [None]:
## lists can be modified destructively (change the list itself) or non-destructively (create new list)
a = [ 2, 3 ] # add one item
a.append(7)
print(a)
print()

a = [ 2, 3 ] # add a list of items
a += [ 11, 13 ]
print(a)
print()

a = [ 2, 3 ] # another way to add a list of items
a.extend([ 17, 19 ])
print(a)
print()

a = [ 2, 3, 5, 7, 11 ] # insert a item at a given index
a.insert(2, 42)  # at index 2, insert 42
print(a)
print()

# above methods change a, if you don't want to change it
# either construct a new list or use slicing
a = [ 2, 3 ]
b = a + [ 13, 17 ]
print(a)
print(b)

In [None]:
## removing elements
a = [ 2, 3, 5, 3, 7, 6, 5, 11, 13 ]
print("a =", a)

a.remove(5) # first 5 is removed
print("After a.remove(5), a=", a)

a.remove(5)
print("After another a.remove(5), a=", a)
print()
# removing at a certain index
a = [ 2, 3, 4, 5, 6, 7, 8 ]
print("a =", a)

item = a.pop(3)
print("After item = a.pop(3)")
print("   item =", item)
print("   a =", a)

# can also remove with slice assignment or with the del operator

In [None]:
## sort a list
a = [ 7, 2, 5, 3, 5, 11, 7 ]
print("At first, a =", a)
a.sort() # destructively
print("After a.sort(), a =",a)
print()

a = [ 7, 2, 5, 3, 5, 11, 7 ]
print("At first")
print("   a =", a)
b = sorted(a) #non-destructively
print("After b = sorted(a)")
print("   a =", a)
print("   b =", b)

In [None]:
## tuple syntax
t = (1, 2, 3)
print(type(t), len(t), t)

a = [1, 2, 3] # tuple can be constructed from a list
t = tuple(a)
print(type(t), len(t), t)
print()

## tuples allow multiple assignment (unpacking) and are handy for storing paired values
(x, y, z) = (1, 2, 3)
print(x)
print(y)
print(z)
print()

## tuples are immutable
t = (1, 2, 3)
print(t[0])

t[0] = 42    # crash!
print(t[0])

In [None]:
## list comprehension, a concise way to create a new list
a = [i for i in range(10)]
print(a)

a = [(i*100) for i in range(20) if i%5 == 0]
print(a)

# sets and dictionaries

In [1]:
## sets basics
# can be created via a list, any iterable object, or a statically-allocated set
s = set(["cat", "cow", "dog"])
print(s)     # prints {'cow', 'dog', 'cat'}
print()
s = { 2, 3, 5 }  # a set features a curly bracket
print(s)     

{'cat', 'dog', 'cow'}

{2, 3, 5}


In [2]:
## sets properties
# sets are unordered, meaning no indexing
s = set([2,4,8, 10, 9, 7])
print(s)          
for element in s:  # no specific order
    print(element) 
print()

# elements are unique
s = set([2,2,2])
print(s)          # prints {2}
print(len(s))     # prints 1

# elements must be immutable
a = ["lists", "are", "mutable"]
s = set([a])       # TypeError: unhashable type: 'list'
print(s)

# sets are more efficient than lists for membership tests (x in s)

{2, 4, 7, 8, 9, 10}
2
4
7
8
9
10

{2}
1


TypeError: unhashable type: 'list'

In [None]:
## some set operations
s = set([1, 2, 3])
# l = len(s) 
# s2 = s.copy()
# s.clear()
# x in s test for membership
# s.add(x)
# s.remove(x)
# s.discard(x) remove x from set s if present
# s.issubset(s2)
# s.union(s2) new set with elements from both set s and set s2
# s.intersection(s2)
# s.difference(s2)

In [None]:
## a dictionary is a collection of (key, value) pairs, mapping each key to a value. Keys form a set; values are unrestricted.
d = dict() # construct a dictionary
d[2] = 100
d[4] = "CMU"
d[8] = [300]
print(d)  # Python 3.7+ keeps insertion order: {2: 100, 4: 'CMU', 8: [300]}

d = dict() # change the value stored for an existing key
d[2] = 100
d[2] = 200
d[2] = 400
print(d)  # { 2:400 }

# Object-Oriented-Programming (OOP)

In [None]:
# an object is a data structure that has user-defined properties and methods associated with it.
# objects are defined using a class, which can be thought of as a template for a generic object.

def gcd(x, y):
    '''Find the greatest common divisor of x and y (Euclid's algorithm).'''
    if (y == 0): return x
    else: return gcd(y, x%y)

class Fraction(object):
    '''A fraction num/den stored in lowest terms with a positive denominator.'''

    def __init__(self, num, den): # constructor
        '''Store num/den in reduced form; raise ZeroDivisionError if den is 0.'''
        if (den == 0):
            # without this check a zero denominator slips through as "1/0"
            raise ZeroDivisionError('Fraction(%s, 0)' % num)
        if (den < 0):
            # keep the sign on the numerator so equal fractions share one form
            num, den = -num, -den
        g = gcd(num, den)
        self.num = num // g
        self.den = den // g

    def __repr__(self): # helps to print the instance as string
        return '%d/%d' % (self.num, self.den)

    def __eq__(self, other): # helps to compare different instances
        return (isinstance(other, Fraction) and
                ((self.num == other.num) and (self.den == other.den)))

    def __hash__(self):
        # defining __eq__ alone makes instances unhashable; hash the same fields
        # that __eq__ compares so equal fractions work in sets and as dict keys
        return hash((self.num, self.den))

    def times(self, other): # a regular method, unlike the special __xxx__ methods above
        '''Return a new Fraction equal to self multiplied by an int or another Fraction.'''
        if (isinstance(other, int)):
            return Fraction(self.num * other, self.den)
        else:
            return Fraction(self.num * other.num, self.den * other.den)

In [None]:
def testFractionClass():
    '''Sanity-check Fraction: string form, equality, and multiplication.'''
    print('Testing Fraction class...', end='')
    twoThirds = Fraction(2, 3)
    # string form, alone and inside a list
    assert str(twoThirds) == '2/3'
    assert str([twoThirds]) == '[2/3]'
    # equality against other fractions and against a non-Fraction
    assert twoThirds == Fraction(2, 3)
    assert twoThirds != Fraction(2, 5)
    assert twoThirds != "Don't crash here!"
    # multiplication by a Fraction and by an int
    assert twoThirds.times(Fraction(3, 4)) == Fraction(1, 2)
    assert twoThirds.times(5) == Fraction(10, 3)
    print('Passed.')

if (__name__ == '__main__'):
    testFractionClass()

In [None]:
## static methods
class A(object):
    '''Shows a static method: a function that lives in the class but uses no instance.'''

    @staticmethod
    def f(x):  # no self parameter, so it is called as A.f(x)
        '''Return ten times x.'''
        tenfold = 10 * x
        return tenfold

print(A.f(42))

In [None]:
## Inheritance
class A(object):
    '''Base class that stores a single value x and offers two scaled views of it.'''

    def __init__(self, x):
        self.x = x

    def f(self):
        '''Return 10 times x.'''
        return 10 * self.x

    def g(self):
        '''Return 100 times x.'''
        return 100 * self.x

class B(A): # inherit the template from class A
    '''Subclass of A that adds a second value y and overrides f().'''
    def __init__(self, x=42, y=99):
        super().__init__(x) # run the parent class's __init__ with x before adding y
        self.y = y
    def f(self): # can override methods too
        '''Return 1000 times x, replacing the inherited f().'''
        return 1000*self.x


a = A(5)
b = B(7)
print(a.f()) # 50
print(a.g()) # 500
print(b.f()) # 7000
print(b.g()) # 700

## Reference
The material mainly comes from an introductory programming course, 15-112. Here is the link:
http://www.krivers.net/15112-s19/schedule.html