# Python Fundamentals

In [58]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


# Python Data Types

* Text: str
* Numeric: int, float, complex
* Boolean: bool
* Null: NoneType
* Sequence: list, tuple, range
* Set: set, frozenset
* Mapping: dict
* Binary: bytes, bytearray, memoryview

In [56]:
for i in [1,1.0,"1.0",True, None]: # This is a for loop
    print(type(i))

<class 'int'>
<class 'float'>
<class 'str'>
<class 'bool'>
<class 'NoneType'>


In [38]:
for i in [(1,0),[1,0],range(1),{1,0}, dict({0:1})]:
    print(type(i))

<class 'tuple'>
<class 'list'>
<class 'range'>
<class 'set'>
<class 'dict'>


# Identifiers

Name variables in python with the format var = value

Named objects in python start with "_" or a letter.

six_var is a valid object name

_6var is a valid object name

6_var is not

In [24]:
_6var =6
print(_6var)

6


In [25]:
6_var = 6
print(6_var)

SyntaxError: invalid decimal literal (935359935.py, line 1)

# Python has keywords

don't use them to name things

In [26]:
import keyword
print(keyword.kwlist)

['False', 'None', 'True', '__peg_parser__', 'and', 'as', 'assert', 'async', 'await', 'break', 'class', 'continue', 'def', 'del', 'elif', 'else', 'except', 'finally', 'for', 'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield']


# Python syntax: Whitespace

Python uses four whitespaces to delineate code structure

* Code blocks start on the left.
* Code lines end without punctuation (except when defining loops/functions)
* Use 4 whitespaces within code blocks.

# Python syntax: Comments

Use __#__ for _block_ and _inline_ comments.

Use a __""" """__ for _multiline_ comments.

# Strings

Python recognizes three kinds of string definition using quotes. 

Always match quotes, often this is automatic.

single quote: __' '__

double quote: __" "__

triple double quote: __""" """__

In [207]:
astring = 'a string' # I am an inline comment
isathing = "is a thing"
tobehold = """to behold!"""

print(astring, isathing, tobehold)

a string is a thing to behold!


In [170]:
# this won't work
astring = 'a 
    string'

SyntaxError: EOL while scanning string literal (2543934472.py, line 2)

In [185]:
#we need an escape character for multiline text using single or double quotes
astring = 'a \
    string'
astring # don't do this

'a     string'

In [172]:
"""the triple double quote is used for docstrings and works on one
or separate lines but also allows multi-line text to be held in a variable"""

tobehold = """to 
    behold!"""
tobehold

'to \n    behold!'

In [176]:
# print ignores the newline character 
print(tobehold)

to 
    behold!


# About that escape character...

a \ can be used to continue on multiple lines,

or to ignore something.

In [190]:
badApostrophe = 'It's fine' # nope, not fine

SyntaxError: invalid syntax (4260612961.py, line 1)

In [191]:
goodApostrophe = 'It\'s fine' #totally fine
goodApostrophe

"It's fine"

In [195]:
#what about a path string!?
badPathstring = '\Users\brandanscully\Documents\DATA_510_DRAFT' #windows likes escape characters in path strings
badPathstring 

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 0-1: truncated \UXXXXXXXX escape (2716227525.py, line 2)

In [203]:
pathString1='/Users/brandanscully/Documents/DATA_510_DRAFT' # reverse the backslashes

pathString1

'/Users/brandanscully/Documents/DATA_510_DRAFT'

In [204]:
pathString2= r'\Users\brandanscully\Documents\DATA_510_DRAFT' #use raw strings

pathString2

'\\Users\\brandanscully\\Documents\\DATA_510_DRAFT'

In [208]:
def badpoem():
    """docstrings are great 
    for provding multi line
    comments, e.g. to describe what your function is doing"""
    astring = 'a string'
    isathing = "is a thing"
    tobehold = """to behold!"""
    return astring + isathing + tobehold #<- this is string concatenation

badpoem()

'a stringis a thingto behold!'

# Compare Behaviors 

print(astring, isathing, tobehold):   __a string is a thing to behold!__

gets proper spacing and no quotes

return astring + isathing + tobehold: __'a stringis a thingto behold!'__

gets literal spacing and quotes

# A string is a sequnce of characters

You can access string characters using their index

In [211]:
someString = "Data are great!"

print(someString[0]) # string index starts at 0 and increases from left to right
print(someString[-1]) # string index can be accessed from right to left with -

D
!


In [218]:
# find the length of a string using len()
len(someString)

15

In [219]:
for i,n in enumerate(someString):
    print(i,n)

0 D
1 a
2 t
3 a
4  
5 a
6 r
7 e
8  
9 g
10 r
11 e
12 a
13 t
14 !


In [222]:
"""slice a string, 
notice slice includes the first index and excludes the final index"""
print(someString[9:15])

great!


In [224]:
# the start index defaults to 0
print(someString[:4])

Data


In [225]:
# you already saw concatenation
print(someString + ' Seriously!')

Data are great! Seriously!


In [230]:
# you may find this to be handy IN THE FUTURE.
fileName = "myFile.txt"
newFile = fileName[:-4]+'.csv'

newFile

'myFile.csv'

# Numbers

Integers are whole numbers.

Float has decimals.

You can use _ in numbers for readability

In [232]:
10_000_000.00

10000000.0

# Converting Types

Sometimes you may want something to be in a format it's not.

In [233]:
int(10_000_000.00)

10000000

In [234]:
str(10_000_000.00)

'10000000.0'

In [238]:
float(str(10_000_000))

10000000.0

# Arithmetic Operators

* Add: +
  
  3 + 4

* Subtract: -
  
  4 - 3

* Multiply: *
  
  3 * 4

* Divide: /
  
  4 / 3      (_division always returns a float_)

# Arithmetic Operators

* Exponent: **

  3 ** 4
  
* Modulo: %

  3 % 4
  
* Div: //

  4 // 3

In [50]:
3 ** 4

81

In [43]:
7 % 3 # modulo returns the remainder of division

1

In [44]:
7//3 # div returns floor (n/d)

2

# Mind data types with arithmetic operators

Python with modify the output data type when mixing float and int.

Division always returns a float.

In [48]:
1 * 10

10

In [47]:
1.0 *10.0

10.0

In [49]:
1.0 * 10

10.0

# Comparison Operators

These return boolean types

* Less than: <
* Less than or equal to: <=
* Greater than: >
* Greater than or equal to: >=
* Equal to: ==
* Not equal to: != ("bang equals")

# Logical Operators

* and
* or
* not

# Flow Control: For

__for__ var __in__ interable:

    do something

# Flow Control: If...else...elif

__if__ if-condition:

    do something
    
__elif__ elif-condition:

    do something else
    
__else__:

    do something completely different

# Flow Control: Ternary Operator

true_value __if__ condition __else__ false_value

It's a streamlined if else statement.

In [92]:
"same" if "x"=="x" else "different"

'same'

# Flow Control: While

__while__ true_condition:

    do something

# Flow Control: break, continue, pass

these are helpful tools that can be used for iterative development.

__break__: typically used in an if statement to leave a loop when a condition is met

if condition:

    do something
    
else:
    
    break # the loop stops

# Flow Control: continue

__continue__: use this to skip a current iteration when a condition is met


while condition:

    do something

    if newCondition:
    
        continue # the current iteration is skipped

# Flow Control: pass

__pass__: it does nothing! but it avoids a syntax error. use it as a placeholder 

if condition:

    do something
    
else:

    pass # nothing happens

# FUNctions ;)

Pythonic programming uses modularity.

Functions are reusable pieces of code.

__def__ newFUNction():

    """This is a docstring. Use it to describe your function!"""
    
    var = do something
    
    return var # if you want to pass the output of your function to global

In [303]:
def newFUNction():

    """This is a docstring. Use it to describe your function!"""
    
    var = "do something"
    
    return var # if you want to pass the output of your function to global

newFUNction()

'do something'

# Functions: parameters & default parameters

You can supply your functions with parameters and default parameters.

__def__ _add5orsomething_( _param_, _defParam_ = 5):
    
    return param+defParam
    
add5orsomething(10)

In [64]:
def add5orsomething(param, defParam=5):
    return param+defParam

print(add5orsomething(10)) # the function takes a parameter and adds the default parameter
print(add5orsomething(10,10)) #th function takes a parameter and adds the parameter that replaces default

15
20


# Functions: Keyword Arguments

You can supply your functions with multiple default parameters that you can choose or not.

__def__ _crazyMath_( _param_, _defParam1_ = 5, _defParam2_ = .07):
    
    return param + (defParam1 *defParam2)
    
crazyMath(10)

In [69]:
def crazyMath(param, defParam1 = 5, defParam2 = 0.07):
    
    return param + (defParam1 * defParam2)
    
print(crazyMath(10)) # this is fine
print(crazyMath(10, 5, .08)) # also fine
print(crazyMath(10,.08)) # we have now used .08 in the defParam1 space
print(crazyMath(10,defParam2=0.08)) #now we're using 0.08 in defParam2, instead of 0.07

10.35
10.4
10.0056
10.4


# Functions: Lambda Expressions

Lambda expressions are anonymous functions. These are helpful for passing functions.

__lambda__ _parameter_: _expression_

is equivalent to

__def__ _someFunction_(_parameter_):

    reutrn expression
    
We'll come back to this.

# Lists

A list is a mutable ordered collection of items mapped to an index.

this_list = [item1, item2, item3]

In [93]:
# lists are iterable
this_list=["item1","item2","item3"]

for i in this_list:
    print(i)

item1
item2
item3


In [82]:
# list indexes start at 0

this_list=["item1","item2","item3"]

for e,i in enumerate(this_list): # enumerate returns an index
    print(e)
    print(i)

0
item1
1
item2
2
item3


In [94]:
# access list items using index

this_list[1] 

'item2'

In [84]:
# lists are mutable

this_list[2] = 'foo'

for i in this_list:
    print(i)

item1
item2
foo


# Tuples

An "n-tuple" is an immutable ordered collection of n items mapped to an index.

twoTuple = (item1,item2)
threeTuple = (item1,item2,item3)

In [85]:
# tuples are iterable
this_tuple=("item1","item2","item3")

for i in this_tuple:
    print(i)

item1
item2
item3


In [86]:
# access tuple items using index

this_tuple[1] 

'item2'

In [88]:
# tuples are immutable

this_tuple[2] = 'foo'

TypeError: 'tuple' object does not support item assignment

# Unpacking

Python has a handy feature for unpacking lists and tuples

var1, var2, var3 = this_list

In [90]:
var1, var2, var3 = this_list
var4, var5, var6 = this_tuple

print(var1,var4)
print(var2,var5)
print(var3,var6)

item1 item1
item2 item2
foo item3


# Mapping to a list

you can apply a function to a list using __map( )__

map(function: list)

In [97]:
list_to_double = [2,5,10]

doubled_list = map(lambda todouble: todouble*2, list_to_double)

print(list(doubled_list))

[4, 10, 20]


# Sorting a list

use __.sort( )__ method to sort a list in ascending order in place.

In [304]:
numbers = [2,5,7,1,3,6,8,4,9,0]
numbers.sort()
print(numbers)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [118]:
letters=['h','e','d','g','b','a','c','f']
letters.sort()
print(letters)

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']


In [119]:
zero2nine = list(range(10))

print(zero2nine)

zero2nine.sort(reverse=True)

print(zero2nine)

letters.sort(reverse=True)

print(letters)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
['h', 'g', 'f', 'e', 'd', 'c', 'b', 'a']


# Dictionaries

A dictionary is a mutable unordered collection of key:value pairs.

{key1:value1, key2:value2}

In [120]:
class_dict = {"Brandan":"Instructor", "Lydia":"Student", "David":"Student",
             "Cayden":"Student", "Maddie":"Stuent", "Trevor":"Student",
             "Giselle":"Student", "Sheldon":"Student", "Will":"Student"}

class_dict["Trevor"]

'Student'

In [125]:
class_dict["Mike"]

KeyError: 'Mike'

In [126]:
# the .get() method catches key errors.
print(class_dict.get("Mike"))

None


In [128]:
# we can add Mike as a Graduate Assistant.
class_dict["Beth"] = "Graduate Assistant"
class_dict["Beth"]

'Graduate Assistant'

# Dict Comprehension

It's basically a for loop that runs on k:v pairs that generates a new dict of results.

{key:value for (key,value) in dict.items() if condition}

In [132]:
class_dict

{'Brandan': 'Instructor',
 'Lydia': 'Student',
 'David': 'Student',
 'Cayden': 'Student',
 'Maddie': 'Stuent',
 'Trevor': 'Student',
 'Giselle': 'Student',
 'Sheldon': 'Student',
 'Will': 'Student',
 'Beth': 'Graduate Assistant'}

In [133]:
# use dict comprehension to filter
student_dict = {key:value for (key,value) in class_dict.items() if class_dict[key]=='Student'}
student_dict

{'Lydia': 'Student',
 'David': 'Student',
 'Cayden': 'Student',
 'Trevor': 'Student',
 'Giselle': 'Student',
 'Sheldon': 'Student',
 'Will': 'Student'}

In [134]:
# use dict comprehension to modify
alumni_dict = {key:value.replace("Student","Alum") for (key,value) in class_dict.items() if class_dict[key]=='Student'}
alumni_dict

{'Lydia': 'Alum',
 'David': 'Alum',
 'Cayden': 'Alum',
 'Trevor': 'Alum',
 'Giselle': 'Alum',
 'Sheldon': 'Alum',
 'Will': 'Alum'}

# Sets

A mutable unorderd list of unique immutable elements.

newSet = {item1, item2, item3} __# looks like a dict, but note the lack of k:v pairs__

In [241]:
# Empty set is False
set() == True

False

In [243]:
# Create sets with an iterable

set([8,6,7,5,3,0,9])

{0, 3, 5, 6, 7, 8, 9}

In [255]:
characters = set("the quick brown fox jumps over the lazy dog") #strings are iterable, duplicates are reduced
print(characters) 
print(len(characters)) # whitespace counts as a letter in this case

{'u', 'w', 'f', 'g', 'd', 'j', 'r', 'b', 'm', 'o', 'y', 'v', 'q', 's', 'l', 'n', 'p', 'i', 'z', 'a', 'h', ' ', 'x', 'e', 't', 'c', 'k'}
27


In [258]:
# set contains
"a" in characters

True

In [259]:
# set excludes
6 not in characters

True

In [261]:
characters.add(6)
6 in characters

True

In [262]:
characters.remove(6)
6 in characters

False

In [264]:
# if an element isn't in a set you might throw an error
characters.remove(6)

KeyError: 6

In [265]:
# so discard might be better if you don't want to check set contents
characters.discard(6)

# Other set methods

__.pop( )__ - removes and returns an element

__.clear( )__ - clears a set

__.issubset(otherSet)__ - checks if current set is subset of otherSet

__.issuperset(otherSet)__ - checks if otherSet is superset of current set

__.difference(otherSet)__ - coputs the difference of current set and otherSet

__frozenset()__ makes the set immutable

In [305]:
# Remember our venn diagram from last time?

compSci = {'Machine Learning', 'Software Development', 'Data Science'}
mathStats = {'Machine Learning', 'Traditional Research', 'Data Science'}
domainKnowledge = {'Traditional Research', 'Software Development', 'Data Science'}

compSci & mathStats & domainKnowledge # & for set intersect, | for set union

{'Data Science'}

In [306]:
# Set Difference

print(compSci - mathStats)
print(mathStats - domainKnowledge)
print(domainKnowledge - compSci)

{'Software Development'}
{'Machine Learning'}
{'Traditional Research'}


# Import packages

import packages with __import__ or __import as__

import modules __from__ packages

In [307]:
import os
import pandas as pd
from pandas import DataFrame as DF

In [296]:
os.getcwd()

'/Users/brandanscully/Documents/DATA_510_DRAFT'

In [308]:
# here we use the DataFrame module of pandas, but pd still has all modules
df = pd.DataFrame(alumni_dict, index=range(1))
df

Unnamed: 0,Lydia,David,Cayden,Trevor,Giselle,Sheldon,Will
0,Alum,Alum,Alum,Alum,Alum,Alum,Alum


In [309]:
# here we only have access to the pandas DataFrame module and its methods.
DF(alumni_dict, index=range(1)) # using the DataFrame module of pandas

Unnamed: 0,Lydia,David,Cayden,Trevor,Giselle,Sheldon,Will
0,Alum,Alum,Alum,Alum,Alum,Alum,Alum


__[The Python Style Guide](https://peps.python.org/pep-0008)__ 

__[The Python tutorial that inspired this lecture](https://www.pythontutorial.net/python-basics/)__

__[The Python docs](https://docs/python.org/3/)__