In [1]:
# build the test list
# range(6) yields 0..5, so each value is the same as its index
my_list = list(range(6))
my_list

[0, 1, 2, 3, 4, 5]

In [2]:
# what kind of object is the list?
# (types can also be checked in the Jupyter tab in the terminal)
type(my_list)

list

In [3]:
# see the attributes and methods available for the object
# the double-underscore names come first, followed by the regular list
# methods (append, pop, sort, ...)
dir(my_list)

['__add__',
 '__class__',
 '__class_getitem__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rmul__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'append',
 'clear',
 'copy',
 'count',
 'extend',
 'index',
 'insert',
 'pop',
 'remove',
 'reverse',
 'sort']

In [4]:
# how to iterate through a list w/o a formal for loop
# the built-in iter() calls my_list.__iter__() for us and returns a list
# iterator; prefer the built-in over calling the dunder method directly
iter(my_list)

<list_iterator at 0x11cdcb35f90>

In [6]:
# a list comprehension can stand in for a simple for loop
# note: only the last expression of a cell is displayed, so this plain copy
# is evaluated but not shown
[value for value in my_list]

# multiply each number in the list by 25
# could be helpful for converting between units
[value * 25 for value in my_list]

[0, 25, 50, 75, 100, 125]

In [7]:
# lists are also helpful if we need to update some state of the program each
# time we go through a loop, i.e. build on a result with every pass

# initiate list to accumulate into
numbers = []

# loop
for num in my_list:
    # for each num in my_list, append the num to the (initially empty) list
    numbers.append(num)

numbers

[0, 1, 2, 3, 4, 5]

In [10]:
# for loops are good for building dataframes/lists iteratively

# initiate a list holding the first two terms
fib = [1, 1]

# loop
# my_list only sets the number of passes here (one per element); num is never read
# could also use an _ instead of num for something that isn't a defined variable
for num in my_list:
    # take the number two places back and add it to the number one place back from where I am
    fib.append(fib[-2]+fib[-1])

fib

[1, 1, 2, 3, 5, 8, 13, 21]

In [11]:
# start the sequence with its first two terms
fib = [1, 1]

# range(6) yields 0, 1, 2, 3, 4, 5 (6 itself is excluded), so the body runs 6 times
# _ signals that the loop value itself is never used
for _ in range(6):
    # next term = sum of the last two terms
    fib.append(sum(fib[-2:]))

fib

[1, 1, 2, 3, 5, 8, 13, 21]

In [12]:
# ranges are inclusive of the first number in the range but not the top number
# with a single argument the start defaults to 0, so this is range(0, 6)
range(6)

range(0, 6)

In [None]:
# a list holds all of its values, predefined, in memory
# a range only stores its start/stop/step and produces each value on demand as the loop asks for the next one

In [13]:
# range from 4 to 10, counting by 2s
# 10 not incl b/c it's the top number of the range
# the bare range(...) below is not displayed (only the cell's last expression
# is); wrapping it in list() shows the values it generates
range(4, 10, 2)
list(range(4, 10, 2))

[4, 6, 8]

iterating over a dataframe

In [14]:
# iterating over a df
import pandas as pd
import numpy as np
import math

In [15]:
# build a dataframe indexed by my_list with a single 'sqrt' column
# np.sqrt takes the whole list at once and returns the square root of each value
my_df = pd.DataFrame(data={'sqrt': np.sqrt(my_list)}, index=my_list)
my_df

Unnamed: 0,sqrt
0,0.0
1,1.0
2,1.414214
3,1.732051
4,2.0
5,2.236068


In [16]:
# initialize the same df with the standard-library math package
# math.sqrt takes one number at a time (it is NOT vectorized), so we loop over
# my_list with a list comprehension; np.sqrt on the whole list is the
# vectorized alternative and will usually be faster on large inputs
my_math_df = pd.DataFrame(
    data={'sqrt': [math.sqrt(value) for value in my_list]},
    index=my_list,
)
my_math_df

Unnamed: 0,sqrt
0,0.0
1,1.0
2,1.414214
3,1.732051
4,2.0
5,2.236068


In [18]:
# build a wider frame: two numeric columns plus an alternating group label
# np.log(0) is -inf and emits a divide-by-zero RuntimeWarning; the -inf is
# expected in this demo, so silence only that warning while the frame is built
with np.errstate(divide='ignore'):
    my_big_df = pd.DataFrame(
        {
            'sqrt': np.sqrt(my_list),
            'log': np.log(my_list),
            # alternate 'a', 'b', 'a', ... with exactly one label per row, so the
            # column length matches my_list even when its length is odd
            'group': [('a', 'b')[pos % 2] for pos in range(len(my_list))],
        },
        index=my_list,
    )
my_big_df

  'log': np.log(my_list),


Unnamed: 0,sqrt,log,group
0,0.0,-inf,a
1,1.0,0.0,b
2,1.414214,0.693147,a
3,1.732051,1.098612,b
4,2.0,1.386294,a
5,2.236068,1.609438,b


In [22]:
# loop through my_big_df one row at a time
# iterrows() yields (index label, row) pairs; each row arrives as a Series
for i, row in my_big_df.iterrows():
    print(type(row))
    print(row)
    # the sqrt value of that row, looked up by its column label
    print(row['sqrt'])

<class 'pandas.core.series.Series'>
sqrt     0.0
log     -inf
group      a
Name: 0, dtype: object
0.0
<class 'pandas.core.series.Series'>
sqrt     1.0
log      0.0
group      b
Name: 1, dtype: object
1.0
<class 'pandas.core.series.Series'>
sqrt     1.414214
log      0.693147
group           a
Name: 2, dtype: object
1.4142135623730951
<class 'pandas.core.series.Series'>
sqrt     1.732051
log      1.098612
group           b
Name: 3, dtype: object
1.7320508075688772
<class 'pandas.core.series.Series'>
sqrt          2.0
log      1.386294
group           a
Name: 4, dtype: object
2.0
<class 'pandas.core.series.Series'>
sqrt     2.236068
log      1.609438
group           b
Name: 5, dtype: object
2.23606797749979


In [25]:
# iterate by group
# groupby('group') yields one (label, sub-dataframe) pair per distinct value in
# the 'group' column: one df with the rows where group is 'a', one where it is 'b'
for group, df in my_big_df.groupby('group'):
    # label on its own line, then that group's rows
    print(group, df, sep='\n')

a
       sqrt       log group
0  0.000000      -inf     a
2  1.414214  0.693147     a
4  2.000000  1.386294     a
b
       sqrt       log group
1  1.000000  0.000000     b
3  1.732051  1.098612     b
5  2.236068  1.609438     b


TUPLES

* similar to lists
* use () to define them
* some things can be done with tuples but not with lists (e.g. a tuple can be used as a dictionary key, a list cannot)
* tuples are immutable - you can't change them. w/ lists we can easily change what's in the list

In [26]:
# define a tuple holding the same values as the test list (0 through 5)
my_tuple = tuple(range(6))
my_tuple

(0, 1, 2, 3, 4, 5)

In [27]:
# confirm what kind of object my_tuple is
type(my_tuple)

tuple

In [28]:
# lists are mutable: overwrite the value at index 3 in place
# (this changes my_list itself, so any cell re-run after this one sees 32
# unless my_list is recreated first)
my_list[3] = 32
my_list

[0, 1, 2, 32, 4, 5]

In [29]:
# tuples are immutable: assigning to an index raises a TypeError
# catch the error and print it so the cell demonstrates the failure without
# stopping a "Run All"
try:
    my_tuple[3] = 32
except TypeError as err:
    print(f'TypeError: {err}')

# the tuple is unchanged
my_tuple

TypeError: 'tuple' object does not support item assignment