### Printing:

https://docs.python.org/3/tutorial/inputoutput.html

In [10]:
name = 'Myanmar'
status = 'country'
location = 'Asia'

print('{} is a {} in {}.'.format(name,status,location))

Myanmar is a country in Asia.


In [11]:
# %-style formatting: %d renders the float 100/12 (8.33...) as an integer, dropping the fractional part
print('There are %d dozens in %d.' % (100/12, 100))

There are 8 dozens in 100.


In [58]:
import math
print(f'The value of pi is approximately {math.pi:.3f}.')

The value of pi is approximately 3.142.


### Strings and formatting:

In [1]:
"file_{filenum:03}".format(filenum=1)

'file_001'

### Permutations:

Permutation of values up to an integer:

In [12]:
import numpy as np
np.random.permutation(15)

array([ 2,  3,  9,  7,  5,  0,  8, 11,  6, 13,  4, 14,  1, 12, 10])

Permutation of values in a vector:

In [13]:
m = np.arange(10)**2
np.random.permutation(m)

array([81, 36,  1, 64, 49,  0,  4,  9, 25, 16], dtype=int32)

Permutation of values in a matrix:

In [14]:
M = np.arange(16).reshape(4,4)
np.random.permutation(M)

array([[12, 13, 14, 15],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [ 0,  1,  2,  3]])

### Sampling from a vector of probabilities:

In [2]:
import numpy as np
np.random.choice(10) # accepts vector of probabilities through argument p

3

### Definition of methods inside classes:

Methods defined inside classes expect a _self_ argument. Not declaring it results in errors:

In [15]:
class MyClass:
    # Deliberately broken example: the method is declared without `self`.
    # Calling it on an instance with one argument passes two positional
    # arguments (the instance itself plus the argument), which raises TypeError.
    def method(arg):
        print(arg)
        
MyObject = MyClass()

MyObject.method('This will not work')

TypeError: method() takes 1 positional argument but 2 were given

Workaround 1 - use _self_ in method header:

In [None]:
class MyClass:
    """Example class whose method declares the conventional ``self`` parameter."""

    def method(self, arg):
        """Print ``arg``; ``self`` receives the instance the method is called on."""
        print(arg)
        
MyObject = MyClass()

MyObject.method('This will work')

Workaround 2 - use _@staticmethod_ before method header:

In [None]:
class MyClass:
    """Example class whose method needs no instance and is therefore static."""

    @staticmethod
    def method(arg):
        """Print ``arg``; a static method receives no implicit first argument."""
        print(arg)
        
MyObject = MyClass()

MyObject.method('This will work')

### Dictionaries

Adding multiple dictionaries together:

In [9]:
dict1 = {'a':1, 'b':2}
dict2 = {'c':3, 'd':4}
{**dict1, **dict2}

{'a': 1, 'b': 2, 'c': 3, 'd': 4}

### Indexing:

Lists can only be indexed with a single integer or with a slice. To index with a list of positions, convert to np.array:

In [60]:
l = [0,1,2,3,4,5]
l[3]

3

In [63]:
l[3:5]

[3, 4]

In [64]:
# this does not work
l[[1,3,2]]

TypeError: list indices must be integers or slices, not list

In [66]:
# this works fine
np.array(l)[[1,3,2]]

array([1, 3, 2])

#### Making unique directories

_specifically for TensorBoard logs_

In [67]:
def get_logdir(root_dir=None, subdir='TensorBoard_logs'):
    """Return a timestamped log directory path for a new run.

    The path has the form ``<root_dir>/<subdir>/run_YYYY_mm_dd-HH_MM_SS`` and is
    built from the local time at the moment of the call. Nothing is created
    on disk; the caller (e.g. a TensorBoard callback) creates the directory.

    Args:
        root_dir: base directory; defaults to the current working directory.
        subdir: folder under ``root_dir`` that groups all runs.

    Returns:
        The path as a string. The timestamp has one-second resolution, so
        calls made within the same second return the same path.
    """
    import time
    from os import getcwd
    from os.path import join
    if root_dir is None:
        # resolved at call time so a later chdir is honoured
        root_dir = getcwd()
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return join(root_dir, subdir, run_id)

get_logdir()

'C:\\Users\\fabio\\Documents\\GitHub\\ds sandbox\\TensorBoard_logs\\run_2021_02_09-09_49_28'

#### Thin wrapper

* Create a thin wrapper to use another function with pre-defined parameters   
* Effectively lets you use non-standard parameter settings as if they were default values

In [None]:
from functools import partial

# NOTE(review): `keras` is not imported in this cell - presumably it was imported
# earlier in the session (e.g. `from tensorflow import keras`); confirm before running.

# pass another function and the desired parameters as arguments;
# partial() returns a new callable with these keyword arguments pre-filled
RegularizedDense = partial(keras.layers.Dense, 
                           activation='relu', 
                           kernel_initializer='he_normal', 
                           kernel_regularizer=keras.regularizers.l2(0.01))

model = keras.models.Sequential()

# use new function without need to explicitly provide those inputs
# (the positional 100 is forwarded to keras.layers.Dense; keyword arguments
# given at call time would override the pre-filled ones)
model.add(RegularizedDense(100))

### Mapping functions

In [11]:
from itertools import starmap

# each tuple is unpacked into the function's arguments: pow(2,3), pow(3,4), pow(2,5)
pairs_of_arguments = [(2,3), (3,4), (2,5)]
# starmap returns a lazy iterator, so wrap it in list() to see the results
list(starmap(pow, pairs_of_arguments))

[8, 81, 32]

In [None]:
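# how does map() from base python differ??
# -> map(f, xs) calls f(x) with each item as a single argument, whereas
#    starmap(f, xs) unpacks each item into separate arguments: f(*x).
#    With map, the arguments are given as parallel iterables instead:
#    list(map(pow, [2,3,2], [3,4,5])) gives the same result as the starmap example above.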

### Comprehensions

In [None]:
# list comprehension with an if clause at the end
#train_features_filepaths = [file for file in os.listdir('files') if "train_features_" in file]

### Files and Directories

In [None]:
# os.mkdir()
# os.getcwd()
# os.path.join()

### Reading and parsing XML

In [None]:
import xml.etree.ElementTree as ET # python module to work with xml
from os import listdir, makedirs, mkdir
from os.path import join


def _is_volume_3(element):
    """Return True for the element that opens volume 3 (a DIV1 tag with N == '3')."""
    # DIV1 is the tag corresponding to the volumes in the original xml file;
    # .get() avoids a KeyError on a DIV1 element that carries no N attribute
    return element.tag == 'DIV1' and element.attrib.get('N') == '3'


def _iter_text_until(element, should_stop):
    """Yield the text chunks under `element` in document order, each exactly once.

    Iteration ends as soon as an element for which should_stop(element) is true
    is reached: that element, its subtree and everything after it are skipped.
    The generator's return value (True when the stop element was hit) is only
    used internally to unwind the recursion.
    """
    if should_stop(element):
        return True
    if not isinstance(element.tag, str): # comments / processing instructions carry no document text
        return False
    if element.text:
        yield element.text
    for child in element:
        stopped = yield from _iter_text_until(child, should_stop)
        if stopped:
            return True
        if child.tail: # text that follows the child's closing tag
            yield child.tail
    return False


filepath = join('dataset', 'ECFR-title14.xml') # file already fetched from https://www.govinfo.gov/bulkdata/ECFR/title-14/ECFR-title14.xml
tree = ET.parse(filepath)
root = tree.getroot() # root used to navigate the xml file

# create the temp dir if not yet created (no error if it already exists)
makedirs('temp', exist_ok=True)

filepath = join('temp', 'ECFR-title14.txt')

# this is the part that parses the xml file and saves the contents to a text file
# Each text chunk is written once, in document order, stopping at volume 3.
# (Calling itertext() on every element from root.iter() would write the text of
# each subtree again for every ancestor, and the root alone would already dump
# the whole document, so the volume-3 cut-off would have no effect.)
with open(filepath, 'w', encoding='utf-8') as file: # choose encoding when creating the file, otherwise errors due to reading unknown characters from the xml file (had problems with rho in the standard encoding)
    file.writelines(
        chunk
        for chunk in _iter_text_until(root, _is_volume_3)
        if not chunk.startswith('\n') # skip chunks that begin with a line break
    )

## Tensorflow & keras

### Learning rate scheduling
* via _decay_ argument on optimizer
* by passing a lr scheduling function to the optimizer
    * either a function or an instance of keras.optimizers.schedules
    * evaluated on every optimizer step (batch)
* by using a LearningRateScheduler callback
    * reasonable flexibility and performance
    * require custom function of lr and epoch
    * evaluated on every batch, number of batches or epoch
* by using an EarlyStopping callback
    * use _restore_best_weights=True_
    * perform a second loop with reduced learning rate
    * does not play nice with tensorboard in steps mode (number of epochs is reset to 0), but OK with relative

### Tensorboard

##### Performance warning:
_"WARNING:tensorflow:Callbacks method `on_train_batch_end` is slow compared to the batch time (batch time: 2.4947s vs `on_train_batch_end` time: 7.4660s). Check your callbacks."_    
This seems to appear at the end of the first batch when using a tensorboard callback. I suspect it is related to setting up the directory and creating the files for the tb logs.

### Links:

* don't: creating an empty DF and appending to it: https://stackoverflow.com/questions/13784192/creating-an-empty-pandas-dataframe-then-filling-it/56746204#56746204

### Commands:

* clearing packages and variables from environment:   
        %reset   
        %reset -f

### Command line stuff:

* installing packages:  
        conda install package  
        pip install package  
        if there are permissions issues: pip install --user package  

# TEMP

In [None]:
import numpy as np

a = np.arange(0,6).reshape((2,3))

print(a[1:].shape)
print(a[1,:].shape)

In [None]:
a = []

In [None]:
len(a)

In [None]:
a = np.random.randn(3,3)

In [None]:
m = np.random.rand(3,3) > .5

In [None]:
m

In [None]:
(a * m).ravel()

In [18]:
d = {'a':1, 'b':2, 'c':3}
d

{'a': 1, 'b': 2, 'c': 3}

In [54]:
d.values()
len(d)

3

In [27]:
np.random.choice(int(10),p=1/10)

TypeError: object of type 'float' has no len()

In [31]:
a = [0,1,2,3,4,5,6]
a

[0, 1, 2, 3, 4, 5, 6]

In [40]:
a[1:len(a)] + [10]

[1, 2, 3, 4, 5, 6, 10]

In [49]:
d = {'a':1, 'b':2, 'c':3}
tuple([d[key] for key in ['a','c']])

(1, 3)

In [53]:
len('fasdf rweqer, poiu. Asadf'.split())

4

In [52]:
np.zeros(10).shape

(10,)

In [55]:
5511/1375

4.008

In [56]:
10/5511

0.0018145527127563057

In [57]:
(5511-14)/4 + 1

1375.25

In [5]:
import numpy as np
np.arange(10)[-1]

9

TypeError: unsupported operand type(s) for +: 'dict' and 'dict'