$
\newcommand{\nc}{\newcommand} 
\nc{\t}{\text}
\nc{\tb}{\textbf}
\nc{\ti}{\textit}
$

# $$\textbf{Python Programming for Machine Learning} $$ 
<br>

### $$\textbf{Python Basics II}$$

#### $$\text{Winter Semester 2022/23}$$

<br>

#### $$\text{Sergej Dogadov and Panagiotis Tomer Karagianis}$$
<br><br>
<center>
<img src='images/pyt.png' width=450>
</center>

### $$\lambda \textbf{-function}$$
<hr> 

In [42]:
# a function can be seen as a variable defined via an inline (lambda) expression

def f(x, y, coeff=1):
    """Return coeff times the Euclidean norm sqrt(x**2 + y**2) of the point (x, y)."""
    return coeff * (x ** 2 + y ** 2) ** 0.5

# the assignment below rebinds the name f to an equivalent anonymous (lambda) function
f = lambda x, y, coeff=1: coeff * (x ** 2 + y ** 2) ** 0.5
f(3, 4)

5.0

*  $\text{In-line function definition}$

In [43]:
# definition and execution without a name   
(lambda x, y, coeff=1: coeff*(x ** 2 + y ** 2) ** 0.5) (3, 4)

5.0

In [44]:
# Reassign function to other variable

my_function = f
my_function(3, 4, 5)

25.0

### $$\textbf{In-function }\lambda \textbf{ usage}$$
<hr> 

In [45]:
numbers = [1, 2, 3, 4, 5, 6]

def square_(x: float) -> float:
    """Return ``x`` raised to the power of two."""
    squared = pow(x, 2)
    return squared

squares = map(square_, numbers) # map(fn, x) apply fn on "numbers" element-wise

type(squares), squares, list(squares)

(map, <map at 0x103c660a0>, [1, 4, 9, 16, 25, 36])

In [46]:
#equivalent
squares = map(lambda x: x**2, numbers) # map(fn, x) applies lambda function on "numbers" element-wise

list(squares)

[1, 4, 9, 16, 25, 36]

* $\textbf{Example: } \text{filter out the odd numbers.}$

In [47]:
result = filter(lambda x: x % 2 == 0, numbers) # only even numbers are allowed
type(result), result, list(result)

(filter, <filter at 0x103c75610>, [2, 4, 6])

### $$\textbf{Dictionaries} - \text{(key, value) stores}$$
<hr> 

* $\text{Create a data point (representing a fruit)}$

In [48]:
fruit = {
    'color': 'green',
    'taste': 'sweet',
    'size 3d': [1, 3, 2]
}

type(fruit), fruit

(dict, {'color': 'green', 'taste': 'sweet', 'size 3d': [1, 3, 2]})

* $\text{Keys, getting access to values by a key}$

In [49]:
#inplace values change by a key
fruit['size 3d'] = [2, 3, 1]
fruit['color']

'green'

### $$\textbf{Dictionaries (cont'd)}$$

* $\text{Getting a default value}$

In [50]:
# there was no key 'price' in the dictionary so far
fruit.setdefault('price', 1) # the default value is stored under the missing key and returned

1

In [51]:
new_fruit = {
    'color': 'red',
    'taste': 'sweet',
    'price': 2.5
}

# in case the key exists the actual value is returned
new_fruit.setdefault('price', 1)

2.5

### $$\textbf{Merge dictionaries}$$

In [52]:
# create a new dict to be merged

properties = {
    'price' : 4,
    'best before': 7,
    True : 'sour',
    1: 'two'
}

properties[1]

'two'

In [53]:
# merge the dictionaries

fruit.update(properties) # values will be updated
fruit

{'color': 'green',
 'taste': 'sweet',
 'size 3d': [2, 3, 1],
 'price': 4,
 'best before': 7,
 True: 'two'}

### $$\textbf{Sets} - \text{unique elements}$$
<hr> 

In [54]:
lst = [2, 3, 4, 2, 4, "text" ] 
# initialize a set from a list (duplicate elements are dropped)
set_one = set(lst)

type(set_one), set_one

(set, {2, 3, 4, 'text'})

* $\textbf{Note: }\text{list element is unhashable.}$ 

In [55]:
lst = [ 1, [3, 3], [3, 4], [3, 4]]
set(lst)

TypeError: unhashable type: 'list'

In [56]:
# explicit initialization; duplicates collapse, and since True == 1 only one of the two is kept
set_two = {2, 3, 4, 2, 6, 2, 1, 4.2, True}
# NOTE: sets are unordered, so which element list(set_two)[-1] picks is not guaranteed
type(set_two), set_two, list(set_two)[-1]

(set, {1, 2, 3, 4, 4.2, 6}, 6)

In [57]:
# merge two sets
set_one.update({'s'})
set_one

{2, 3, 4, 's', 'text'}

### $$\textbf{Conditional expressions}$$
<hr> 

* $\t{Example of classifying the following fruits: }$

<br><br>

<center>
    <img src='./images/fruits.png' width='1200'/>
</center>

### $$\textbf{Decision for watermelon vs. apple vs. other}$$
<hr> 


In [58]:
def classify(x: dict) -> str:
    """
        Classifies a fruit as 'watermelon', 'apple' or 'other'

        Args:
            x (dict) : fruit description; the keys 'color' and 'size' are read

        Returns:
            string : 'watermelon' for a big green fruit, 'apple' for a medium
                     green fruit and 'other' in every other case (including a
                     fruit whose 'color' or 'size' key is missing)
    """

    # .get() yields None for a missing key, so an incomplete description
    # ends up in the 'other' branch instead of raising a KeyError
    if x.get('color') == 'green':

        size = x.get('size')

        if size == 'big':
            decision = 'watermelon'

        elif size == 'medium':
            decision = 'apple'

        else:
            decision = 'other'
    else:

        decision = 'other'

    return decision

In [59]:
# Multiple fruits represented by the dictionaries

fruit_1 = {'color': 'green', 'size': 'big'}

fruit_2 = {'color': 'green', 'size': 'medium'}

fruit_3 = {'color': 'red', 'size': 'small'}

classify(fruit_1), classify(fruit_2), classify(fruit_3)


('watermelon', 'apple', 'other')

### $$\textbf{Ternary condition operator}$$
<hr> 


In [60]:
def compare(x: dict, y: dict) -> str: 
    """ 
        Tells whether two fruits are described by equal dictionaries
        
        Args:
            x (dict) : first fruit
            y (dict) : second fruit
            
        Returns:
            string : 'same' if x == y, otherwise 'different'
    """
    
    # Conditional expression: {true_case} if {cond} else {false_case}
    verdict = "same" if x == y else "different"
    
    return verdict

compare(fruit_1, fruit_2), compare(fruit_1, fruit_1)

('different', 'same')

### $$\textbf{Iterators}$$
<hr>  

 * $\t{Built-in }$ range $\t{ function}$

In [61]:
list(range(10)), type(range(10))


([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], range)

In [62]:
for i in range(2, 13, 4): # start (opt), stop-1 (mandatory), step (opt)
    print(i)

2
6
10


### $$\textbf{Enumerator}$$
<hr>  


In [63]:
for n, i in enumerate(range(3, 13, 4)): # start, stop-1, step
    print(f'{n} -> {i:02d}')

0 -> 03
1 -> 07
2 -> 11


### $$\textbf{Iteration through a list with different objects}$$
<hr>  

In [64]:
a = [1, 2, 3]
f = lambda x: sum(x)
# [float, tuple, expr, str, var, func call, set]

lst = [5., (2, 3), 2**-0.5, 'some text', a, f(a), {2, 5} ]

print(f'Full list: {lst}')
print(100 * '=')


# The outer loop only repeats the scan; it has its own counter name so that
# the inner enumerate index `i` no longer shadows it.
for repetition in range(2):

    for i, element in enumerate(lst):

        # isinstance is the idiomatic type check (it also accepts str subclasses)
        if isinstance(element, str):
            break  # the inner iteration is interrupted at the first string
            # alternatively, "continue" would skip only the current element

        print(f"\n{i} -> {element}")

Full list: [5.0, (2, 3), 0.7071067811865476, 'some text', [1, 2, 3], 6, {2, 5}]

0 -> 5.0

1 -> (2, 3)

2 -> 0.7071067811865476

0 -> 5.0

1 -> (2, 3)

2 -> 0.7071067811865476


$\text{Checking the memory usage of your objects}$

In [65]:
import sys # part of the standard library; third-party packages (torch, cupy, scipy, ...) need e.g. "conda install <package_name>"

mylist = range(0, 1_000_000) # lazy range object (not a generator): it does not store all values
sys.getsizeof(mylist), sys.getsizeof(list(mylist)) # explicit casting to list materializes every element

(48, 8000056)

### $$\textbf{Programming exercise}$$
<hr>  

#### $\textbf{Example:} \text{ Given a data which is a list of dictionaries.}$

In [66]:
data = [
  {'color': 'green', 'size': 'big'},
  {'color': 'yellow', 'shape': 'round', 'size': 'big'},
  {'color': 'red', 'size': 'medium'},
  {'color': 'green', 'size': 'big'},
  {'color': 'red', 'size': 'small', 'taste': 'sour'},
  {'color': 'green', 'size': 'small'}
]

type(data), type(data[0])

(list, dict)

#### $\textbf{Goal:} \text{ to make a prediction for multiple observations.}$

In [67]:
results = []

for x in data:
    
    res = classify(x)
    
    print(f'Fruit: {x} \nClass: {res} \n') # \n is the newline (line feed) character: it starts a new line
    
    results.append(res) # equiv. to  results += [res]
    
"All:", results # the last expression of the cell is displayed; here it is a tuple

Fruit: {'color': 'green', 'size': 'big'} 
Class: watermelon 

Fruit: {'color': 'yellow', 'shape': 'round', 'size': 'big'} 
Class: other 

Fruit: {'color': 'red', 'size': 'medium'} 
Class: other 

Fruit: {'color': 'green', 'size': 'big'} 
Class: watermelon 

Fruit: {'color': 'red', 'size': 'small', 'taste': 'sour'} 
Class: other 

Fruit: {'color': 'green', 'size': 'small'} 
Class: other 



('All:', ['watermelon', 'other', 'other', 'watermelon', 'other', 'other'])

#### $\text{The same goal can be achieved with list comprehensions:}$
<hr> 

In [68]:
results = [classify(x) for x in data]

type(results), results

(list, ['watermelon', 'other', 'other', 'watermelon', 'other', 'other'])

 * This can also be combined with conditions:

In [69]:
# Syntax: [{true_case} if {cond} else {false_case} for item in iterable]
# (a filtering condition would instead go at the end: [expr for item in iterable if cond])

[classify(x) if x['color'] == 'green' else 'other' for x in data]

['watermelon', 'other', 'other', 'watermelon', 'other', 'other']

### $$\textbf{Counting "watermelon" objects in the data}$$
<hr> 

In [70]:
result = [classify(x) for x in data]

obj = 'watermelon'

count = 0
for res in result:
    
    if res == obj:
        count += 1

f'Total number of {obj}s is {count}'

'Total number of watermelons is 2'

* $\ti{Pythonic} \text{ way using list comprehensions and the built-in } \tb{sum } \t{function}: $

In [71]:
# sum([True, False, True, ...]) -> sum([1, 0, 1,...])
# Booleans count as 1/0, so summing the comparison results counts the matches.

lst = [classify(x) for x in data]
# compare against `obj` (not a hard-coded label) so the count always matches the message below
cnt = sum([res == obj for res in lst])

f'Total number of {obj}s is {cnt}'

'Total number of watermelons is 2'

### $$\textbf{Dictionary comprehensions}$$
<hr>  

In [72]:
names = ['Anna', 'Bob', 'Carl']
ages = [21, 18, 34]

In [73]:
# zip returns a lazy iterator (not a list) that yields tuples built element-wise from its arguments
# NOTE: the last argument is a set, which is unordered, so the order of its values in the tuples is not guaranteed

names_and_ages = zip(names, ages, (0, 1, 2), {1, 32, 4})

for el in names_and_ages:
    print(el)

# the loop above has exhausted the iterator, so list(...) is empty here
type(names_and_ages), list(names_and_ages)

('Anna', 21, 0, 32)
('Bob', 18, 1, 1)
('Carl', 34, 2, 4)


(zip, [])

In [74]:
# Dictionary comprehension syntax: {key_expr: value_expr for item in iterable}
# first loop step: name, age = ('Anna', 21)

dct = {name: age for name, age in zip(names, ages)}  # maps name -> age
dct = {age: name for name, age in zip(names, ages)}  # maps age -> name (replaces the mapping above)
type(dct), dct

(dict, {21: 'Anna', 18: 'Bob', 34: 'Carl'})

### $$\textbf{Reading Data from a file}$$
<hr>  

Content of file _scores.txt_ that lists the performance of players at a certain game:

<br>

`80,55,16,26,37,62,49,13,28,56`

`43,45,47,63,43,65,10,52,30,18`

`63,71,69,24,54,29,79,83,38,56`

`46,42,39,14,47,40,72,43,57,47`

`61,49,65,31,79,62,9,90,65,44`

`10,28,16,6,61,72,78,55,54,48`

#### $\text{The following program reads a file and stores scores into a list}$
<hr> 

* `with` statement here takes care of opening and closing the file.

In [32]:
with open('./scores.txt', 'r') as f: # the file is closed automatically when the block is left (the name f stays bound to the closed file)
    
    data = []
    
    for line in f:
        
        # drop the trailing newline and split the comma-separated entries
        line_entries = line.strip().split(',')
        
        print(line_entries)
        lst = [float(x) for x in line_entries]
        #data.append(lst)
        data.extend(lst)
        # extend adds the numbers one by one (flat list); in contrast, append(lst) would add one nested list per line
        
      
    
print(f'Data length: {len(data)}')
f'File content: {data}'


['80', '55', '16', '26', '37', '62', '49', '13', '28', '56']
['43', '45', '47', '63', '43', '65', '10', '52', '30', '18']
['63', '71', '69', '24', '54', '29', '79', '83', '38', '56']
['46', '42', '39', '14', '47', '40', '72', '43', '57', '47']
['61', '49', '65', '31', '79', '62', '9', '90', '65', '44']
['10', '28', '16', '6', '61', '72', '78', '55', '54', '48']
Data length: 60


'File content: [80.0, 55.0, 16.0, 26.0, 37.0, 62.0, 49.0, 13.0, 28.0, 56.0, 43.0, 45.0, 47.0, 63.0, 43.0, 65.0, 10.0, 52.0, 30.0, 18.0, 63.0, 71.0, 69.0, 24.0, 54.0, 29.0, 79.0, 83.0, 38.0, 56.0, 46.0, 42.0, 39.0, 14.0, 47.0, 40.0, 72.0, 43.0, 57.0, 47.0, 61.0, 49.0, 65.0, 31.0, 79.0, 62.0, 9.0, 90.0, 65.0, 44.0, 10.0, 28.0, 16.0, 6.0, 61.0, 72.0, 78.0, 55.0, 54.0, 48.0]'

### $$\textbf{Training and Test data separation}$$
<hr>

In [33]:
N = len(data)

ratio = 0.8
split = int(ratio * N) # 80 % of length

train_data = data[:split]
test_data  = data[split:]

print(f"Train len: {len(train_data)} \nTest len: {len(test_data)}")

Train len: 48 
Test len: 12


#### $\text{Writing results back into a file with exception handling:}$
<hr> 

In [34]:
import os # imports package for file and dir handling


def write(data, outfile='outputs.txt', folder='./data'):
    """
        Writes str(data) into the file folder/outfile without overwriting

        The folder is created if it does not exist yet. A message is printed
        on success as well as on failure; nothing is returned.

        Args:
            data : object whose string representation is written
            outfile (str) : name of the output file
            folder (str) : directory the file is placed in
    """

    os.makedirs(folder, exist_ok=True)
    filepath = os.path.join(folder, outfile)

    # Build the text first so that a failing str() cannot leave an empty file behind
    text = str(data)

    try:

        # Mode 'x' creates the file exclusively: the existence check and the
        # creation are a single step, so an existing file is never overwritten.
        with open(filepath, 'x') as f:

            f.write(text)

            print(f'Sucessfuly written to {filepath}')

    except FileExistsError:

        print(f"Exception occured: File '{filepath}' already exists.")

    except OSError as e:

        # other I/O problems (e.g. missing permissions) are reported, not raised
        print(f"Exception occured: {e}")

In [35]:
write(train_data, outfile='train_scores.txt')
write(test_data, outfile='te_scores.txt')

Exception occured: File './data/train_scores.txt' already exists.
Exception occured: File './data/te_scores.txt' already exists.


### $$\textbf{Classes}$$
<hr>  

$\text{Classes are useful for modeling anything that has an internal state, for example, machine learning models.}$

$\text{The model below classifies whether a score is above/below the average.}$

In [36]:
class Classifier(object): # parent class 
    """
        Labels scores as 'above' or 'below' the average of the training data

        Attributes:
            avg : average learned by train (0 until train is called)
            name (str) : label used in the object representation
    """

    mu = 10  # class attribute shared by all instances; not used by the methods below

    def __init__(self, name='Score'): # constructor (special method)
        self.avg = 0
        self.name = name

    # special method to define an object representation of the class
    def __repr__(self):
        return f'{self.name} classifier with avg: {self.avg:0.3f}'

    # methods
    def train(self, data):
        """
            Learns the average of the given scores

            Args:
                data : iterable of numbers (list, tuple, generator, ...)

            Returns:
                Classifier : the object itself, so calls can be chained

            Raises:
                ValueError : if data contains no elements
        """
        # materialize once so that one-shot iterables work and len() is defined
        values = list(data)

        if not values:
            raise ValueError('Cannot train on empty data.')

        self.avg = sum(values) / len(values)
        return self

    def predict(self, data):
        """
            Labels every score relative to the learned average

            Args:
                data : iterable of numbers

            Returns:
                list : 'above' if a score is strictly greater than avg, else 'below'
        """
        return ['above' if x > self.avg else 'below' for x in data]
    

### $$\textbf{Creation of a new classifier object}$$
<hr> 

In [37]:
c = Classifier(name='Custom')
print(c.avg, c.name, c.mu)

 # __repr__ function is called
print(c)              

d = Classifier(name='Temp')
print(d)  

c is d

0 Custom 10
Custom classifier with avg: 0.000
Temp classifier with avg: 0.000


False

* Get training and test datasets from _scores.txt_ file

In [38]:
from utils import get_train_test_data

train_data, test_data = get_train_test_data(path='./scores.txt', train_size=0.8)

Data length: 60
Train len: 48 Test len: 12


 * Train the classifier and inspect what the classifier has learned:

In [39]:
c.train(train_data)
print(c)

Custom classifier with avg: 47.854


### $$\textbf{Application of the test data to the model}$$
<hr>

In [40]:
print(f"Test data len: {len(test_data)}")

test_preds = c.predict(test_data)

print(f"Avg: {c.avg:0.3f}")
print("Test: ", test_data)
print("Pred: ", test_preds)

Test data len: 12
Avg: 47.854
Test:  [65.0, 44.0, 10.0, 28.0, 16.0, 6.0, 61.0, 72.0, 78.0, 55.0, 54.0, 48.0]
Pred:  ['above', 'below', 'below', 'below', 'below', 'below', 'above', 'above', 'above', 'above', 'above', 'above']


In [41]:
n_samples_to_print = 4

res = zip(test_data[:n_samples_to_print], test_preds[:n_samples_to_print]) # zip combines two objects into a tuple elementwise

list(res) # cast to list for printing


[(65.0, 'above'), (44.0, 'below'), (10.0, 'below'), (28.0, 'below')]

# $$\textbf{Thank you for your attention.}$$