## Lecture 5 revision

In [4]:
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import torch

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [5]:
class Value:
  """Scalar that records the operations applied to it for reverse-mode autodiff.

  Attributes:
    data: the wrapped Python number.
    grad: accumulated derivative with respect to this value; starts at 0.0 and
      is only ever added to (+=), so callers reset it between backward passes.
    label: optional free-form name; not used by the class itself.
  """

  def __init__(self, data, _children=(), _op='', label=''):
    """Wrap `data`; `_children` and `_op` describe how this node was produced."""
    self.data = data
    self.grad = 0.0
    # Leaf nodes have nothing to propagate, hence the no-op default.
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"

  def __add__(self, other):
    """Return self + other; plain numbers are wrapped in a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')

    def _backward():
      # d(a+b)/da = d(a+b)/db = 1
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward

    return out

  def __mul__(self, other):
    """Return self * other; plain numbers are wrapped in a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    def _backward():
      # d(a*b)/da = b, d(a*b)/db = a
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward

    return out

  def __pow__(self, other):
    """Return self ** other for a constant int/float exponent."""
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
      # d(x**k)/dx = k * x**(k-1)
      self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out

  def __rmul__(self, other): # other * self
    return self * other

  def __truediv__(self, other): # self / other
    return self * other**-1

  def __rtruediv__(self, other): # other / self
    # Reflected division, so that e.g. `1 / Value(2.0)` works.
    return other * self**-1

  def __neg__(self): # -self
    return self * -1

  def __sub__(self, other): # self - other
    return self + (-other)

  def __rsub__(self, other): # other - self
    # Reflected subtraction, so that e.g. `5 - Value(2.0)` works.
    return other + (-self)

  def __radd__(self, other): # other + self
    return self + other

  def tanh(self):
    """Return tanh(self) as a new Value."""
    # math.tanh saturates to +/-1.0 for large |x|; the explicit
    # (e^{2x} - 1) / (e^{2x} + 1) form raises OverflowError once e^{2x}
    # exceeds the float range.
    t = math.tanh(self.data)
    out = Value(t, (self, ), 'tanh')

    def _backward():
      # d tanh(x)/dx = 1 - tanh(x)**2
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward

    return out

  def exp(self):
    """Return e**self as a new Value."""
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')

    def _backward():
      # d e^x/dx = e^x, which is exactly out.data
      self.grad += out.data * out.grad
    out._backward = _backward

    return out

  def backward(self):
    """Backpropagate from this node, accumulating .grad on every node it depends on.

    Sets self.grad to 1.0 and then runs each node's local _backward in reverse
    topological order. Gradients are accumulated, not overwritten, so nodes
    that are reused across calls must have .grad reset by the caller.
    """
    # Iterative post-order depth-first traversal. It visits nodes in the same
    # order as the straightforward recursive version, but does not consume
    # Python stack frames, so deep graphs do not raise RecursionError.
    topo = []
    visited = {self}
    stack = [(self, iter(self._prev))]
    while stack:
      node, children = stack[-1]
      for child in children:
        if child not in visited:
          visited.add(child)
          stack.append((child, iter(child._prev)))
          break
      else:
        # All children handled: the node can now be emitted.
        stack.pop()
        topo.append(node)

    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

In [6]:
class Neuron:
  """One unit computing a weighted sum of its inputs plus a bias.

  No activation is applied: the output is the raw linear combination.
  """

  def __init__(self, nin):
    """Create `nin` weights and one bias, each drawn uniformly from [-1, 1]."""
    weights = []
    for _ in range(nin):
      weights.append(Value(random.uniform(-1, 1)))
    self.w = weights
    # The bias is drawn after the weights.
    self.b = Value(random.uniform(-1, 1))

  def __call__(self, x):
    """Return b + sum(w_i * x_i) over the paired weights and inputs."""
    total = self.b
    for weight, value in zip(self.w, x):
      total = total + weight * value
    return total

  def parameters(self):
    """Return the weights followed by the bias, as a new list."""
    return [*self.w, self.b]

class Layer:
  """A group of `nout` neurons that all receive the same `nin` inputs."""

  def __init__(self, nin, nout):
    """Build `nout` independent neurons, each taking `nin` inputs."""
    units = []
    for _ in range(nout):
      units.append(Neuron(nin))
    self.neurons = units

  def __call__(self, x):
    """Apply every neuron to `x`.

    Returns a list with one output per neuron, except that a layer with a
    single neuron returns that output directly instead of a one-element list.
    """
    activations = []
    for unit in self.neurons:
      activations.append(unit(x))
    if len(activations) != 1:
      return activations
    return activations[0]

  def parameters(self):
    """Return the parameters of all neurons, concatenated in neuron order."""
    collected = []
    for unit in self.neurons:
      collected.extend(unit.parameters())
    return collected

class MLP:
  """A stack of layers applied one after another."""

  def __init__(self, nin, nouts):
    """Build the layers.

    nin: number of inputs to the first layer.
    nouts: list with the number of neurons in each successive layer.
    """
    widths = [nin] + nouts
    # Each layer maps one width to the next one in the list.
    self.layers = [Layer(fan_in, fan_out) for fan_in, fan_out in zip(widths, widths[1:])]

  def __call__(self, x):
    """Feed `x` through every layer in order and return the final output."""
    signal = x
    for stage in self.layers:
      signal = stage(signal)
    return signal

  def parameters(self):
    """Return the parameters of all layers, concatenated in layer order."""
    collected = []
    for stage in self.layers:
      collected.extend(stage.parameters())
    return collected

In [8]:
# Download the CSV behind the short link into a DataFrame and preview the first rows.
POLITE_DATA_URL = 'https://tinyurl.com/polite-data'
df = pd.read_csv(POLITE_DATA_URL)
df.head()

Unnamed: 0,subject,gender,sentence,context,pitch
0,F1,F,S1,pol,213.3
1,F1,F,S1,inf,204.5
2,F1,F,S2,pol,285.1
3,F1,F,S2,inf,259.7
4,F1,F,S3,pol,203.9


In [24]:
# Binary-encode the two categorical predictors:
#   gender:  "F"   -> 0, anything else -> 1
#   context: "pol" -> 0, anything else -> 1
x1 = [1 if gender != "F" else 0 for gender in df["gender"]]
x2 = [1 if context != "pol" else 0 for context in df["context"]]

# One [gender, context] pair per row of the frame.
xs = [[gender_code, context_code] for gender_code, context_code in zip(x1, x2)]
print(xs)

# Regression target: the pitch column.
ys = [pitch for pitch in df["pitch"]]
print(ys)

[[0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 1], [0, 0], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 1], [0, 0], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 1], [1, 1], [1, 0], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 1], [1, 0], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 0], [0, 1], [0, 1], [0, 0], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 0], [1, 1], [1, 1], [1, 0]]
[213.3, 204.5, 285.1, 259.7, 203.9, 286.9, 250.8, 276.8, 231.9, 252.4, 181.2, 230.7, 216.5, 154.8, 229.7, 237.3, 236.8, 251.0, 267.0, 266.0, 275.4, 306.8, 232.6, 252.5, 226.5, 278.8, 264.4, 185.5, 110.7, 123.6, 229.0, 114.9, 112.2, 213.6, 193.4, 162.9, 101.8, 126.9, 136.2, 146.0, 126.5, 86.1, 99.1, 82.2, 104.3, 85.9, 110.2, 97.1, 120

In [25]:
# Seed Python's RNG before building the model: Neuron draws its initial weights
# and bias from random.uniform, so without a fixed seed every run starts from
# different parameters and the printed losses cannot be reproduced.
random.seed(42)

# 2 inputs -> layer of 2 neurons -> layer of 1 neuron.
n = MLP(2, [2, 1])

In [26]:
# Train the network with full-batch gradient descent.
N_STEPS = 100
LEARNING_RATE = 0.00001

for k in range(N_STEPS):

    # Forward pass.
    #   ypred:      the model's prediction for every input in xs
    #   prediction: a single model output, compared with its target to get the loss
    #   target:     the ground truth, i.e. the expected value for that input
    ypred = [n(features) for features in xs]
    loss = sum((prediction - target) ** 2 for target, prediction in zip(ys, ypred))

    # Backward pass: clear the parameter gradients first, since backward()
    # adds to .grad rather than overwriting it.
    for p in n.parameters():
        p.grad = 0.0
    loss.backward()

    # Update: step each parameter against its gradient.
    for p in n.parameters():
        p.data += -LEARNING_RATE * p.grad

    print(k, loss.data)

0 3477659.0198527025
1 3450609.741587045
2 3421845.6144680474
3 3381791.055546259
4 3317874.751962829
5 3209865.044357389
6 3025702.366244382
7 2722038.7964351773
8 2263061.7721712138
9 1676071.1513850891
10 1111524.4947761337
11 756390.4816771045
12 614645.0460746991
13 557330.8312534876
14 516338.20767397794
15 479068.22945311596
16 444207.3104835982
17 411560.7868163948
18 381037.9869407462
19 352573.2451390525
20 326113.8883776662
21 301613.6559474223
22 279027.471384587
23 258307.25652977853
24 239398.77464236045
25 222239.51105037105
26 206757.5732799723
27 192871.547540561
28 180491.19802096338
29 169518.85028499604
30 159851.2684070291
31 151381.82231278866
32 144002.74823678
33 137607.32892387445
34 132091.85631574472
35 127357.28191083691
36 123310.50283127998
37 119865.2700566278
38 116942.73614019858
39 114471.6816045592
40 112388.47220573912
41 110636.8044997513
42 109167.29635083323
43 107936.97405751505
44 106908.70037015353
45 106050.57925180641
46 105335.36483885182
47

In [27]:
# Show the first five targets next to the trained model's predictions.
# zip stops at the shorter sequence, so only five pairs are printed.
ypred = [n(features) for features in xs[:5]]
print([(target, prediction) for target, prediction in zip(ys, ypred)])

[(213.3, Value(data=237.14670299736812)), (204.5, Value(data=256.78595151012405)), (285.1, Value(data=237.14670299736812)), (259.7, Value(data=256.78595151012405)), (203.9, Value(data=237.14670299736812))]


In [28]:
# Root-mean-square error: `loss` is the summed squared error from the training loop,
# so dividing by the number of samples and taking the square root gives the typical
# prediction error in the same units as ys. The same margin of error can be seen in
# the output of the cell above.
math.sqrt(loss.data / len(ys))

35.0249868630383

This part is meant to compare the neural model with a non-neural linear model by checking their losses.

We did not manage to get Python and R to interact.