In [1]:
import math
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
class Value:
  """A scalar that remembers how it was computed so gradients can be
  back-propagated through the resulting expression graph.

  Attributes:
    data: the wrapped number.
    grad: derivative of the node on which backward() was called with respect
      to this node; starts at 0.0 and is accumulated with +=.
    label: optional display name; no computation in this class reads it.
  """

  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None  # leaves have nothing to propagate
    self._prev = set(_children)    # operands this value was computed from
    self._op = _op                 # symbol of the producing operation
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"

  def __add__(self, other):
    """Return self + other; plain numbers are wrapped in a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')

    def _backward():
      # addition passes the upstream gradient through unchanged
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward

    return out

  def __mul__(self, other):
    """Return self * other; plain numbers are wrapped in a Value first."""
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')

    def _backward():
      # product rule: each operand's local derivative is the other operand
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward

    return out

  def __pow__(self, other):
    """Return self ** other for a constant int/float exponent.

    Raises:
      AssertionError: if other is not an int or float.
    """
    assert isinstance(other, (int, float)), "only supporting int/float powers for now"
    out = Value(self.data**other, (self,), f'**{other}')

    def _backward():
      # d/dx x**0 is 0, so there is nothing to add; skipping the general
      # formula also avoids evaluating 0.0 ** -1 (ZeroDivisionError).
      if other != 0:
        self.grad += other * (self.data ** (other - 1)) * out.grad
    out._backward = _backward

    return out

  def __rmul__(self, other): # other * self
    return self * other

  def __truediv__(self, other): # self / other
    return self * other**-1

  def __rtruediv__(self, other): # other / self
    return other * self**-1

  def __neg__(self): # -self
    return self * -1

  def __sub__(self, other): # self - other
    return self + (-other)

  def __rsub__(self, other): # other - self
    return (-self) + other

  def __radd__(self, other): # other + self
    return self + other

  def tanh(self):
    """Return tanh(self) as a new Value."""
    # math.tanh saturates to +/-1.0 for large |x|; the explicit
    # (e^(2x) - 1) / (e^(2x) + 1) form overflows in math.exp instead.
    t = math.tanh(self.data)
    out = Value(t, (self, ), 'tanh')

    def _backward():
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward

    return out

  def exp(self):
    """Return e ** self as a new Value."""
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')

    def _backward():
      # d/dx e^x = e^x, which is out.data; += so that a node feeding several
      # consumers receives every contribution
      self.grad += out.data * out.grad
    out._backward = _backward

    return out

  def backward(self):
    """Back-propagate from this node through everything it depends on.

    Sets self.grad to 1.0 and then runs every node's _backward in reverse
    topological order. Gradients are accumulated with +=, so grads left over
    from an earlier call are not cleared here.
    """
    # Depth-first post-order with an explicit stack: same ordering as the
    # recursive formulation, but long operation chains cannot exhaust the
    # interpreter's recursion limit.
    topo = []
    visited = {self}
    stack = [(self, iter(self._prev))]
    while stack:
      node, pending = stack[-1]
      for child in pending:
        if child not in visited:
          visited.add(child)
          stack.append((child, iter(child._prev)))
          break
      else:
        # every operand of `node` has been emitted already
        topo.append(node)
        stack.pop()

    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

In [3]:
class Neuron:
  """One linear unit: a weighted sum of its inputs plus a bias.

  No activation is applied (tanh was deliberately left out), so the output is
  affine in the inputs.
  """

  def __init__(self, nin):
    # nin weights first, then the bias, each drawn uniformly from [-1, 1]
    weights = []
    for _ in range(nin):
      weights.append(Value(random.uniform(-1,1)))
    self.w = weights
    self.b = Value(random.uniform(-1,1))

  def __call__(self, x):
    """Return b + sum(w_i * x_i); zip pairs the weights with the leading entries of x."""
    products = (weight * feature for weight, feature in zip(self.w, x))
    return sum(products, self.b)

  def parameters(self):
    """Return the trainable values as a new list: every weight, then the bias."""
    return [*self.w, self.b]

class Layer:
  """A group of nout Neurons that all receive the same nin-sized input."""

  def __init__(self, nin, nout):
    neurons = []
    for _ in range(nout):
      neurons.append(Neuron(nin))
    self.neurons = neurons

  def __call__(self, x):
    """Feed x to every neuron; a one-neuron layer returns the bare output, not a list."""
    outs = [neuron(x) for neuron in self.neurons]
    if len(outs) == 1:
      return outs[0]
    return outs

  def parameters(self):
    """Return the parameters of every neuron, concatenated in neuron order."""
    collected = []
    for neuron in self.neurons:
      collected.extend(neuron.parameters())
    return collected

class MLP:
  """A stack of Layers applied one after another.

  nin is the input width and nouts lists the width of each successive layer.
  """

  def __init__(self, nin, nouts):
    sz = [nin] + nouts
    # consecutive entries of sz are the (inputs, outputs) of each layer
    self.layers = [Layer(fan_in, fan_out) for fan_in, fan_out in zip(sz, sz[1:])]

  def __call__(self, x):
    """Pass x through every layer in order and return the last layer's output."""
    out = x
    for layer in self.layers:
      out = layer(out)
    return out

  def parameters(self):
    """Return the parameters of every layer, concatenated in layer order."""
    collected = []
    for layer in self.layers:
      collected.extend(layer.parameters())
    return collected

In [4]:
# Network with 2 inputs, one hidden layer of 2 neurons and a single output neuron.
# NOTE(review): Neuron draws its initial weights with random.uniform and no seed
# is set in this notebook, so the loss trace printed by the training cell will
# differ between runs; call random.seed(<int>) before this line if the output
# needs to be reproducible.
n = MLP(2, [2, 1])

In [5]:
# Load the pitch measurements; the preview below shows the columns
# subject, gender, sentence, context and pitch.
df = pd.read_csv('https://tinyurl.com/polite-data')
df.head()

Unnamed: 0,subject,gender,sentence,context,pitch
0,F1,F,S1,pol,213.3
1,F1,F,S1,inf,204.5
2,F1,F,S2,pol,285.1
3,F1,F,S2,inf,259.7
4,F1,F,S3,pol,203.9


In [6]:
# Encode the two categorical predictors as 0/1 indicators:
#   gender  -> 0 for 'F',   1 for anything else
#   context -> 0 for 'pol', 1 for anything else
x1 = [int(gender != 'F') for gender in df['gender']]
x2 = [int(context != 'pol') for context in df['context']]
# one [gender, context] feature pair per row, with the matching pitch as target
xs = [[gender_flag, context_flag] for gender_flag, context_flag in zip(x1, x2)]
ys = [pitch for pitch in df['pitch']]

In [7]:
# Full-batch gradient descent on the summed squared error, 100 steps.
for k in range(100):

  # forward pass: predict every sample, then add up the squared residuals
  ypred = [n(sample) for sample in xs]
  loss = sum((predicted - target)**2 for target, predicted in zip(ys, ypred))

  # backward pass: gradients accumulate with +=, so clear them before each run
  for p in n.parameters():
    p.grad = 0.0
  loss.backward()

  # update: step every parameter against its gradient
  for p in n.parameters():
    p.data -= 0.00001 * p.grad

  # the loss shown is the one computed before this step's update
  print(k, loss.data)
  



0 3455247.2997586513
1 3434566.1671578325
2 3411621.2429354987
3 3380109.430017223
4 3331001.9290969404
5 3249040.0067310026
6 3108559.7350397636
7 2870438.1183613315
8 2488515.9006357077
9 1944842.1258124528
10 1322524.2308111556
11 824062.391974523
12 579415.3510557609
13 496416.51287763496
14 455764.7020578941
15 421991.00167623477
16 390704.2231389075
17 361517.8165040246
18 334356.84778232884
19 309173.14878646785
20 285922.54698124784
21 264559.19527184416
22 245031.91943088293
23 227281.68479533793
24 211240.11635739583
25 196829.0226133111
26 183960.82673157688
27 172539.7589427398
28 162463.62518341426
29 153625.94587400532
30 145918.25796193606
31 139232.39180907095
32 133462.56786421328
33 128507.20029907754
34 124270.33952346134
35 120662.72721263567
36 117602.47225648776
37 115015.3818011074
38 112834.9979084024
39 111002.39818538747
40 109465.81967315021
41 108180.1612740005
42 107106.41291255257
43 106211.0510696857
44 105465.43151302189
45 104845.20179844844
46 104329.7

In [8]:
# Spot check: the first five observed pitches next to the network's predictions.
ypred = [n(sample) for sample in xs[:5]]
observed_vs_predicted = zip(ys[:5], ypred)
print(list(observed_vs_predicted))

[(213.3, Value(data=237.156619487682)), (204.5, Value(data=256.78224257047947)), (285.1, Value(data=237.156619487682)), (259.7, Value(data=256.78224257047947)), (203.9, Value(data=237.156619487682))]


In [9]:
# Root-mean-square error over the training data: sqrt(summed squared error / N).
# `loss` is the value left over from the final loop iteration, i.e. it was
# computed before that iteration's parameter update.
# NOTE(review): R's "Residual standard error" divides by the residual degrees of
# freedom rather than N, so expect the two figures to be close, not identical.
math.sqrt(loss.data / len(ys)) # RMSE, not a plain average of the loss

35.024976596735726

In [10]:
# Enable the %%R cell magic by loading rpy2's IPython extension
%load_ext rpy2.ipython 

In [11]:
%%R
# Reference fit: ordinary least squares of pitch on gender and context,
# read from the same CSV the Python cells use.
df.R <- read.csv('https://tinyurl.com/polite-data')

m <- lm(formula = pitch ~ gender + context, data = df.R)

# The "Residual standard error" line is the figure to compare with the Python result.
summary(m)


Call:
lm(formula = pitch ~ gender + context, data = df.R)

Residuals:
    Min      1Q  Median      3Q     Max 
-82.409 -26.561  -4.262  24.690 100.140 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  256.762      6.756  38.006   <2e-16 ***
genderM     -108.349      7.833 -13.832   <2e-16 ***
contextpol   -19.553      7.833  -2.496   0.0146 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 35.68 on 80 degrees of freedom
Multiple R-squared:  0.7109,	Adjusted R-squared:  0.7037 
F-statistic: 98.38 on 2 and 80 DF,  p-value: < 2.2e-16



In [26]:
# Inputs are [gender, context] with gender 0 = female / 1 = male and
# context 0 = polite / 1 = informal, matching the encoding used to build xs.
nn_cases = [
  ('NN Prediction for female in polite context: ', [0,0]),
  ('NN Prediction for female in informal context: ', [0,1]),
  ('NN Prediction for male in polite context: ', [1,0]),
  ('NN Prediction for male in informal context: ', [1,1]),
]
for caption, features in nn_cases:
  print(caption, n(features).data)
print('\n\n')

# Coefficients copied by hand from the lm() summary: (Intercept), genderM, contextpol.
# The intercept is the female / informal baseline.
intercept, gender_m, context_pol = 256.762, -108.349, -19.553
lm_cases = [
  ('LM Prediction for female in polite context: ', intercept + context_pol),
  ('LM Prediction for female in informal context: ', intercept),
  ('LM Prediction for male in polite context: ', intercept + gender_m + context_pol),
  ('LM Prediction for male in informal context: ', intercept + gender_m),
]
for caption, fitted in lm_cases:
  print(caption, fitted)

NN Prediction for female in polite context:  237.156619487682
NN Prediction for female in informal context:  256.78224257047947
NN Prediction for male in polite context:  128.82792182709886
NN Prediction for male in informal context:  148.45354490989635



LM Prediction for female in polite context:  237.209
LM Prediction for female in informal context:  256.762
LM Prediction for male in polite context:  128.86
LM Prediction for male in informal context:  148.413


In [12]:
# Notes:
# - tanh was taken out of Neuron because this is a linear model.
# - A much smaller learning rate is used.
#
# - Compare R's residual standard error with the root-mean-square error computed from the final loss above.