In [1]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [2]:
import math
import numpy as np
import matplotlib.pyplot as plt
import random
%matplotlib inline

In [5]:
class Value:
  
  def __init__(self, data, _children=(), _op='', label=''):
    self.data = data
    self.grad = 0.0
    self._backward = lambda: None
    self._prev = set(_children)
    self._op = _op
    self.label = label

  def __repr__(self):
    return f"Value(data={self.data})"
  
  def __add__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other), '+')
    
    def _backward():
      self.grad += 1.0 * out.grad
      other.grad += 1.0 * out.grad
    out._backward = _backward
    
    return out

  def __mul__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other), '*')
    
    def _backward():
      self.grad += other.data * out.grad
      other.grad += self.data * out.grad
    out._backward = _backward
      
    return out
  def __sub__(self, other): # self - other
    return self + (-other)

  def __rmul__(self,other):
      return self*other

  def __radd__(self, other): # other + self
    return self + other

  def __truediv__(self,other):
      return self*other**-1

  def __neg__(self):
      return self * -1
    
  def tanh(self):
    x = self.data
    t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)
    out = Value(t, (self, ), 'tanh')
    
    def _backward():
      self.grad += (1 - t**2) * out.grad
    out._backward = _backward
    
    return out
  
  def __pow__(self,other):
      assert isinstance(other,(int,float)), "Only supporting int/float powers for now"
      out = Value(self.data**other,(self,),f"**{other}")
      def _backward():
          self.grad += (other)*(self.data)**(other-1) * out.grad
      out._backward = _backward
      return out

  def exp(self):
    x = self.data
    out = Value(math.exp(x), (self, ), 'exp')
    
    def _backward():
      self.grad += out.data * out.grad # NOTE: in the video I incorrectly used = instead of +=. Fixed here.
    out._backward = _backward
    
    return out
  
  def backward(self):
    
    topo = []
    visited = set()
    def build_topo(v):
      if v not in visited:
        visited.add(v)
        for child in v._prev:
          build_topo(child)
        topo.append(v)
    build_topo(self)
    
    self.grad = 1.0
    for node in reversed(topo):
      node._backward()

In [60]:
import numpy as np

class Module:
    def __init__(self,nin,nouts,lr = 0.1):
        self.model = MLP(nin,nouts)
        self.lr = lr

    def get_random_subset(self,data,batch_size):
        if len(data) <= batch_size:
            return data

        indexes = [i for i in range(len(data))]
        choices = np.random.choice(indexes, size=batch_size, replace=False)
        final_choice = [x for idx,x in enumerate(data) if idx in choices]
        return final_choice
        
    def train(self,training_data,ys,epochs,batch_size=32):
        for epoch in range(epochs):
            data = self.get_random_subset(training_data,batch_size)
            ypred = [self.model(x) for x in data]
            loss = sum([(yout-ygt)**2 for ygt,yout in zip(ys,ypred)])
    
            print(f"EPOCH {epoch+1}/{epochs} :Current Loss is {loss}")
        
            for p in self.model.parameters():
                p.grad = 0
        
            loss.backward()
        
            for p in self.model.parameters():
                p.data -= 0.01 * p.grad
    
    def pred(self,data):
        return [self.model(x) for x in data]
        
            
class Neuron:

    def __init__(self,nin:int):
        self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
        self.b = Value(random.uniform(-1,1))

    def __call__(self,x):
        # We want to calculate the value of w * x + b
        act = sum((wi*xi for wi,xi in zip(self.w,x)),self.b)
        out = act.tanh()
        return out

    def parameters(self):
        return self.w + [self.b]

class Layer:
    def __init__(self,nin,nout):
        self.neurons = [Neuron(nin) for _ in range(nout)]

    def __call__(self,x):
        outs = [n(x) for n in self.neurons]
        return outs[0] if len(outs)==1 else outs

    def parameters(self):
        # List Comprehension
        return [
            p for neuron in self.neurons for p in neuron.parameters()
        ]
        
class MLP:
    def __init__(self,nin,nouts):
        sz = [nin] + nouts
        self.layers = [Layer(sz[i],sz[i+1]) for i in range(len(nouts))]

    def __call__(self,x):
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        return [
            p for layer in self.layers for p in layer.parameters()
        ]

> Note : The following neural network has around 5000 parameters. :)

In [25]:
xs = [
  [2.0, 3.0, -1.0],
  [3.0, -1.0, 0.5],
  [0.5, 1.0, 1.0],
  [1.0, 1.0, -1.0],
]
ys = [-1.0, -1.0, -1.0, 1.0]
nn = Module(3,[30,30,1])
nn.train(xs,ys,10)

Current Loss is Value(data=6.801166981029446)
Current Loss is Value(data=3.8400633288545944)
Current Loss is Value(data=1.8727537512007717)
Current Loss is Value(data=0.32542432376239555)
Current Loss is Value(data=0.1978885017653678)
Current Loss is Value(data=0.14993734048284407)
Current Loss is Value(data=0.12214097742803555)
Current Loss is Value(data=0.10377218930318818)
Current Loss is Value(data=0.09065742958238432)
Current Loss is Value(data=0.08078237680725807)


## Application

Let's now try using a more complex data set - tweet toxicity.

In [15]:
import pandas as pd

In [22]:
data = pd.read_csv("./train.csv")[["text","target"]]

list(data["text"])

0       Our Deeds are the Reason of this #earthquake M...
1                  Forest fire near La Ronge Sask. Canada
2       All residents asked to 'shelter in place' are ...
3       13,000 people receive #wildfires evacuation or...
4       Just got sent this photo from Ruby #Alaska as ...
                              ...                        
7608    Two giant cranes holding a bridge collapse int...
7609    @aria_ahrary @TheTawniest The out of control w...
7610    M1.94 [01:04 UTC]?5km S of Volcano Hawaii. htt...
7611    Police investigating after an e-bike collided ...
7612    The Latest: More Homes Razed by Northern Calif...
Name: text, Length: 7613, dtype: object

In [26]:
xs = [
    model.encode(text) for text in list(data["text"])
]
ys = list(data["target"])

In [61]:
nn = Module(len(xs),[10,30,30,1])
nn.train(xs,ys,40,24)

EPOCH 1/40 :Current Loss is Value(data=66.93752772719378)
EPOCH 2/40 :Current Loss is Value(data=42.56962480781608)
EPOCH 3/40 :Current Loss is Value(data=8.640442429654083)
EPOCH 4/40 :Current Loss is Value(data=12.619579388326285)
EPOCH 5/40 :Current Loss is Value(data=12.28776030221344)
EPOCH 6/40 :Current Loss is Value(data=8.327111598945992)
EPOCH 7/40 :Current Loss is Value(data=14.671157849359806)
EPOCH 8/40 :Current Loss is Value(data=8.999666397087568)
EPOCH 9/40 :Current Loss is Value(data=9.00106219690902)
EPOCH 10/40 :Current Loss is Value(data=8.861463497182179)
EPOCH 11/40 :Current Loss is Value(data=8.824981090589233)
EPOCH 12/40 :Current Loss is Value(data=8.933404014816745)
EPOCH 13/40 :Current Loss is Value(data=8.808612043166836)
EPOCH 14/40 :Current Loss is Value(data=8.992541023451327)
EPOCH 15/40 :Current Loss is Value(data=9.733093407830511)
EPOCH 16/40 :Current Loss is Value(data=8.996625133501908)
EPOCH 17/40 :Current Loss is Value(data=8.999996749408124)
EPOCH

Now we try to do a test run against the test data set

In [70]:
data = pd.read_csv("./test.csv")
xpred = [model.encode(text) for text in list(data["text"])]

[array([ 1.45790717e-02,  1.41455454e-03,  8.49096626e-02,  1.97133571e-02,
         7.17959404e-02,  2.74998453e-02, -8.19544345e-02,  9.34824347e-02,
         1.30174942e-02, -1.01892753e-02,  2.26383656e-02,  4.15310897e-02,
        -7.81789655e-04,  5.00442795e-02, -1.52932033e-01, -7.20818788e-02,
         1.72205120e-02, -6.30093226e-03, -9.65770036e-02,  7.15548918e-02,
        -6.74255714e-02,  3.40605713e-02, -3.98736373e-02,  4.06016596e-02,
        -8.50838870e-02,  4.67408411e-02, -2.48061623e-02,  4.05918621e-02,
         6.49594842e-03, -4.08539921e-02,  1.37232738e-02, -2.14231033e-02,
         1.36758126e-02, -2.99984161e-02,  5.87882847e-03, -2.81891488e-02,
         5.28043509e-02, -3.74189466e-02,  4.16457839e-02, -7.30474964e-02,
         9.45264183e-04, -1.67926494e-02,  2.03666221e-02, -2.82387994e-02,
        -1.58662314e-03, -7.49318954e-03,  5.24389185e-02,  1.68609973e-02,
         1.20412506e-01, -5.35239503e-02, -1.06850015e-02,  3.78030874e-02,
         2.6

In [85]:
batch_size = 10
res = []

def step_func(v:Value):
    return 1 if v.data > 0.6 else 0

for batch_start in range(0,len(xpred) + 1,batch_size):
    batch_end = min(batch_start+batch_size,len(xpred)+1)
    batch = xpred[batch_start:batch_end]
    pred = nn.pred(batch)
    res.extend([step_func(v) for v in pred])
    print(f"Inference for batch {batch_start//batch_size + 1} - {batch_start}/{len(xpred)} ")

Inference for batch 1 - 0/3263 
Inference for batch 2 - 10/3263 
Inference for batch 3 - 20/3263 
Inference for batch 4 - 30/3263 
Inference for batch 5 - 40/3263 
Inference for batch 6 - 50/3263 
Inference for batch 7 - 60/3263 
Inference for batch 8 - 70/3263 
Inference for batch 9 - 80/3263 
Inference for batch 10 - 90/3263 
Inference for batch 11 - 100/3263 
Inference for batch 12 - 110/3263 
Inference for batch 13 - 120/3263 
Inference for batch 14 - 130/3263 
Inference for batch 15 - 140/3263 
Inference for batch 16 - 150/3263 
Inference for batch 17 - 160/3263 
Inference for batch 18 - 170/3263 
Inference for batch 19 - 180/3263 
Inference for batch 20 - 190/3263 
Inference for batch 21 - 200/3263 
Inference for batch 22 - 210/3263 
Inference for batch 23 - 220/3263 
Inference for batch 24 - 230/3263 
Inference for batch 25 - 240/3263 
Inference for batch 26 - 250/3263 
Inference for batch 27 - 260/3263 
Inference for batch 28 - 270/3263 
Inference for batch 29 - 280/3263 
Infer

In [90]:
submission = pd.DataFrame({'id': data['id'], 'target': res})
submission.to_csv('submission.csv', index=False)