In [7]:
import pygame.midi
import time

pygame.midi.init()
player = pygame.midi.Output(0)
player.set_instrument(48) # https://pjb.com.au/muscript/gm.html
player.note_on(64, 127) # note_on(note, velocity=None, channel = 0) https://en.scratch-wiki.info/wiki/MIDI_Notes
time.sleep(2)
player.note_off(64, 127)
del player
pygame.midi.quit()

In [5]:
# from https://gist.github.com/karpathy/d4dee566867f8291f086

"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np

nruns = 1000
noutput = 100

# data I/O
data = open('notes0.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# hyperparameters
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-1

# model parameters
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1)) # output bias

def lossFun(inputs, targets, hprev):
  """
  inputs,targets are both list of integers.
  hprev is Hx1 array of initial hidden state
  returns the loss, gradients on model parameters, and last hidden state
  """
  xs, hs, ys, ps = {}, {}, {}, {}
  hs[-1] = np.copy(hprev)
  loss = 0
  # forward pass
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars
    loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss)
  # backward pass: compute gradients going backwards
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  dhnext = np.zeros_like(hs[0])
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw)
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

def sample(h, seed_ix, n):
  """ 
  sample a sequence of integers from the model 
  h is memory state, seed_ix is seed letter for first time step
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    p = np.exp(y) / np.sum(np.exp(y))
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0
#while True:
while n<nruns:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  if p+seq_length+1 >= len(data) or n == 0: 
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data
  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

  # sample from the model now and then
  if n % noutput == 0:
    sample_ix = sample(hprev, inputs[0], 500)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print('----\n %s \n----' % (txt, ))

  # forward seq_length characters through the net and fetch gradient
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001
  if n % noutput == 0:
        print('iter %d, loss: %f' % (n, smooth_loss)) # print progress
  
  # perform parameter update with Adagrad
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 
                                [dWxh, dWhh, dWhy, dbh, dby], 
                                [mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

  p += seq_length # move data pointer
  n += 1 # iteration counter 

data has 36979 characters, 14 unique.
----
 4767 11  :989680535227'56'':71 64077934' 2,,6586435:0::7'17 46,:867: 769'8 350'430961,:2960' 993:,984, 9550',5,90:6,'7985489'8 7'1548963,''1763,6844  2628 0,0846,9048'845:1770:13:0'78782: '7 ,486':735806 1'0'1:24238: 3501 254:505':4 50'5,, 5125013'7305021222,1'40:'889'74,44'5 1565285:7254159'123 :401985,734416914980191963,26'252017:, 2845 :,1 '1  674895 ,:134'9886720105 6''8:140 1,'792'55,4839'9578279 924:650996:36377',, 46'466,,'40:4,' 349 1556450525,7451465'1:03 4:71890650 4947821,030351:,,2,5 
----
iter 0, loss: 65.976431
----
 , '1', ', ','6 ,,72:'0:851:',   30:7'8 :70 ''1 ',0 ', 5:'4 ', :7070:01: 082:62: 6, ',0'0:1 '0:2:40::7,2:87:', ::,1 '3 '1'0 ', '''0''9,2',06 '3:',:8:60:'182', '0', ', :::8 :80:,, ::0:1', '2:71:00 '',''8,2',0:'87 :8  '0:0:,0:',::',74:80:',2 '5, :70',8, '0:0:07::00 ','::0:7'61::061:,00'9 ',',::'0:': :5,, ',:4:10:',04', :5'6 ',0', 51:80 4, 2', '4', ', 9:82:',  ',0 ''0 ',3 ',0:5, 51,'2:50:',1::',0', '0:72'80'1', :0'1:68 

In [2]:
import pygame.midi
import time
import random

pygame.midi.init()
player = pygame.midi.Output(0)
player.set_instrument(1) # https://pjb.com.au/muscript/gm.html
notel = [55, 72, 77, 74, 50, 67, 77, 79, 55, 79, 81, 53, 58, 50, 77, 75, 75, 51, 74, 79, 72, 58, 77, 74, 75, 57, 74, 74, 72, 69, 72, 65, 72, 63, 51, 67, 67, 46, 70, 75, 72, 41, 70, 68, 48, 63, 74, 63, 70, 63, 70, 70, 75, 51, 69, 65, 67, 70, 50, 63, 75, 72, 46, 70, 65, 69, 82, 72, 74, 68, 72, 53, 72, 69, 77, 67, 75, 72, 55, 69, 65, 82, 51, 75, 77, 77, 75, 50, 79, 79, 74, 79, 79, 82, 67, 79, 63, 79, 79, 79, 75, 53, 79, 74, 74, 75, 46, 70, 77, 74, 55, 77, 79, 77, 82, 77, 48, 53, 79, 79, 51, 74, 79, 79]
for n in notel:
    player.note_on(n, 127) # note_on(note, velocity=None, channel = 0) https://en.scratch-wiki.info/wiki/MIDI_Notes
    time.sleep(0.17)
    player.note_off(n, 127)
del player
pygame.midi.quit()

ModuleNotFoundError: No module named 'pygame'

In [33]:
import mido

#outport = mido.open_output()
niter = 0
noteslst = []
for msg in mido.MidiFile('bjs1031c.mid').play():
    #outport.send(msg)
    #print msg.bytes()
    #print str(msg)
    thismsg = str(msg).split(" ") # ['note_on', 'channel=0', 'note=60', 'velocity=61', 'time=0.00168918958333']
    # The velocity specifies the volume or force, with which the note is played
    if thismsg[0] == 'note_on':
        #print thismsg[2][5:]
        noteslst.append(int(thismsg[2][5:]))
    niter += 1
    if niter >= 3000:
        break
print noteslst

[51, 63, 63, 67, 67, 70, 51, 70, 75, 55, 75, 70, 55, 70, 72, 56, 72, 80, 84, 56, 80, 84, 79, 82, 58, 79, 82, 77, 80, 58, 77, 80, 75, 79, 46, 75, 79, 74, 77, 46, 74, 77, 75, 79, 51, 79, 63, 75, 63, 63, 67, 67, 70, 51, 63, 70, 63, 75, 55, 75, 70, 63, 55, 70, 63, 72, 56, 63, 72, 80, 84, 56, 80, 84, 79, 82, 58, 79, 82, 77, 80, 58, 77, 80, 75, 79, 46, 75, 79, 74, 77, 46, 74, 77, 75, 79, 51, 79, 82, 75, 82, 82, 80, 80, 79, 51, 82, 79, 84, 77, 56, 77, 75, 84, 56, 75, 77, 74, 58, 77, 74, 58, 79, 75, 63, 63, 58, 58, 55, 79, 75, 55, 80, 77, 51, 80, 77, 82, 79, 51, 82, 79, 77, 74, 58, 77, 74, 58, 79, 75, 63, 63, 58, 58, 55, 79, 75, 55, 80, 77, 51, 80, 77, 82, 79, 51, 82, 79, 77, 74, 58, 77, 74, 75, 72, 58, 75, 72, 74, 70, 46, 74, 70, 72, 68, 46, 72, 68, 70, 67, 58, 70, 67, 68, 65, 58, 68, 65, 67, 63, 51, 67, 63, 63, 67, 67, 70, 63, 51, 70, 75, 70, 55, 75, 70, 70, 67, 55, 70, 67, 72, 63, 56, 72, 63, 84, 80, 56, 84, 80, 82, 79, 58, 82, 79, 80, 77, 58, 80, 77, 79, 75, 46, 79, 75, 77, 74, 46, 77, 74,

In [1]:
# BPE
import re, collections

def get_stats(vocab):
    pairs = collections.defaultdict(int)
    for word, freq in vocab.items():
        symbols = word.split()
        for i in range(len(symbols)-1):
            pairs[symbols[i],symbols[i+1]] += freq
    return pairs

def merge_vocab(pair, v_in):
    v_out = {}
    bigram = re.escape(' '.join(pair))
    p = re.compile(r'(?<!\S)' + bigram + r'(?!\S)')
    for word in v_in:
        w_out = p.sub(''.join(pair), word)
        v_out[w_out] = v_in[word]
    return v_out

if __name__=='__main__':
    vocab = {'l o w </w>' : 5, 'l o w e r </w>' : 2,'n e w e s t </w>':6, 'w i d e s t </w>':3}
    num_merges = 10
    for i in range(num_merges):
        pairs = get_stats(vocab)
        best = max(pairs, key=pairs.get)
        vocab = merge_vocab(best, vocab)
        print(best)

('e', 's')
('es', 't')
('est', '</w>')
('l', 'o')
('lo', 'w')
('n', 'e')
('ne', 'w')
('new', 'est</w>')
('low', '</w>')
('w', 'i')


In [9]:
# from https://gist.github.com/karpathy/d4dee566867f8291f086

"""
Minimal character-level Vanilla RNN model. Written by Andrej Karpathy (@karpathy)
BSD License
"""
import numpy as np

nruns = 50000
noutput = 5000

# data I/O
data = open('notes0.txt', 'r').read() # should be simple plain text file
chars = list(set(data))
data_size, vocab_size = len(data), len(chars)
print('data has %d characters, %d unique.' % (data_size, vocab_size))
char_to_ix = { ch:i for i,ch in enumerate(chars) }
ix_to_char = { i:ch for i,ch in enumerate(chars) }

# hyperparameters
hidden_size = 100 # size of hidden layer of neurons
seq_length = 25 # number of steps to unroll the RNN for
learning_rate = 1e-1

# model parameters
Wxh = np.random.randn(hidden_size, vocab_size)*0.01 # input to hidden
Whh = np.random.randn(hidden_size, hidden_size)*0.01 # hidden to hidden
Why = np.random.randn(vocab_size, hidden_size)*0.01 # hidden to output
bh = np.zeros((hidden_size, 1)) # hidden bias
by = np.zeros((vocab_size, 1)) # output bias

def lossFun(inputs, targets, hprev):
  """
  inputs,targets are both list of integers.
  hprev is Hx1 array of initial hidden state
  returns the loss, gradients on model parameters, and last hidden state
  """
  xs, hs, ys, ps = {}, {}, {}, {}
  hs[-1] = np.copy(hprev)
  loss = 0
  # forward pass
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    ys[t] = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    ps[t] = np.exp(ys[t]) / np.sum(np.exp(ys[t])) # probabilities for next chars
    loss += -np.log(ps[t][targets[t],0]) # softmax (cross-entropy loss)
  # backward pass: compute gradients going backwards
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  dhnext = np.zeros_like(hs[0])
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y. see http://cs231n.github.io/neural-networks-case-study/#grad if confused here
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw)
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

def sample(h, seed_ix, n):
  """ 
  sample a sequence of integers from the model 
  h is memory state, seed_ix is seed letter for first time step
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    p = np.exp(y) / np.sum(np.exp(y))
    ix = np.random.choice(range(vocab_size), p=p.ravel())
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

n, p = 0, 0
mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad
smooth_loss = -np.log(1.0/vocab_size)*seq_length # loss at iteration 0
#while True:
while n<nruns:
  # prepare inputs (we're sweeping from left to right in steps seq_length long)
  if p+seq_length+1 >= len(data) or n == 0: 
    hprev = np.zeros((hidden_size,1)) # reset RNN memory
    p = 0 # go from start of data
  inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]
  targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]

  # sample from the model now and then
  if n % noutput == 0 or n == nruns-1:
    sample_ix = sample(hprev, inputs[0], 1500)
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)
    print('----\n %s \n----' % (txt, ))

  # forward seq_length characters through the net and fetch gradient
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001
  if n % noutput == 0:
        print('iter %d, loss: %f' % (n, smooth_loss)) # print progress
  
  # perform parameter update with Adagrad
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by], 
                                [dWxh, dWhh, dWhy, dbh, dby], 
                                [mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update

  p += seq_length # move data pointer
  n += 1 # iteration counter 

SyntaxError: invalid syntax (<ipython-input-9-52fe8f31a26a>, line 16)

In [4]:
import pygame.midi
import time
import random

pygame.midi.init()
player = pygame.midi.Output(0)
player.set_instrument(0) # https://pjb.com.au/muscript/gm.html
# channel - note - velocity - time
notel = ['0:70:0', '0:74:0', '1:74:80', '1:79:80', '1:82:0', '2:43:0', '0:74:80', '2:46:80', '1:75:0', '1:72:80', '1:74:0', '1:72:0', '1:80:80', '2:53:80', '0:75:80', '2:53:80', '1:82:80', '2:57:0', '1:69:80', '1:72:80', '1:72:0', '0:75:80', '1:69:0', '0:74:0', '0:75:80', '2:53:80', '0:74:80', '2:46:80', '0:72:80', '2:48:0', '1:69:80', '2:53:0', '0:74:80', '0:75:80', '2:53:80', '0:75:80', '2:53:80', '1:74:80', '2:55:80', '2:53:0', '0:75:80', '1:79:80', '1:79:80', '2:46:0', '2:51:80', '0:72:0', '1:69:0', '1:75:80', '0:77:0', '0:72:0', '1:74:0', '0:74:0', '1:74:80', '0:74:80', '1:74:80', '2:53:0', '0:75:0', '1:74:0', '1:72:0', '2:70:80', '1:69:80', '2:41:0', '1:70:80', '0:72:0', '1:74:0', '1:69:0', '1:75:0', '0:70:80', '1:75:0', '1:74:80', '2:50:80', '0:70:0', '0:69:80', '2:53:80', '2:51:0', '2:55:0', '0:84:80', '1:75:0', '0:79:80', '2:51:80', '0:72:0', '0:72:80', '1:70:0', '1:74:0', '0:70:80', '2:48:80', '0:72:80', '1:72:0', '0:69:0', '2:53:0', '1:74:0', '1:75:0', '1:70:80', '1:75:80', '1:79:80', '1:65:80', '1:75:0', '2:51:80', '0:75:80', '2:41:80', '1:70:0', '1:82:0', '1:74:0', '1:75:80', '2:51:80', '0:75:0', '1:69:80', '1:79:80', '1:75:0', '1:72:0', '1:74:80', '2:51:80', '0:77:80', '2:51:80', '0:77:80', '2:51:0', '1:77:80', '1:69:80', '0:75:80', '2:53:0', '0:79:0', '0:75:80', '2:41:80', '2:58:0', '0:72:80', '1:84:80', '1:75:0', '1:81:80', '2:51:0', '0:69:80', '1:72:80', '2:51:0', '0:74:80', '2:50:80', '2:53:0', '0:69:0', '2:51:0', '1:79:80', '2:53:0', '1:79:80', '1:67:0', '0:79:80']
for n in notel:
    ns = n.split(':')
    #print ns
    player.note_on(int(ns[1]), int(ns[2]), int(ns[0])) # note_on(note, velocity=None, channel = 0) https://en.scratch-wiki.info/wiki/MIDI_Notes
    time.sleep(int(ns[2])/200)
    player.note_off(int(ns[1]), int(ns[2]), int(ns[0]))
del player
pygame.midi.quit()

In [3]:
import mido

#outport = mido.open_output()
niter = 0
noteslst = []
for msg in mido.MidiFile('bjs1031c.mid').play():
    #outport.send(msg)
    #print msg.bytes()
    #print str(msg)
    thismsg = str(msg).split(" ") # ['note_on', 'channel=0', 'note=60', 'velocity=61', 'time=0.00168918958333']
    if thismsg[0] == 'note_on':
        #print thismsg
        #print thismsg[2][5:]
        noteslst.append(thismsg[1][8:]+':'+thismsg[2][5:]+':'+thismsg[3][9:]) # channel, note, velocity
    niter += 1
    if niter >= 5000:
        break
print noteslst

['2:51:80', '1:63:80', '1:63:0', '1:67:80', '1:67:0', '1:70:80', '2:51:0', '1:70:0', '1:75:80', '2:55:80', '1:75:0', '1:70:80', '2:55:0', '1:70:0', '1:72:80', '2:56:80', '1:72:0', '0:80:80', '1:84:80', '2:56:0', '0:80:0', '1:84:0', '0:79:80', '1:82:80', '2:58:80', '0:79:0', '1:82:0', '0:77:80', '1:80:80', '2:58:0', '0:77:0', '1:80:0', '0:75:80', '1:79:80', '2:46:80', '0:75:0', '1:79:0', '0:74:80', '1:77:80', '2:46:0', '0:74:0', '1:77:0', '0:75:80', '1:79:80', '2:51:80', '1:79:0', '1:63:80', '0:75:0', '1:63:0', '0:63:80', '1:67:80', '1:67:0', '1:70:80', '2:51:0', '0:63:0', '1:70:0', '0:63:80', '1:75:80', '2:55:80', '1:75:0', '1:70:80', '0:63:0', '2:55:0', '1:70:0', '0:63:80', '1:72:80', '2:56:80', '0:63:0', '1:72:0', '0:80:80', '1:84:80', '2:56:0', '0:80:0', '1:84:0', '0:79:80', '1:82:80', '2:58:80', '0:79:0', '1:82:0', '0:77:80', '1:80:80', '2:58:0', '0:77:0', '1:80:0', '0:75:80', '1:79:80', '2:46:80', '0:75:0', '1:79:0', '0:74:80', '1:77:80', '2:46:0', '0:74:0', '1:77:0', '0:75:80', '

In [7]:
# Byte Pair Encoding BPE - Instead of merging frequent pairs of bytes, we merge characters or character sequences.
# Frequent character n-grams (or whole words) are eventually merged into a single symbol
import re, collections

def wordsep(word): # returns a word split by ' ' with delimiter </w> as required by BPE
    return ' '.join(list(word)) + ' </w>'

def inp_rep(words): # turns a list of (word-string, frequency) tuples into a BPE-input dict
    res = {}
    for w in words:
        res[wordsep(w[0])] = w[1]
    return res

def get_stats(vocab):
    pairs = collections.defaultdict(int) # dict subclass - initialize the symbol vocabulary with the character vocabulary
    for word, freq in vocab.items(): # items: return the list with all dictionary keys with values
        symbols = word.split()
        #symbols = list(word) # this works without spaces in between
        for i in range(len(symbols)-1):
            pairs[symbols[i],symbols[i+1]] += freq # iteratively count all symbol pairs
    return pairs

def merge_vocab(pair, v_in): # replace each occurrence of the most frequent pair (‘A’, ‘B’) with a new symbol ‘AB’
    v_out = {}
    bigram = re.escape(' '.join(pair)) # Escape all the characters in pattern except ASCII letters and numbers
    p = re.compile(r'(?<!\S)' + bigram + r'(?!\S)')
    for word in v_in:
        w_out = p.sub(''.join(pair), word) # used to replace substrings
        v_out[w_out] = v_in[word]
    return v_out

if __name__=='__main__':
    # represent each word as a sequence of characters, plus a special end-ofword symbol ‘·’, which allows us to restore the
    # original tokenization after translation
    #vocab = {'l o w </w>' : 5, 'l o w e r </w>' : 2,'n e w e s t </w>':6, 'w i d e s t </w>':3}
    #vocab = {'t h i s </w>' : 5, 'i s </w>' : 6,'a </w>':6, 't e s t </w>':3}
    #vocab = {'low </w>' : 5, 'lower </w>' : 2,'newest </w>':6, 'widest </w>':3}
    vocab = inp_rep([('low', 5), ('lower', 2), ('newest', 6), ('widest', 3)])
    num_merges = 10
    for i in range(num_merges):
        pairs = get_stats(vocab)
        if len(pairs) > 1:
            best = max(pairs, key=pairs.get) # get returns a value for the given key - find most frequent pair
            vocab = merge_vocab(best, vocab) # replace each occurrence of the most frequent pair (‘A’, ‘B’) with a new symbol ‘AB’
            print('best: %s : %s' % (best, pairs[best[0], best[1]]))

best: ('e', 's') : 9
best: ('es', 't') : 9
best: ('est', '</w>') : 9
best: ('l', 'o') : 7
best: ('lo', 'w') : 7
best: ('n', 'e') : 6
best: ('ne', 'w') : 6
best: ('new', 'est</w>') : 6
best: ('low', '</w>') : 5
best: ('w', 'i') : 3


In [8]:
import collections

def wordsep(word):
    return ' '.join(list(word)) + ' </w>'

def inp_rep(words):
    res = {}
    for w in words:
        res[wordsep(w[0])] = w[1]
    return res

if __name__=='__main__':
    s = 'mississippi'
    d = collections.defaultdict(int)
    for k in s:
        d[k] += 1
    print(d.items()) # return the list with all dictionary keys with values
    print(s.split())
    print(wordsep('test'))
    wl = [('test', 5), ('some', 2), ('one', 8), ('mississippi', 2)]
    print(inp_rep(wl))

dict_items([('m', 1), ('i', 4), ('s', 4), ('p', 2)])
['mississippi']
t e s t </w>
{'t e s t </w>': 5, 's o m e </w>': 2, 'o n e </w>': 8, 'm i s s i s s i p p i </w>': 2}


In [9]:
# Arithmetic-geometric mean and pi
import math

def agm(a,g,n):
    a0 = a
    g0 = g
    for i in range(n):
        an = (a0+g0)/2.
        gn = math.sqrt(a0*g0)
        a0 = an
        g0 = gn
    return (an,gn)

def piagm(a,g,n):
    a0 = a
    g0 = g
    agsum = 0.
    twop = 2.
    for i in range(1,n):
        an = (a0+g0)/2.
        gn = math.sqrt(a0*g0)
        twop *= 2.
        agsum += twop * (an*an - gn*gn)
        a0 = an
        g0 = gn
    return 4. * (agm(1., 1./math.sqrt(2.), 100)[0] ** 2) / (1. - agsum)

if __name__=='__main__':
    #print(agm(1., 1./math.sqrt(2.), 100))
    print(piagm(1., 1./math.sqrt(2.), 10))

3.1415926535913457


In [15]:
# simple feed forward network with ReLU example
import numpy as np

if __name__=='__main__':
    # parameters
    inp_size = 10 # input size
    etha = 0.1 # learning rate

    # input
    x = np.zeros((1, inp_size)) # input
    x = [[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]]

    # model parameters
    W1 = np.random.randn(inp_size, inp_size)*0.01 # input to hidden
    W2 = np.random.randn(inp_size, inp_size)*0.01 # hidden to output
    b1 = np.zeros((1, inp_size)) # inp-hidden bias
    b2 = np.zeros((1, inp_size)) # hidden-out bias

    # forward pass
    h1 = np.dot(x, W1) + b1
    h1 = np.maximum(h1, 0, h1) # ReLU
    o2 = np.dot(h1, W2) + b2
    print(o2)

    # backward pass
    y = [[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]]
    h1 = np.dot(x, W1) + b1
    dW1 = - etha * (o2 - y) * np.maximum(h1, 0, h1)
    dW2 = dW1 * ((h1 > 0) * 1.) * x
    print(dW1)
    print(dW2)

[[-6.21742849e-05  2.40038508e-04  2.40678251e-04  8.47253735e-05
  -1.28055799e-05  4.23046013e-04  1.38969143e-04 -5.51629916e-05
   2.90629974e-04 -3.72688084e-04]]
[[ 3.01755738e-08  1.65382648e-05 -0.00000000e+00 -1.44909836e-07
   2.46414218e-08 -9.02442797e-08 -2.06793148e-07  0.00000000e+00
  -0.00000000e+00  1.96541329e-07]]
[[ 0.00000000e+00  1.65382648e-05 -0.00000000e+00 -0.00000000e+00
   0.00000000e+00 -0.00000000e+00 -0.00000000e+00  0.00000000e+00
  -0.00000000e+00  0.00000000e+00]]


In [9]:
# simple feed forward network with ReLU example
import numpy as np

if __name__=='__main__':
    # parameters
    inp_size = 10 # input size
    etha = 0.1 # learning rate
    nruns = 20 # n training runs

    # model parameters
    W1 = np.random.randn(inp_size, inp_size)*0.01 # input to hidden
    W2 = np.random.randn(inp_size, inp_size)*0.01 # hidden to output
    b1 = np.zeros((1, inp_size)) # inp-hidden bias
    b2 = np.zeros((1, inp_size)) # hidden-out bias
    
    for k in range(10):
        # input
        x = np.zeros((1, inp_size)) # input
        x[0][k] = 1.

        for i in range(nruns):
            # forward pass
            h1 = np.dot(x, W1) + b1
            h1 = np.maximum(h1, 0, h1) # ReLU
            o2 = np.dot(h1, W2) + b2
            #print(o2)

            # backward pass
            y = [[0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]]
            h1 = np.dot(x, W1) + b1
            dW1 = - etha * (o2 - y) * np.maximum(h1, 0, h1)
            dW2 = dW1 * ((h1 > 0) * 1.) * x
            #print(dW1)
            #print(dW2)
            W1 += dW1
            W2 += dW2
    
    #print(W1)
    #print(W2)
    # forward pass
    x = np.zeros((1, inp_size)) # input
    x[0][1] = 1.
    h1 = np.dot(x, W1) + b1
    h1 = np.maximum(h1, 0, h1) # ReLU
    o2 = np.dot(h1, W2) + b2
    print(o2)

[[ 0.08017331  1.00081823  0.0066398  -0.02038512  0.02915765  0.02587995
  -0.00555739  0.04111021 -0.04911599 -0.07523451]]
