### Text Corpus

In [2]:
# Tokenise the toy corpus on whitespace.
corpus = 'the cat meows and a dog barks'.split()

# The vocabulary is the distinct words in first-seen order. It is built as its
# own list so that (a) repeated words in a corpus do not produce duplicate
# embedding labels and (b) mutating one of the two lists cannot silently
# change the other.
vocabulary = list(dict.fromkeys(corpus))

vocabulary

['the', 'cat', 'meows', 'and', 'a', 'dog', 'barks']

### Create Embedding Matrix

In [3]:
# One scalar embedding per vocabulary word, every entry initialised to 1.0
# and labelled by the word it belongs to.
E = pd.Series(data=np.ones(len(vocabulary)), index=vocabulary)

E

the      1.0
cat      1.0
meows    1.0
and      1.0
a        1.0
dog      1.0
barks    1.0
dtype: float64

# Skipgram Forward Pass

In [9]:
# Positions (into E) of the center word, a true context word and a
# corrupt (negative) word used for the worked example below.
center_idx = 1
valid_idx = 2
corrupt_idx = 5

# Show which words those positions correspond to.
tuple(E.index[pos] for pos in (center_idx, valid_idx, corrupt_idx))

('cat', 'meows', 'dog')

### Lookup Embeddings

In [441]:
# E is labelled by words, so the integer indices must be applied by position.
# Plain E[1] only works through pandas' deprecated "integer key falls back to
# position" behaviour; .iloc states the intent explicitly.
center, valid, corrupt = E.iloc[center_idx], E.iloc[valid_idx], E.iloc[corrupt_idx]

center, valid, corrupt

(1.0, 1.0, 1.0)

### Compute Scores

In [442]:
# Score of a (center, other) pair is the product of their scalar embeddings.
valid_score = center * valid
corrupt_score = center * corrupt

(valid_score, corrupt_score)

(1.0, 1.0)

### Compute Performance

In [443]:
# Performance is the margin by which the true context word outscores the
# corrupt word; larger is better.
performance = valid_score - corrupt_score

performance

0.0

# Skipgram Backward Pass

In [444]:
# Seed gradient: d(performance)/d(performance)
dperf = 1

# performance = valid_score - corrupt_score
dpdvs = 1
dpdcs = -1
dvs = dpdvs*dperf
dcs = dpdcs*dperf

# valid_score = center*valid and corrupt_score = center*corrupt, so each
# local derivative is simply the other factor of the product.
dvsdvalid = center
dvsdcenter = valid
dcsdcenter = corrupt
dcsdcorrupt = center

# The center embedding feeds both scores, so its gradient sums both paths.
dcenter = dvsdcenter*dvs + dcsdcenter*dcs
dvalid = dvsdvalid*dvs
dcorrupt = dcsdcorrupt*dcs

# One-hot selectors: each looked-up embedding depends on exactly one entry of E.
dvaliddE = np.zeros_like(E)
dcenterdE = np.zeros_like(E)
dcorruptdE = np.zeros_like(E)
dvaliddE[valid_idx] = 1
dcenterdE[center_idx] = 1
dcorruptdE[corrupt_idx] = 1

# Scatter the three scalar gradients back onto the full embedding vector.
dE = dvaliddE*dvalid + dcenterdE*dcenter + dcorruptdE*dcorrupt

dE

array([ 0.,  0.,  1.,  0.,  0., -1.,  0.])

### Define Entire Skipgram Model

In [445]:
def skipgram_update(E, center_idx, valid_idx, corrupt_idx):
    """Return the gradient of the skipgram performance with respect to E.

    For one (center, valid, corrupt) triple the performance is
    ``E[center]*E[valid] - E[center]*E[corrupt]``: the score of the true
    context word minus the score of the corrupt word.

    Parameters
    ----------
    E : pd.Series or array-like
        One scalar embedding per vocabulary word. E is not modified.
    center_idx, valid_idx, corrupt_idx : int
        Positions (not labels) in E of the center word, the true context
        word and the corrupt word.

    Returns
    -------
    np.ndarray
        Array with the same length as E holding d(performance)/dE. Only the
        three referenced positions can be non-zero; if two positions
        coincide their contributions are summed.
    """
    # Work on the raw values so integer indices are always positional.
    # Indexing a word-labelled Series with E[int] relies on a deprecated
    # pandas fallback.
    values = np.asarray(E)

    # Forward pass: only the looked-up embeddings are needed for the gradient.
    center, valid, corrupt = values[center_idx], values[valid_idx], values[corrupt_idx]

    # Backward pass
    dperf = 1

    # performance = valid_score - corrupt_score
    dpdvs, dpdcs = 1, -1
    dvs, dcs = dpdvs*dperf, dpdcs*dperf

    # Each score is a product, so the local derivative is the other factor.
    dvsdvalid, dvsdcenter, dcsdcenter, dcsdcorrupt = center, valid, corrupt, center
    dcenter = dvsdcenter*dvs + dcsdcenter*dcs  # center feeds both scores
    dvalid = dvsdvalid*dvs
    dcorrupt = dcsdcorrupt*dcs

    # Scatter the three scalar gradients into a vector shaped like E.
    # Accumulating with += matches summing three one-hot vectors.
    dE = np.zeros_like(values)
    dE[valid_idx] += dvalid
    dE[center_idx] += dcenter
    dE[corrupt_idx] += dcorrupt

    return dE

### Skipgram Pass Generator

In [649]:
alpha = .05  # step size applied to each gradient update

def update(E):
    """Yield E after each randomly sampled skipgram update, forever.

    Every iteration samples a center position that has a neighbour on both
    sides, picks one neighbour as the valid context word, picks a corrupt
    word from the positions that are neither the center nor one of its two
    neighbours, and moves E by ``alpha`` times the gradient returned by
    ``skipgram_update``.

    Parameters
    ----------
    E : pd.Series or np.ndarray
        Scalar embeddings, one per word. Updated IN PLACE; the same object
        is yielded after every step.

    Yields
    ------
    The updated E (same object as the argument).

    Raises
    ------
    ValueError
        If E has fewer than 4 entries, since a center, its two neighbours
        and at least one corrupt candidate are required.
    """
    n_words = len(E)
    if n_words < 4:
        raise ValueError("update() needs at least 4 embeddings to sample from")

    while True:
        # Sample center, valid, and corrupt idx.
        # randint's upper bound is exclusive, so this gives 1 .. n_words-2:
        # every position that has both a left and a right neighbour.
        center_idx = np.random.randint(1, n_words - 1)
        offset = np.random.choice([-1, 1])
        valid_idx = center_idx + offset
        other_idx = center_idx - offset  # the neighbour on the opposite side

        # The corrupt word must not be the center or either true neighbour.
        excluded = (center_idx, valid_idx, other_idx)
        candidates = [pos for pos in range(n_words) if pos not in excluded]
        corrupt_idx = np.random.choice(candidates)

        # Compute embedding gradients for the sampled triple
        dE = skipgram_update(E, center_idx=center_idx, valid_idx=valid_idx, corrupt_idx=corrupt_idx)
        E += alpha*dE # update embeddings (ascent step: performance is maximised)

        yield E

### Perform Skipgram Updates

In [670]:
# Build a fresh update generator over E and advance it by a single step;
# the cell displays the embeddings after that step.
embedding_stream = update(E)
next(embedding_stream)

2 3 6


the      -5.182951
cat      28.165448
meows    10.679679
and      14.371206
a        -6.180290
dog      -4.843158
barks   -10.569050
dtype: float64