In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt

In [4]:
with open('/Users/deepaksharma/Documents/Python/Kaggle/GenerateKanyeLyrics/Kanye West Lyrics.txt','r',encoding='utf-8') as f:
    text = f.read()

In [6]:
len(text)

353441

In [9]:
chars = sorted(list(set(text)))

In [10]:
len(chars)

101

In [31]:
stoi = {ch:i for i,ch in enumerate(chars)}

In [32]:
itos = {i:ch for i,ch in enumerate(chars)}

In [33]:
encode = lambda s: [stoi[c] for c in s]
decode = lambda l: ''.join([itos[c] for c in l])

In [36]:
a = encode("Hi guys I am Aman")
a

[36, 65, 1, 63, 77, 81, 75, 1, 37, 1, 57, 69, 1, 29, 69, 57, 70]

In [37]:
decode(a)

'Hi guys I am Aman'

In [44]:
data = torch.tensor(encode(text), dtype=torch.long)
print(data.shape, data.type)

torch.Size([353441]) <built-in method type of Tensor object at 0x7fa9c0997c20>


In [55]:
n = int(0.9*len(text))
train_data = data[:n]
val_data = data[n:]
block_size = 8
batch_size = 32
print("A single block is: ",train_data[:block_size])

A single block is:  tensor([100,  55,  31,  64,  71,  74,  77,  75])


In [56]:
x = train_data[:block_size]
y = train_data[1:block_size+1]

In [57]:
for t in range(block_size):
    context = x[:t+1]
    target = y[t]
    print("Context: ", context, " Target: ", target)

ix = torch.randint(len(train_data)-block_size,(batch_size,))


Context:  tensor([100])  Target:  tensor(55)
Context:  tensor([100,  55])  Target:  tensor(31)
Context:  tensor([100,  55,  31])  Target:  tensor(64)
Context:  tensor([100,  55,  31,  64])  Target:  tensor(71)
Context:  tensor([100,  55,  31,  64,  71])  Target:  tensor(74)
Context:  tensor([100,  55,  31,  64,  71,  74])  Target:  tensor(77)
Context:  tensor([100,  55,  31,  64,  71,  74,  77])  Target:  tensor(75)
Context:  tensor([100,  55,  31,  64,  71,  74,  77,  75])  Target:  tensor(56)


In [58]:
x = torch.stack([train_data[i:i+8] for i in ix])
y = torch.stack([train_data[i+1:i+9] for i in ix])

In [158]:
def get_batch(split):
    if split == 'train':
        data = train_data
    elif split == 'val':
        data = val_data
    else:
        raise ValueError("Invalid split")
  
    ix = torch.randint(len(data)-block_size,(batch_size,))
    x = torch.stack([data[i:i+block_size] for i in ix])
    y = torch.stack([data[i+1:i+block_size+1] for i in ix])
    return x, y

In [159]:
# hyperparameters
batch_size = 16 # how many independent sequences will we process in parallel?
block_size = 64 # what is the maximum context length for predictions?
max_iters = 5000
eval_interval = 100
learning_rate = 1e-3
device = 'cuda' if torch.cuda.is_available() else 'cpu'
eval_iters = 200
n_embd = 128
n_head = 8
n_layer = 4
dropout = 0.0
vocab = len(chars)
# ------------

In [160]:
class Head(nn.Module):
    def __init__(self, head_size):
        super(Head,self).__init__()
        self.head_size = head_size
        self.dropout = nn.Dropout(dropout)
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
    def forward(self,x):
        k = self.key(x)
        q = self.query(x)
        wei = q @ k.transpose(-2,-1) * (self.head_size ** -0.5)
        wei = wei.masked_fill(self.tril == 0, float('-inf'))
        wei = F.softmax(wei, dim=-1)
        wei = self.dropout(wei)
        v = self.value(x)
        out = wei @ v
        return out
        

In [161]:
class MultiHeadAttention(nn.Module):
    def __init__(self, n_head, head_size):
        super(MultiHeadAttention,self).__init__()
        self.head_size = head_size
        self.n_head = n_head
        self.heads = nn.ModuleList([Head(head_size) for _ in range(n_head)])
        self.out = nn.Linear(n_embd, n_embd)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self,x):
        out = torch.cat([h(x) for h in self.heads], dim=-1)
        out = self.out(out)
        out = self.dropout(out)
        return out


In [162]:
class FeedForwardLayer(nn.Module):
    def __init__(self, n_embd):
        super(FeedForwardLayer, self).__init__()
        self.n_embd = n_embd
        self.fc1 = nn.Linear(n_embd, 4*n_embd)
        self.fc2 = nn.Linear(4*n_embd,n_embd)
        self.dropout = nn.Dropout(dropout)
    
    def forward(self, x):
        out = self.fc1(x)
        out = F.gelu(out)
        out = self.fc2(out)
        out = self.dropout(out)
        return out

In [163]:
class Block(nn.Module):
    def __init__(self):
        super(Block, self).__init__()
        self.attn = MultiHeadAttention(n_head, n_embd // n_head)
        self.ff = FeedForwardLayer(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)
    def forward(self,x):
        x = x + self.attn(self.ln1(x))
        x = x + self.ff(self.ln2(x))
        return x
        

In [164]:
class Transformer(nn.Module):
    def __init__(self, n_embd, n_layer):
        super(Transformer, self).__init__()
        self.n_embd = n_embd
        self.n_layer = n_layer
        self.token_embedding = nn.Embedding(vocab, n_embd)
        self.position_embedding = nn.Embedding(block_size,n_embd)
        self.blocks = nn.Sequential(*[Block() for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd) # final layer norm
        self.ffwd = nn.Linear(n_embd, vocab)
    
    def forward(self, idx, targets=None):
        B,T = idx.shape
        x = self.token_embedding(idx) + self.position_embedding(torch.arange(T, device=idx.device))
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.ffwd(x)
        if targets is None:
            loss = None
        else:
            B,T,C = logits.shape
            logits = logits.view(B*T, C)
            targets = targets.view(B*T)
            loss = F.cross_entropy(logits, targets, ignore_index=0)
        return logits,loss
    
    def generate(self, idx, max_tokens):
        for _ in range(max_tokens):
            idx_cond = idx[:, -block_size:]
            logits, _ = self(idx_cond)
            logits = logits[:,-1,:]
            probs = F.softmax(logits, dim=-1)
            idx_next = torch.multinomial(probs, num_samples=1)
            idx = torch.cat([idx, idx_next], dim=-1)
        return idx

In [165]:
model = Transformer(n_embd,n_layer)

print("Total params: ", sum(p.numel() for p in model.parameters()))

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)

for steps in range(100000):
    x,y = get_batch('train')
    logits, loss = model(x, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if steps % 100 == 0:
        print("Step: ", steps, " Loss: ", loss.item())

Total params:  825957
Step:  0  Loss:  4.844474792480469
Step:  100  Loss:  2.5599639415740967
Step:  200  Loss:  2.3981499671936035
Step:  300  Loss:  2.25873064994812
Step:  400  Loss:  1.9892122745513916
Step:  500  Loss:  2.051386833190918
Step:  600  Loss:  1.9710615873336792
Step:  700  Loss:  1.8869479894638062
Step:  800  Loss:  1.7240028381347656
Step:  900  Loss:  1.9278476238250732
Step:  1000  Loss:  1.7470016479492188
Step:  1100  Loss:  1.6099830865859985
Step:  1200  Loss:  1.729011058807373
Step:  1300  Loss:  1.7054438591003418
Step:  1400  Loss:  1.6144025325775146
Step:  1500  Loss:  1.6527540683746338
Step:  1600  Loss:  1.4751813411712646
Step:  1700  Loss:  1.5877550840377808
Step:  1800  Loss:  1.4670706987380981
Step:  1900  Loss:  1.4576531648635864
Step:  2000  Loss:  1.211912989616394
Step:  2100  Loss:  1.4431787729263306
Step:  2200  Loss:  1.260366439819336
Step:  2300  Loss:  1.3404265642166138
Step:  2400  Loss:  1.2303211688995361
Step:  2500  Loss:  1.

KeyboardInterrupt: 

In [167]:
# generate from the model
context = torch.zeros((1, 64), dtype=torch.long, device=device)
print(decode(model.generate(context, max_tokens=1000)[0].tolist()))

































































[Intro: T-O-O-O-Cof-G-Good-do-do, you know just wo's ben and-boloss on my books, huh? Beslaquazy, don't you adopt all that coone off them niggas on that call that rode or the tabley in my wrestlessmas, then prolls of hus an and, huh? hurts and that's what we'll yell live”zue what we left You and years old when you and I was to blame?! Now, I start to get shit like your brothers Rae and Kanye West & Simps & Elly Jackson & Rare Earth]rade Amerient is like a wretting more than Old Taco]lamps told it powers enough, arm faction, I'll fit so heard a fact, get barget up your skull you up, ain't no question if I want it, I need it again6L At in the wint, what we at a all about the bottlegge respect the facters to convertlinaterlur hell on 'em prey 'til deaths, get lights in dealer gave up!" I know that God's perfect? Trumpeon mm, I through I couldn't work the locks night? "Where the Wacks in the club?" Nobody Caughtions (Yeah!) Bu

In [168]:
# Print model's state_dict
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

Model's state_dict:
token_embedding.weight 	 torch.Size([101, 128])
position_embedding.weight 	 torch.Size([64, 128])
blocks.0.attn.heads.0.tril 	 torch.Size([64, 64])
blocks.0.attn.heads.0.key.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.0.query.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.0.value.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.1.tril 	 torch.Size([64, 64])
blocks.0.attn.heads.1.key.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.1.query.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.1.value.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.2.tril 	 torch.Size([64, 64])
blocks.0.attn.heads.2.key.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.2.query.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.2.value.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.3.tril 	 torch.Size([64, 64])
blocks.0.attn.heads.3.key.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.3.query.weight 	 torch.Size([16, 128])
blocks.0.attn.heads.3.value.weight 	 torc

In [169]:
# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])

Optimizer's state_dict:
state 	 {0: {'step': tensor(42173.), 'exp_avg': tensor([[-1.1370e-04,  4.6778e-04, -1.5702e-04,  ..., -4.8435e-04,
         -4.1983e-05,  2.6924e-04],
        [ 9.3783e-05,  2.1704e-04,  3.5789e-04,  ..., -1.6380e-04,
          3.8638e-04, -4.8070e-04],
        [ 1.8466e-04,  1.2476e-04, -1.4167e-04,  ...,  9.4115e-06,
         -8.3858e-05, -4.2905e-06],
        ...,
        [ 1.1630e-06, -2.6407e-06, -6.3855e-06,  ...,  3.1694e-06,
         -1.2078e-05, -4.7072e-06],
        [ 1.8812e-11, -9.8351e-13,  2.6944e-11,  ...,  2.6177e-11,
          1.3326e-11, -1.8232e-11],
        [ 5.6052e-45, -5.6052e-45, -5.6052e-45,  ..., -5.6052e-45,
          5.6052e-45,  5.6052e-45]]), 'exp_avg_sq': tensor([[5.0371e-06, 9.7723e-07, 2.3480e-06,  ..., 3.1967e-06, 1.0881e-06,
         1.2911e-06],
        [2.2282e-05, 5.1047e-06, 1.0380e-05,  ..., 1.7326e-05, 5.1748e-06,
         6.2151e-06],
        [1.8350e-07, 3.3358e-08, 5.9241e-08,  ..., 6.2213e-08, 2.6765e-08,
         3.2

In [170]:
torch.save(model.state_dict(), 'model_weights.pth')

In [171]:
stoi

{'\n': 0,
 ' ': 1,
 '!': 2,
 '"': 3,
 '#': 4,
 '$': 5,
 '&': 6,
 "'": 7,
 '(': 8,
 ')': 9,
 '*': 10,
 '+': 11,
 ',': 12,
 '-': 13,
 '.': 14,
 '/': 15,
 '0': 16,
 '1': 17,
 '2': 18,
 '3': 19,
 '4': 20,
 '5': 21,
 '6': 22,
 '7': 23,
 '8': 24,
 '9': 25,
 ':': 26,
 ';': 27,
 '?': 28,
 'A': 29,
 'B': 30,
 'C': 31,
 'D': 32,
 'E': 33,
 'F': 34,
 'G': 35,
 'H': 36,
 'I': 37,
 'J': 38,
 'K': 39,
 'L': 40,
 'M': 41,
 'N': 42,
 'O': 43,
 'P': 44,
 'Q': 45,
 'R': 46,
 'S': 47,
 'T': 48,
 'U': 49,
 'V': 50,
 'W': 51,
 'X': 52,
 'Y': 53,
 'Z': 54,
 '[': 55,
 ']': 56,
 'a': 57,
 'b': 58,
 'c': 59,
 'd': 60,
 'e': 61,
 'f': 62,
 'g': 63,
 'h': 64,
 'i': 65,
 'j': 66,
 'k': 67,
 'l': 68,
 'm': 69,
 'n': 70,
 'o': 71,
 'p': 72,
 'q': 73,
 'r': 74,
 's': 75,
 't': 76,
 'u': 77,
 'v': 78,
 'w': 79,
 'x': 80,
 'y': 81,
 'z': 82,
 'Á': 83,
 'á': 84,
 'é': 85,
 'ñ': 86,
 'ó': 87,
 'ö': 88,
 'ú': 89,
 'ā': 90,
 'Ő': 91,
 '–': 92,
 '—': 93,
 '‘': 94,
 '’': 95,
 '“': 96,
 '”': 97,
 '…': 98,
 '\u2060': 99,
 '\u

In [176]:
lyrics = encode("Bitch I am back on my comma , sipping on my CocaCola, driving on a hangover ")
lyrics = torch.tensor(lyrics, dtype=torch.long)
lyrics = torch.stack([lyrics for _ in range(1)], dim=0)
lyrics.shape

torch.Size([1, 76])

In [177]:
print(decode(model.generate(lyrics, max_tokens=1000)[0].tolist()))

Bitch I am back on my comma , sipping on my CocaCola, driving on a hangover to way too his to the Guck way to hand and be still paidIck thangs, I'm possace, I'm down for niggas more than the ameradding me and don't give a Folk some Sexuning (To go Mr. Bewn!) zBut by bad daddy shating every right you should have seen, and some out of your blood-beliess Beonecomes some puble focking and loved it take it to be home, baby! We outta here, baby!" (Now, I don't know what it is with females dise (The grawhed up this Bevidenz] the Lord) Curce! You!) N-n-n-n-n-now we the lights in here, baby! We in turn the wlothe time top 'head, rock coldest with uh, popped store that bite so much should never hurt this bad we than you motherfuckers really remind to Holling light when you let me down girl, if you got spirit at least let breath you decide on the ladies of that Bluestarded for everyonce that people let it be while merieder and you, but what the hell do I know? you see me now right now (You see me

In [220]:
def generate_kanye_lyrics(text):
    if len(text)<64:
        initial_text = ""
        padding = 64-len(text)
        initial_list = []
        for i in range(0, padding):
            initial_list.append(0)
        context = initial_list + encode(text)
    else:
        padding = 0
        initial_text = text[0:len(text)-block_size]
        context = text[-block_size:]
        context = encode(context)
    context = torch.tensor(context, dtype=torch.long)
    lyrics = torch.stack([context for _ in range(1)], dim=0)
    return initial_text + decode(model.generate(lyrics, max_tokens=1000)[0].tolist())[padding:]

In [221]:
final_lyric_1 = generate_kanye_lyrics("Bitch don't kill my vibe!, I can feel your energy from two places away. I got my music and my ")

In [222]:
final_lyric

'Bitch don\'t kill my vibe!, I can feel your energy from two places away. I got my music and my shoes good doctor seen try to me that itton your knee shit, that\'s the monster shoes went po\'bogran my dick from Kanye West] about to get burs for years like Marvin Granene, this is what you live this good, then I felt lights kinda crowdaired the good life, got be kick up on the yardy got Kanye fresheds of the phony? Eyesty sectives, I was just how my head and say should go and news and came out a buller of the lord, and jokes (Ow!) Q-now we p-ferettestice better (better we than you), just to see if I couldn\'t afford a couch by promised to you!" I\'ma.i. (A nigga when I\'m workin\' on my hopped New Wave is it, casservive (Black), ever! Twistake, watch out side niggas that all my niggas get it good for the ashtraute with the amazil so head, ass up and that manny grands a loose and ctimes are forever (forever, forever, forever) (free" (No more Louis Vellas gied up the Chicago skylines, this

In [223]:
final_lyric_2 = generate_kanye_lyrics("She says she wants some Marvin Gayes ")

In [224]:
final_lyric_2

'She says she wants some Marvin Gayes Choir, Natalie Grammha. .... Wholler] Get Bugardles Kefespecials we gendon\'t wanna to go (Down) your know what some really that you love me when you latest? it\'s back to be playin\' coup think too mean?" I supposed to do now? Hmm, I\'m saying something in the boss uplights, never got bloooooss to be assholes we become look good control by these wrongs turn to work, you call now" you done at of puff $ Kongs & Simps & Elly Jackson] to Rihan day girl friends your will been tryna bars - (well out, motherfuckers better do your job and roll ups in the bathroom fuckin\' like stay don\'t need as far as my new what we houstline: Kanya West] minds, let \'em go on, sock it down, the boy, you gotta fade, sort where the houstory, is like a will behind menacin\' and say you will, couldn\'t afford about to be an everybody and bag not love me ask you for worghnt the yever give me a seen ten charmes of God who have (he you\'re \'round that\'s what Dat me!" zome M

In [225]:
!pip install gradio

