In [1]:
import torch
import torch.nn.functional as F
from torch import nn
import matplotlib.pyplot as plt # for making figures
import plotly.graph_objects as go

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
import requests

# URL of the text file
url = "https://cs.stanford.edu/people/karpathy/char-rnn/shakespear.txt"

# Fetch the content of the text file
response = requests.get(url)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Extract the text content
    text_content = response.text

    # Now you can process the text content as needed
    print(text_content[:56])  # Print the first 500 characters as an example
else:
    # Print an error message if the request failed
    print("Failed to fetch the content. Status code:", response.status_code)

That, poor contempt, or claim'd thou slept so faithful,



In [4]:
# Create a list to store all characters in sequential order
all_chars = []

# Iterate over each character in the text content
for char in text_content:
    # Check if the character is not a newline character
    if char != '\n':
        # Append the character to the list
        all_chars.append(char.lower())

# Print the first few characters as an example
print(all_chars[:50])  # Printing the first 50 characters


['t', 'h', 'a', 't', ',', ' ', 'p', 'o', 'o', 'r', ' ', 'c', 'o', 'n', 't', 'e', 'm', 'p', 't', ',', ' ', 'o', 'r', ' ', 'c', 'l', 'a', 'i', 'm', "'", 'd', ' ', 't', 'h', 'o', 'u', ' ', 's', 'l', 'e', 'p', 't', ' ', 's', 'o', ' ', 'f', 'a', 'i', 't']


In [5]:
unique_chars = sorted(set(all_chars))

In [6]:
unique_chars

[' ',
 '!',
 "'",
 ',',
 '-',
 '.',
 ':',
 ';',
 '?',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [7]:
# Create a dictionary to store labeled characters
labeled_chars_dict = {}

# Generate labels and assign to characters
labeled_chars_dict[0] = '_'
for i, char in enumerate(unique_chars):
    labeled_chars_dict[i + 1] = f"{char}"

print(labeled_chars_dict)


{0: '_', 1: ' ', 2: '!', 3: "'", 4: ',', 5: '-', 6: '.', 7: ':', 8: ';', 9: '?', 10: 'a', 11: 'b', 12: 'c', 13: 'd', 14: 'e', 15: 'f', 16: 'g', 17: 'h', 18: 'i', 19: 'j', 20: 'k', 21: 'l', 22: 'm', 23: 'n', 24: 'o', 25: 'p', 26: 'q', 27: 'r', 28: 's', 29: 't', 30: 'u', 31: 'v', 32: 'w', 33: 'x', 34: 'y', 35: 'z'}


In [8]:
# Create a dictionary to store the characters with labels
labeled_chars_dict_reverse = {char: label for label, char in labeled_chars_dict.items()}
# Print the dictionary
print(labeled_chars_dict_reverse)

{'_': 0, ' ': 1, '!': 2, "'": 3, ',': 4, '-': 5, '.': 6, ':': 7, ';': 8, '?': 9, 'a': 10, 'b': 11, 'c': 12, 'd': 13, 'e': 14, 'f': 15, 'g': 16, 'h': 17, 'i': 18, 'j': 19, 'k': 20, 'l': 21, 'm': 22, 'n': 23, 'o': 24, 'p': 25, 'q': 26, 'r': 27, 's': 28, 't': 29, 'u': 30, 'v': 31, 'w': 32, 'x': 33, 'y': 34, 'z': 35}


In [9]:
# Set the word length
con_len = 50
n_pred = 10
word_length = con_len

# Generate words of length 25 by concatenating characters
words_of_length_25 = ["".join(all_chars[i:i+word_length]) for i in range(0, len(all_chars), word_length)]

# Print the first few words as an example
print(words_of_length_25[:10])  # Printing the first 10 words
print(len(words_of_length_25))


["that, poor contempt, or claim'd thou slept so fait", 'hful,i may contrive our father; and, in their defe', 'ated queen,her flesh broke me and puttance of expe', 'dition house,and in that same that ever i lament t', 'his stomach,and he, nor butly and my fury, knowing', ' everythinggrew daily ever, his great strength and', ' thoughtthe bright buds of mine own.biondello:marr', "y, that it may not pray their patience.'king lear:", 'the instant common maid, as we may less bea brave ', 'gentleman and joiner: he that finds us with waxand']
1934


In [10]:
block_size = con_len # context length: how many characters do we take to predict the next one?
X, Y = [], []
for w in words_of_length_25[:100]:
  
  context = [0] * block_size
  for ch in w + '.':
    ix = labeled_chars_dict_reverse[ch]
#     ix is char number
    X.append(context)
    Y.append(ix)
#     X looks somethig like this :[[0, 0, 0, 0, 0], [0, 0, 0, 0, 7], [0, 0, 0, 7, 21]]
# this is Y: [0, 0, 7, 21, 12]

    print(''.join(labeled_chars_dict[i] for i in context), '--->', labeled_chars_dict[ix])
    context = context[1:] + [ix] # crop and append
    print(len(X))
#     context: [0, 0, 7, 21, 12]
  
# Move data to GPU

X = torch.tensor(X).to(device)
Y = torch.tensor(Y).to(device)

__________________________________________________ ---> t
1
_________________________________________________t ---> h
2
________________________________________________th ---> a
3
_______________________________________________tha ---> t
4
______________________________________________that ---> ,
5
_____________________________________________that, --->  
6
____________________________________________that,  ---> p
7
___________________________________________that, p ---> o
8
__________________________________________that, po ---> o
9
_________________________________________that, poo ---> r
10
________________________________________that, poor --->  
11
_______________________________________that, poor  ---> c
12
______________________________________that, poor c ---> o
13
_____________________________________that, poor co ---> n
14
____________________________________that, poor con ---> t
15
___________________________________that, poor cont ---> e
16
_________________________________

In [19]:
class NextChar(nn.Module):
  def __init__(self, block_size, vocab_size, emb_dim, hidden_size):
    super().__init__()
    self.emb = nn.Embedding(vocab_size, emb_dim)
    self.lin1 = nn.Linear(block_size * emb_dim, hidden_size)
    self.lin2 = nn.Linear(hidden_size, int(hidden_size/2))
    self.lin3 = nn.Linear(int(hidden_size/2), vocab_size)

  def forward(self, x):
    x = self.emb(x)
    x = x.view(x.shape[0], -1)
    x = torch.sin(self.lin1(x))
    x = self.lin2(x)
    return x
    

In [20]:
emb_dim = 2
model = NextChar(block_size, len(labeled_chars_dict_reverse), emb_dim, 256).to(device)


In [26]:
def plot_emb(emb, itos):
    fig = go.Figure()
    for i in range(len(itos)):
        x, y = emb.weight[i].detach().cpu().numpy()
        fig.add_trace(go.Scatter(x=[x], y=[y], mode='markers', marker=dict(color='black'), name=itos[i]))
        fig.add_annotation(x=x + 0.05, y=y + 0.05, text=itos[i], showarrow=False)
    fig.update_layout(title='Embedding Visualization',
                      xaxis_title='Dimension 1',
                      yaxis_title='Dimension 2', height=1000, width = 1000)
    fig.show()

plot_emb(model.emb, labeled_chars_dict)

In [14]:
# Train the model

loss_fn = nn.CrossEntropyLoss()
opt = torch.optim.AdamW(model.parameters(), lr=0.01)
import time
# Mini-batch training
batch_size = 4096
print_every = 1000
elapsed_time = []
for epoch in range(50000):
    start_time = time.time()
    for i in range(0, X.shape[0], batch_size):
        x = X[i:i+batch_size]
        y = Y[i:i+batch_size]
        y_pred = model(x)
        loss = loss_fn(y_pred, y)
        loss.backward()
        opt.step()
        opt.zero_grad()
    end_time = time.time()
    elapsed_time.append(end_time - start_time)
    if epoch % print_every == 0:
        print(epoch, loss.item())

0 3.799614429473877
1000 0.07265236973762512
2000 0.07097605615854263
3000 0.07094182819128036


KeyboardInterrupt: 

In [17]:
def plot_emb(emb, itos):
    fig = go.Figure()
    for i in range(len(itos)):
        x, y = emb.weight[i].detach().cpu().numpy()
        fig.add_trace(go.Scatter(x=[x], y=[y], mode='markers', marker=dict(color='black'), name=itos[i]))
        fig.add_annotation(x=x + 0.05, y=y + 0.05, text=itos[i], showarrow=False)
    fig.update_layout(title='Embedding Visualization',
                      xaxis_title='Dimension 1',
                      yaxis_title='Dimension 2',height=1000, width = 1000)
    fig.show()

plot_emb(model.emb, labeled_chars_dict)

In [17]:
def create_context(word, labeled_chars_dict_reverse, max_context_length= con_len):
    # Initialize the context list with zeros
    context = [0] * max_context_length
    
    # Extract the numerical labels corresponding to the characters in the word
    labels = [labeled_chars_dict_reverse[char] for char in word]
    
    # Calculate the starting index for assigning labels in the context list
    start_index = max_context_length - len(labels)
    
    # Assign the labels to the last word-length elements of the context list
    context[start_index:] = labels
    
    return context

def generate_name(word, model, labeled_chars_dict_reverse, labeled_chars_dict, block_size, max_len=n_pred):
    context = create_context(word, labeled_chars_dict_reverse, con_len)
    name = ''
    for i in range(max_len):
        x = torch.tensor(context).view(1, -1).to(device)
        y_pred = model(x)
        ix = torch.distributions.categorical.Categorical(logits=y_pred).sample().item()
        ch = labeled_chars_dict[ix]
        if ch == '_':
            break
        name += ch
        context = context[1:] + [ix]
    return name

print(generate_name("come to hope", model, labeled_chars_dict_reverse, labeled_chars_dict, block_size))

ace tr:e h


In [18]:
# Assuming your model is named "model"
model_path = 'E:\ML\Assignment3\model.pth'
torch.save(model.state_dict(), model_path)

In [13]:
import pickle

# Assuming `model` is your trained PyTorch model
model_path = r'E:\ML\Assignment3\num_char.pkl'

# Save the model
with open(model_path, 'wb') as f:
    pickle.dump(labeled_chars_dict, f)