In [1]:
!pip install nltk



In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import Counter
from torch.utils.data import Dataset, DataLoader
from nltk.tokenize import word_tokenize
import nltk

In [3]:
document = """A black hole is a massive, compact astronomical object so dense that its gravity prevents anything from escaping, even light. Albert Einstein's theory of general relativity predicts that a sufficiently compact mass will form a black hole. The boundary of no escape is called the event horizon. In general relativity, a black hole’s event horizon seals an object’s fate but produces no locally detectable change when crossed. In many ways, a black hole acts like an ideal black body, as it reflects no light. Quantum field theory in curved spacetime predicts that event horizons emit Hawking radiation, with the same spectrum as a black body of a temperature inversely proportional to its mass. This temperature is of the order of billionths of a kelvin for stellar black holes, making it essentially impossible to observe directly.

Objects whose gravitational fields are too strong for light to escape were first considered in the eighteenth century by John Michell and Pierre-Simon Laplace. In 1916, Karl Schwarzschild found the first modern solution of general relativity that would characterise a black hole. Due to his influential research, the Schwarzschild metric is named after him. David Finkelstein, in 1958, first published the interpretation of "black hole" as a region of space from which nothing can escape. Black holes were long considered a mathematical curiosity; it was not until the 1960s that theoretical work showed they were a generic prediction of general relativity. The first black hole known was Cygnus X-1, identified by several researchers independently in 1971.

Black holes typically form when massive stars collapse at the end of their life cycle. After a black hole has formed, it can grow by absorbing mass from its surroundings. Supermassive black holes of millions of solar masses may form by absorbing other stars and merging with other black holes, or via direct collapse of gas clouds. There is consensus that supermassive black holes exist in the centres of most galaxies.

The presence of a black hole can be inferred through its interaction with other matter and with electromagnetic radiation such as visible light. Matter falling toward a black hole can form an accretion disk of infalling plasma, heated by friction and emitting light. In extreme cases, this creates a quasar, some of the brightest objects in the universe. Stars passing too close to a supermassive black hole can be shredded into streamers that shine very brightly before being "swallowed." If other stars are orbiting a black hole, their orbits can be used to determine the black hole's mass and location. Such observations can be used to exclude possible alternatives such as neutron stars. In this way, astronomers have identified numerous stellar black hole candidates in binary systems and established that the radio source known as Sagittarius A*, at the core of the Milky Way galaxy, contains a supermassive black hole of about four point three million solar masses.
The idea of a body so big that even light could not escape was briefly proposed by English astronomical pioneer and clergyman John Michell and independently by French scientist Pierre-Simon Laplace. Both scholars proposed very large stars rather than the modern model of stars with extraordinary density.

Michell's idea, in a short part of a letter published in seventeen eighty four, calculated that a star with the same density but five hundred times the radius of the sun would not let any emitted light escape; the surface escape velocity would exceed the speed of light. Michell correctly noted that such supermassive but non-radiating bodies might be detectable through their gravitational effects on nearby visible bodies.

In seventeen ninety six, Laplace mentioned that a star could be invisible if it were sufficiently large while speculating on the origin of the Solar System in his book Exposition du Système du Monde. Franz Xaver von Zach asked Laplace for a mathematical analysis, which Laplace provided and published in a journal edited by von Zach.

Scholars of the time were initially excited by the proposal that giant but invisible 'dark stars' might be hiding in plain view, but enthusiasm dampened when the wavelike nature of light became apparent in the early nineteenth century, since light was understood as a wave rather than a particle, it was unclear what, if any, influence gravity would have on escaping light waves.

In nineteen seventy one, Hawking showed under general conditions that the total area of the event horizons of any collection of classical black holes can never decrease, even if they collide and merge. This result, now known as the second law of black hole mechanics, is remarkably similar to the second law of thermodynamics, which states that the total entropy of an isolated system can never decrease. As with classical objects at absolute zero temperature, it was assumed that black holes had zero entropy. If this were the case, the second law of thermodynamics would be violated by entropy-laden matter entering a black hole, resulting in a decrease in the total entropy of the universe. Therefore, Bekenstein proposed that a black hole should have an entropy, and that it should be proportional to its horizon area.

The link with the laws of thermodynamics was further strengthened by Hawking's discovery in nineteen seventy four that quantum field theory predicts that a black hole radiates blackbody radiation at a constant temperature. This seemingly causes a violation of the second law of black hole mechanics, since the radiation will carry away energy from the black hole causing it to shrink. The radiation also carries away entropy, and it can be proven under general assumptions that the sum of the entropy of the matter surrounding a black hole and one quarter of the area of the horizon as measured in Planck units is in fact always increasing. This allows the formulation of the first law of black hole mechanics as an analogue of the first law of thermodynamics, with the mass acting as energy, the surface gravity as temperature and the area as entropy.

One puzzling feature is that the entropy of a black hole scales with its area rather than with its volume, since entropy is normally an extensive quantity that scales linearly with the volume of the system. This odd property led Gerard 't Hooft and Leonard Susskind to propose the holographic principle, which suggests that anything that happens in a volume of spacetime can be described by data on the boundary of that volume.

Although general relativity can be used to perform a semiclassical calculation of black hole entropy, this situation is theoretically unsatisfying. In statistical mechanics, entropy is understood as counting the number of microscopic configurations of a system that have the same macroscopic qualities, such as mass, charge, pressure, etc. Without a satisfactory theory of quantum gravity, one cannot perform such a computation for black holes. Some progress has been made in various approaches to quantum gravity. In nineteen ninety five, Andrew Strominger and Cumrun Vafa showed that counting the microstates of a specific supersymmetric black hole in string theory reproduced the Bekenstein–Hawking entropy. Since then, similar results have been reported for different black holes both in string theory and in other approaches to quantum gravity like loop quantum gravity.

Because a black hole has only a few internal parameters, most of the information about the matter that went into forming the black hole is lost. Regardless of the type of matter which goes into a black hole, it appears that only information concerning the total mass, charge, and angular momentum are conserved. As long as black holes were thought to persist forever this information loss is not that problematic, as the information can be thought of as existing inside the black hole, inaccessible from the outside, but represented on the event horizon in accordance with the holographic principle. However, black holes slowly evaporate by emitting Hawking radiation. This radiation does not appear to carry any additional information about the matter that formed the black hole, meaning that this information appears to be gone forever.

The question whether information is truly lost in black holes (the black hole information paradox) has divided the theoretical physics community. In quantum mechanics, loss of information corresponds to the violation of a property called unitarity, and it has been argued that loss of unitarity would also imply violation of conservation of energy, though this has also been disputed. Over recent years evidence has been building that indeed information and unitarity are preserved in a full quantum gravitational treatment of the problem.

One attempt to resolve the black hole information paradox is known as black hole complementarity. In twenty twelve, the "firewall paradox" was introduced with the goal of demonstrating that black hole complementarity fails to solve the information paradox. According to quantum field theory in curved spacetime, a single emission of Hawking radiation involves two mutually entangled particles. The outgoing particle escapes and is emitted as a quantum of Hawking radiation; the infalling particle is swallowed by the black hole. Assume a black hole formed a finite time in the past and will fully evaporate away in some finite time in the future. Then, it will emit only a finite amount of information encoded within its Hawking radiation. According to research by physicists like Don Page and Leonard Susskind, there will eventually be a time by which an outgoing particle must be entangled with all the Hawking radiation the black hole has previously emitted.

This seemingly creates a paradox: a principle called "monogamy of entanglement" requires that, like any quantum system, the outgoing particle cannot be fully entangled with two other systems at the same time; yet here the outgoing particle appears to be entangled both with the infalling particle and, independently, with past Hawking radiation. In order to resolve this contradiction, physicists may eventually be forced to give up one of three time-tested principles: Einstein's equivalence principle, unitarity, or local quantum field theory. One possible solution, which violates the equivalence principle, is that a "firewall" destroys incoming particles at the event horizon. In general, which—if any—of these assumptions should be abandoned remains a topic of debate.
"""


In [5]:
#Convert unsupervised task to supervised

In [6]:
# Tokenization
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [7]:
# tokenize
tokens = word_tokenize(document.lower())

In [9]:
# build vocabulary
vocab = {'<unk>':0}

for token in Counter(tokens).keys():   #for unique words
  if token not in vocab:
    vocab[token] = len(vocab)

vocab

{'<unk>': 0,
 'a': 1,
 'black': 2,
 'hole': 3,
 'is': 4,
 'massive': 5,
 ',': 6,
 'compact': 7,
 'astronomical': 8,
 'object': 9,
 'so': 10,
 'dense': 11,
 'that': 12,
 'its': 13,
 'gravity': 14,
 'prevents': 15,
 'anything': 16,
 'from': 17,
 'escaping': 18,
 'even': 19,
 'light': 20,
 '.': 21,
 'albert': 22,
 'einstein': 23,
 "'s": 24,
 'theory': 25,
 'of': 26,
 'general': 27,
 'relativity': 28,
 'predicts': 29,
 'sufficiently': 30,
 'mass': 31,
 'will': 32,
 'form': 33,
 'the': 34,
 'boundary': 35,
 'no': 36,
 'escape': 37,
 'called': 38,
 'event': 39,
 'horizon': 40,
 'in': 41,
 '’': 42,
 's': 43,
 'seals': 44,
 'an': 45,
 'fate': 46,
 'but': 47,
 'produces': 48,
 'locally': 49,
 'detectable': 50,
 'change': 51,
 'when': 52,
 'crossed': 53,
 'many': 54,
 'ways': 55,
 'acts': 56,
 'like': 57,
 'ideal': 58,
 'body': 59,
 'as': 60,
 'it': 61,
 'reflects': 62,
 'quantum': 63,
 'field': 64,
 'curved': 65,
 'spacetime': 66,
 'horizons': 67,
 'emit': 68,
 'hawking': 69,
 'radiation': 70,


In [10]:
len(vocab)

604

In [11]:
input_sentences = document.split('\n')

In [12]:
def text_to_indices(sentence, vocab):

  numerical_sentence = []

  for token in sentence:
    if token in vocab:
      numerical_sentence.append(vocab[token])
    else:
      numerical_sentence.append(vocab['<unk>'])

  return numerical_sentence


In [13]:
input_numerical_sentences = []

for sentence in input_sentences:
  input_numerical_sentences.append(text_to_indices(word_tokenize(sentence.lower()), vocab))


In [14]:
len(input_numerical_sentences)

31

In [15]:
training_sequence = []
for sentence in input_numerical_sentences:

  for i in range(1, len(sentence)):
    training_sequence.append(sentence[:i+1])

In [16]:
len(training_sequence)

1841

In [17]:
training_sequence[:5]     #they will be of different length

[[1, 2], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 1], [1, 2, 3, 4, 1, 5]]

In [18]:
len_list = []

for sequence in training_sequence:
  len_list.append(len(sequence))

max(len_list)     #length of longest existing sequence

176

In [19]:
training_sequence[0]

[1, 2]

In [20]:
padded_training_sequence = []
for sequence in training_sequence:

  padded_training_sequence.append([0]*(max(len_list) - len(sequence)) + sequence)

In [21]:
len(padded_training_sequence[10])   #length of all sequences will be 62 now

176

In [22]:
padded_training_sequence = torch.tensor(padded_training_sequence, dtype=torch.long)

In [23]:
padded_training_sequence

tensor([[  0,   0,   0,  ...,   0,   1,   2],
        [  0,   0,   0,  ...,   1,   2,   3],
        [  0,   0,   0,  ...,   2,   3,   4],
        ...,
        [  0,   0,   0,  ...,   1, 602,  26],
        [  0,   0,   0,  ..., 602,  26, 603],
        [  0,   0,   0,  ...,  26, 603,  21]])

In [24]:
X = padded_training_sequence[:, :-1]    #Changing into supervised
y = padded_training_sequence[:,-1]

In [25]:
X

tensor([[  0,   0,   0,  ...,   0,   0,   1],
        [  0,   0,   0,  ...,   0,   1,   2],
        [  0,   0,   0,  ...,   1,   2,   3],
        ...,
        [  0,   0,   0,  ..., 601,   1, 602],
        [  0,   0,   0,  ...,   1, 602,  26],
        [  0,   0,   0,  ..., 602,  26, 603]])

In [26]:
y

tensor([  2,   3,   4,  ...,  26, 603,  21])

In [27]:
class CustomDataset(Dataset):

  def __init__(self, X, y):
    self.X = X
    self.y = y

  def __len__(self):
    return self.X.shape[0]

  def __getitem__(self, idx):
    return self.X[idx], self.y[idx]

In [28]:
dataset = CustomDataset(X,y)

In [29]:
len(dataset)

1841

In [30]:
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)  #to process 32 sequences at once

In [31]:
class LSTMModel(nn.Module):

  def __init__(self, vocab_size):
    super().__init__()
    self.embedding = nn.Embedding(vocab_size, 100)    #every word is represented using vector of size 100
    self.lstm = nn.LSTM(100, 150, batch_first=True)    # no. of neurons in embedding layer,no. of neurons in other layers of lstm
    self.fc = nn.Linear(150, vocab_size)

  def forward(self, x):
    embedded = self.embedding(x)
    intermediate_hidden_states, (final_hidden_state, final_cell_state) = self.lstm(embedded)
    output = self.fc(final_hidden_state.squeeze(0))
    return output

In [32]:
x=nn.Embedding(289,100)    #UNDERSTANDING LSTM ARCHITECTURE
y=nn.LSTM(100,150,batch_first=True)
z=nn.Linear(150,289)

In [33]:
a=dataset[0][0].unsqueeze(0)

In [34]:
b=x(a)

In [35]:
c,d=y(b)

In [36]:
c.shape   #set of all intermediate hidden states

torch.Size([1, 175, 150])

In [37]:
e,f=d

In [38]:
e.shape   #cell state of final timestamp
f.shape   #hiddden state of final timestamp

torch.Size([1, 1, 150])

In [39]:
z(f.squeeze(0)).shape    #Logit value for all words (we can extract probabilities)

torch.Size([1, 289])

In [40]:
model = LSTMModel(len(vocab))

In [41]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [42]:
model.to(device)

LSTMModel(
  (embedding): Embedding(604, 100)
  (lstm): LSTM(100, 150, batch_first=True)
  (fc): Linear(in_features=150, out_features=604, bias=True)
)

In [43]:
epochs = 50
learning_rate = 0.001

criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [44]:
# training loop

for epoch in range(epochs):
  total_loss = 0

  for batch_x, batch_y in dataloader:

    batch_x, batch_y = batch_x.to(device), batch_y.to(device)  #importing to gpu

    optimizer.zero_grad()

    output = model(batch_x)

    loss = criterion(output, batch_y)

    loss.backward()

    optimizer.step()

    total_loss = total_loss + loss.item()

  print(f"Epoch: {epoch + 1}, Loss: {total_loss:.4f}")

Epoch: 1, Loss: 353.5819
Epoch: 2, Loss: 306.5803
Epoch: 3, Loss: 284.1554
Epoch: 4, Loss: 258.5847
Epoch: 5, Loss: 232.3996
Epoch: 6, Loss: 207.6156
Epoch: 7, Loss: 183.3013
Epoch: 8, Loss: 160.3867
Epoch: 9, Loss: 139.2998
Epoch: 10, Loss: 120.2089
Epoch: 11, Loss: 102.6939
Epoch: 12, Loss: 86.6284
Epoch: 13, Loss: 72.8970
Epoch: 14, Loss: 61.0631
Epoch: 15, Loss: 51.0748
Epoch: 16, Loss: 42.7557
Epoch: 17, Loss: 36.0006
Epoch: 18, Loss: 30.2133
Epoch: 19, Loss: 25.7332
Epoch: 20, Loss: 21.7401
Epoch: 21, Loss: 18.8057
Epoch: 22, Loss: 16.1714
Epoch: 23, Loss: 13.9407
Epoch: 24, Loss: 12.2057
Epoch: 25, Loss: 10.8463
Epoch: 26, Loss: 9.6230
Epoch: 27, Loss: 8.5352
Epoch: 28, Loss: 7.7086
Epoch: 29, Loss: 6.9829
Epoch: 30, Loss: 6.3533
Epoch: 31, Loss: 5.7943
Epoch: 32, Loss: 5.3328
Epoch: 33, Loss: 4.8816
Epoch: 34, Loss: 4.6165
Epoch: 35, Loss: 4.2457
Epoch: 36, Loss: 3.9241
Epoch: 37, Loss: 3.6956
Epoch: 38, Loss: 3.4382
Epoch: 39, Loss: 3.2134
Epoch: 40, Loss: 3.0204
Epoch: 41, Lo

In [45]:
# prediction

def prediction(model, vocab, text):

  # tokenize
  tokenized_text = word_tokenize(text.lower())

  # text -> numerical indices
  numerical_text = text_to_indices(tokenized_text, vocab)

  # padding
  padded_text = torch.tensor([0] * (61 - len(numerical_text)) + numerical_text, dtype=torch.long).unsqueeze(0)

  # send to model
  output = model(padded_text)

  # predicted index
  value, index = torch.max(output, dim=1)

  # merge with text
  return text + " " + list(vocab.keys())[index]



In [48]:
prediction(model, vocab, "The presence of a black hole can be inferred through its")

'The presence of a black hole can be inferred through its interaction'

In [50]:
import time

num_tokens = 10
input_text = "how are"

for i in range(num_tokens):
  output_text = prediction(model, vocab, input_text)
  print(output_text)
  input_text = output_text
  time.sleep(0.5)


how are whose
how are whose gravitational
how are whose gravitational fields
how are whose gravitational fields are
how are whose gravitational fields are too
how are whose gravitational fields are too strong
how are whose gravitational fields are too strong for
how are whose gravitational fields are too strong for light
how are whose gravitational fields are too strong for light to
how are whose gravitational fields are too strong for light to escape


In [54]:
#We can also try extracting n max possible next words of a sequence instead of 1 only

In [52]:
dataloader1 = DataLoader(dataset, batch_size=32, shuffle=False)

In [53]:
# Function to calculate accuracy
def calculate_accuracy(model, dataloader, device):
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No need to compute gradients
        for batch_x, batch_y in dataloader1:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            # Get model predictions
            outputs = model(batch_x)

            # Get the predicted word indices
            _, predicted = torch.max(outputs, dim=1)

            # Compare with actual labels
            correct += (predicted == batch_y).sum().item()
            total += batch_y.size(0)

    accuracy = correct / total * 100
    return accuracy

# Compute accuracy
accuracy = calculate_accuracy(model, dataloader, device)
print(f"Model Accuracy: {accuracy:.2f}%")


Model Accuracy: 99.73%
