In [None]:
!pip install nltk

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import Counter
from torch.utils.data import Dataset, DataLoader
from nltk.tokenize import word_tokenize
import nltk

In [None]:
# Training corpus: FAQ-style text with one heading, question, answer, or list item per line.
# Later cells tokenize it as a whole (to build the vocabulary) and line by line
# (to build the training sequences).
document = """About the Program
What is the course fee for Finance & Investment Bootcamp (FIB 2025)?
The course follows a monthly subscription model where you have to make monthly payments of Rs 899/month.
What is the total duration of the course?
The total duration of the course is 5 months. So the total course fee becomes 899*5 = Rs 4500 (approx.)
What is the syllabus of the mentorship program?
We will be covering the following modules:
Personal Finance Management
Stock Market Fundamentals
Technical & Fundamental Analysis
Mutual Funds & SIPs
Cryptocurrency Basics
Risk Management
Portfolio Diversification
Financial Planning Tools
You can check the detailed syllabus here - https://learn.finmentor.in/courses/Finance-Investment-Bootcamp-2025
Will we cover taxation or audit topics in this program?
No, this program does not include taxation or auditing. It focuses on investments and financial literacy.
What if I miss a live session? Will I get a recording of the session?
Yes, all sessions are recorded and made available to you in the student dashboard.
Where can I find the class schedule?
You can view the course timetable here - https://docs.google.com/spreadsheets/d/finance-timetable-sheet/edit?usp=sharing.
What is the time duration of each live session?
Each live session is approximately 90 minutes long.
What language will be used in the sessions?
The sessions will be conducted in English and Hindi (bilingual).
How will I be informed about upcoming classes?
We will send you an email before every scheduled session once you enroll.
Can I join the course without prior finance background?
Yes, this course is suitable for beginners and enthusiasts with no formal finance education.
I am late, can I join the program in the middle?
Yes, enrollment is open throughout the year, and you can access all previous content.
If I join/pay in the middle, will I be able to see previous content?
Yes, once you subscribe, you get full access to all earlier sessions and resources.
Do I need to submit assignments?
No mandatory submissions. Self-assessment tasks and solutions are provided for your practice.
Will the course include real-world case studies?
Yes, we analyze real financial markets and scenarios as part of the curriculum.
Where can we contact you?
Email us anytime at support@finmentor.in
Payment/Registration related questions
Where do I make payments?
All payments should be made on our official website: https://learn.finmentor.in/
Can I pay the full course fee at once?
No, the course runs on a monthly subscription model.
What is the subscription validity?
Each payment gives you 30 days of access. For example, payment on 12th June gives access until 12th July.
What is the refund policy?
We offer a 7-day full refund from the day you register.
I'm not in India and can’t pay on the website. What should I do?
Please write to us at support@finmentor.in, and we’ll assist with alternative payment methods.
Post-registration queries
How long do I retain access to the videos?
You can access videos as long as your subscription is active. Once full payment is made, access continues until Jan 2026.
Why is lifetime access not available?
To ensure the course remains affordable and content remains updated.
How do I clear doubts after class?
Submit your queries through the dashboard form and our team will arrange a 1-on-1 call.
Can I ask questions from previous weeks?
Yes, simply mention the week/topic when filling out the doubt form.
Certificate and Career Support
What are the criteria to get a certificate?
You must:
Complete the full payment of Rs 4500
Attempt all quizzes and submit the final portfolio project
How do I pay for missed months if I join late?
You’ll see the previous month payment options once you pay for the current one.
Is placement or job support included?
Yes, we provide **career support**, not job guarantees. This includes:
Investment firm portfolio building workshops
Financial analyst resume guidance
Interview readiness sessions
Access to industry mentors and webinars
"""


In [None]:
# Download the NLTK tokenizer data under both resource names ('punkt' and 'punkt_tab').
# NOTE(review): presumably word_tokenize needs one or the other depending on the
# installed NLTK version — confirm which is actually required.
nltk.download('punkt')
nltk.download('punkt_tab')

In [None]:
# Lower-case the whole corpus and tokenize it with NLTK's word_tokenize;
# the resulting token list is only used to build the vocabulary.
tokens = word_tokenize(document.lower())

In [None]:
# Build the vocabulary: token -> integer id.
# Id 0 is reserved for '<pad>' because the padding code in this notebook fills
# sequences with literal 0s; '<unk>' gets its own id so that unknown words are
# no longer indistinguishable from padding.
vocab = {'<pad>': 0, '<unk>': 1}

# dict.fromkeys de-duplicates while keeping first-occurrence order,
# so ids are assigned in the order tokens first appear in the corpus.
for token in dict.fromkeys(tokens):
  if token not in vocab:
    vocab[token] = len(vocab)

vocab

In [None]:
# Vocabulary size (special tokens included); later passed to the model as vocab_size
len(vocab)

In [None]:
# Split the corpus on newlines; each line is treated as one training sentence.
# The trailing newline in `document` yields a final empty string, which
# contributes no training sequences further down.
input_sentences = document.split('\n')

In [None]:
def text_to_indices(sentence, vocab):
  """Map a sequence of tokens to their vocabulary ids.

  Args:
    sentence: iterable of tokens (callers pass the output of word_tokenize).
    vocab: dict mapping token -> integer id; needs a '<unk>' entry whenever
      a token is missing from it.

  Returns:
    A list with one id per token; tokens absent from `vocab` map to vocab['<unk>'].
  """
  return [vocab[token] if token in vocab else vocab['<unk>'] for token in sentence]


In [None]:
# Encode every line of the corpus as a list of vocabulary ids
input_numerical_sentences = [
  text_to_indices(word_tokenize(line.lower()), vocab)
  for line in input_sentences
]


In [None]:
# Number of encoded lines (one per entry of input_sentences)
len(input_numerical_sentences)

In [None]:
# Expand each encoded line into all of its prefixes of length >= 2.
# The last id of every prefix is later split off as the prediction target.
training_sequence = [
  encoded[:end]
  for encoded in input_numerical_sentences
  for end in range(2, len(encoded) + 1)
]

In [None]:
# Total number of training prefixes
len(training_sequence)

In [None]:
# Peek at the first five training prefixes
training_sequence[:5]

In [None]:
# Length of every training prefix; the maximum decides the padded width
len_list = [len(sequence) for sequence in training_sequence]

max(len_list)

In [None]:
# First training prefix, before padding
training_sequence[0]

In [None]:
# Left-pad every sequence with 0s up to the length of the longest one.
# The maximum is computed once up front rather than once per sequence.
max_seq_len = max(len_list)

padded_training_sequence = []
for sequence in training_sequence:
  padded_training_sequence.append([0] * (max_seq_len - len(sequence)) + sequence)

In [None]:
# Spot-check: a padded sequence should have length max(len_list)
len(padded_training_sequence[10])

In [None]:
# Convert the list of equal-length id lists into one integer (long) tensor.
# Note: the name is rebound from a Python list to a tensor here.
padded_training_sequence = torch.tensor(padded_training_sequence, dtype=torch.long)

In [None]:
# Inspect the padded tensor
padded_training_sequence

In [None]:
# Inputs: every column except the last; targets: the last column (the next-token id)
X, y = padded_training_sequence[:, :-1], padded_training_sequence[:, -1]

In [None]:
# Model inputs: each row is a left-padded prefix without its final token
X

In [None]:
# Targets: the final token id of each prefix
y

In [None]:
class CustomDataset(Dataset):
  """Pairs each input row of X with the target at the same position in y."""

  def __init__(self, X, y):
    # X and y are stored as given; both are later indexed with the same idx
    self.X, self.y = X, y

  def __len__(self):
    """Number of examples, taken from the first dimension of X."""
    num_examples = self.X.shape[0]
    return num_examples

  def __getitem__(self, idx):
    """Return the (input, target) pair at position idx."""
    features = self.X[idx]
    target = self.y[idx]
    return features, target

In [None]:
# Wrap inputs and targets so a DataLoader can batch them
dataset = CustomDataset(X,y)

In [None]:
# Number of training examples
len(dataset)

In [None]:
# Training loader: mini-batches of 32 examples drawn in shuffled order
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
class LSTMModel(nn.Module):
  """Next-token classifier: embedding -> LSTM -> linear layer over the vocabulary.

  Args:
    vocab_size: number of distinct token ids; sizes both the embedding table
      and the output logits.
    embedding_dim: size of each token embedding (default 100, the value that
      used to be hard-coded).
    hidden_dim: size of the LSTM hidden state (default 150, the value that
      used to be hard-coded).
  """

  def __init__(self, vocab_size, embedding_dim=100, hidden_dim=150):
    super().__init__()
    self.embedding = nn.Embedding(vocab_size, embedding_dim)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
    self.fc = nn.Linear(hidden_dim, vocab_size)

  def forward(self, x):
    """Return logits of shape (batch, vocab_size) for a (batch, seq_len) tensor of token ids."""
    embedded = self.embedding(x)
    # Only the final hidden state is needed to predict the next token
    _, (final_hidden_state, _) = self.lstm(embedded)
    # final_hidden_state has a leading layer dimension; index the last layer
    # (identical to squeeze(0) for this single-layer LSTM).
    return self.fc(final_hidden_state[-1])

In [None]:
# Seed PyTorch's RNG so weight initialisation (and the random draws that follow)
# are repeatable on a fresh Restart & Run All.
torch.manual_seed(42)
model = LSTMModel(len(vocab))

In [None]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Move the model to the selected device
model.to(device)

In [None]:
# Training hyperparameters
epochs = 50
learning_rate = 0.001

# Cross-entropy over the vocabulary logits, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# training loop

for epoch in range(epochs):
  # prediction() and calculate_accuracy() leave the model in eval mode;
  # switch back so re-running this cell always trains in training mode.
  model.train()
  total_loss = 0.0

  for batch_x, batch_y in dataloader:

    batch_x, batch_y = batch_x.to(device), batch_y.to(device)

    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    output = model(batch_x)

    loss = criterion(output, batch_y)

    loss.backward()

    optimizer.step()

    total_loss += loss.item()

  # Reported value is the sum of the per-batch losses for this epoch
  print(f"Epoch: {epoch + 1}, Loss: {total_loss:.4f}")

In [None]:
import torch
from nltk.tokenize import word_tokenize

def prediction(model, vocab, text, max_len=61):
    """Predict the next token for `text` and return `text` with that token appended.

    Args:
        model: trained module mapping a (1, seq_len) tensor of token ids to
            logits over the vocabulary.
        vocab: dict mapping token -> integer id; should contain '<unk>' for
            tokens that are not in the vocabulary.
        text: prompt string; it is lower-cased and tokenized with word_tokenize.
        max_len: width the encoded prompt is left-padded to with 0s. Defaults
            to 61, the value that used to be hard-coded; it should match the
            input width used during training. Prompts with more tokens than
            this are passed through unpadded and untruncated.

    Returns:
        `text + " " + predicted_word`.

    Note:
        Puts the model in eval mode as a side effect.
    """
    # Get model's device (cpu or cuda)
    device = next(model.parameters()).device

    # Tokenize, then map tokens to vocabulary ids
    token_ids = text_to_indices(word_tokenize(text.lower()), vocab)

    # Left-pad with 0s; never a negative count when the prompt exceeds max_len
    pad_count = max(0, max_len - len(token_ids))
    padded_text = torch.tensor([0] * pad_count + token_ids, dtype=torch.long).unsqueeze(0)

    # Move input to same device as model
    padded_text = padded_text.to(device)

    # Predict
    model.eval()
    with torch.no_grad():
        output = model(padded_text)

    # Get predicted index
    _, index = torch.max(output, dim=1)
    predicted_index = index.item()

    # Convert index back to word without materialising key/value lists
    predicted_word = next(token for token, idx in vocab.items() if idx == predicted_index)

    # Merge with text
    return text + " " + predicted_word


In [None]:
# Sanity check with an empty prompt: the model input consists only of padding zeros
prediction(model, vocab, "")

In [None]:
import time

# Greedy generation: repeatedly append the predicted next word to the prompt
num_tokens = 20
input_text = "You can access videos"

for step in range(num_tokens):
  input_text = output_text = prediction(model, vocab, input_text)
  print(output_text)
  time.sleep(0.5)  # brief pause between printed steps


In [None]:
# Second loader over the same dataset, without shuffling; used for the accuracy computation
dataloader1 = DataLoader(dataset, batch_size=32, shuffle=False)

In [None]:
# Function to calculate accuracy
def calculate_accuracy(model, dataloader, device):
    """Return the percentage of examples whose top-scoring class equals the label.

    Args:
        model: module mapping a batch of inputs to per-class scores of shape
            (batch, num_classes).
        dataloader: iterable of (batch_x, batch_y) tensor pairs to evaluate on.
        device: device the batches are moved to before the forward pass.

    Returns:
        Accuracy in percent (0-100) as a float; 0.0 when `dataloader` yields
        no examples.

    Note:
        Puts the model in eval mode as a side effect.
    """
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0

    with torch.no_grad():  # No need to compute gradients
        # Iterate the loader that was passed in, not a notebook-level global
        for batch_x, batch_y in dataloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            # Get model predictions
            outputs = model(batch_x)

            # Get the predicted word indices
            _, predicted = torch.max(outputs, dim=1)

            # Compare with actual labels
            correct += (predicted == batch_y).sum().item()
            total += batch_y.size(0)

    # Avoid a ZeroDivisionError when there is nothing to evaluate
    if total == 0:
        return 0.0

    accuracy = correct / total * 100
    return accuracy

# Compute accuracy on the unshuffled evaluation loader.
# Note: dataloader1 wraps the same dataset the model was trained on, so this is
# training accuracy, not accuracy on held-out data.
accuracy = calculate_accuracy(model, dataloader1, device)
print(f"Model Accuracy: {accuracy:.2f}%")
