In [6]:
# Google Colab: Chatbot for Data Structures and Algorithms

# Install necessary packages
!pip install transformers
!pip install torch
!pip install sentence-transformers

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from sentence_transformers import SentenceTransformer, util
import numpy as np

# Load the pre-trained GPT-2 model and tokenizer
model_name = 'gpt2-medium'
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Load sentence transformer model for semantic similarity
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Define some common questions and answers related to data structures and algorithms
faq = {
    "What is a data structure?": "A data structure is a way to store and organize data in a computer so that it can be used efficiently.",
    "What are the types of data structures?": "Data structures can be divided into two categories: linear data structures (like arrays, linked lists, stacks, and queues) and non-linear data structures (like trees and graphs).",
    "What is an algorithm?": "An algorithm is a step-by-step procedure or formula for solving a problem.",
    "What is the time complexity of an algorithm?": "Time complexity is a way to describe the amount of time an algorithm takes to run as a function of the length of the input.",
    "What is a binary search tree?": "A binary search tree (BST) is a node-based data structure in which each node has at most two children, referred to as the left child and the right child. The left subtree of a node contains only nodes with keys less than the node’s key, and the right subtree only nodes with keys greater than the node’s key.",
    "What is a graph?": "A graph is a collection of nodes (or vertices) and edges connecting some or all of them. Graphs can be directed or undirected.",
    "What is a stack?": "A stack is a linear data structure that follows the Last In, First Out (LIFO) principle, where the last element added is the first one to be removed.",
    "What is a queue?": "A queue is a linear data structure that follows the First In, First Out (FIFO) principle, where the first element added is the first one to be removed."
}

# Convert the FAQ to embeddings
faq_questions = list(faq.keys())
faq_answers = list(faq.values())
faq_embeddings = embedder.encode(faq_questions, convert_to_tensor=True)

def get_response(question, top_k=1):
    # Encode the user's question
    question_embedding = embedder.encode(question, convert_to_tensor=True)

    # Compute similarity scores
    cos_scores = util.pytorch_cos_sim(question_embedding, faq_embeddings)[0]

    # Find the top k most similar questions
    top_results = torch.topk(cos_scores, k=top_k)

    # Get the answer corresponding to the most similar question
    best_match_idx = top_results[1].item()
    response = faq_answers[best_match_idx]

    return response

# Chatbot interaction
print("Hello! I'm your Data Structures and Algorithms Chatbot. Ask me anything related to data structures and algorithms.")

while True:
    question = input("You: ")
    if question.lower() in ["exit", "quit", "bye"]:
        print("Chatbot: Goodbye!")
        break
    response = get_response(question)
    print(f"Chatbot: {response}")


Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Hello! I'm your Data Structures and Algorithms Chatbot. Ask me anything related to data structures and algorithms.
You: what is data srutucre
Chatbot: A data structure is a way to store and organize data in a computer so that it can be used efficiently.
You: data strucute
Chatbot: A data structure is a way to store and organize data in a computer so that it can be used efficiently.


KeyboardInterrupt: Interrupted by user