In [2]:
import yaml
import torch
from transformers import AutoTokenizer, AutoModel, pipeline
from sentence_transformers import SentenceTransformer, util

In [3]:
def load_fitness_data(file_path):
    with open(file_path, 'r') as file:
        data = yaml.safe_load(file)
    return [(qa['Question'], qa['Answer']) for qa in data]

In [5]:
file_path = '/content/FitnessQA_converted.yaml'
fitness_data = load_fitness_data(file_path)
questions, answers = zip(*fitness_data)
print("Loaded", len(fitness_data), "Q&A pairs successfully!")

Loaded 20 Q&A pairs successfully!


In [6]:

embedder = SentenceTransformer('all-MiniLM-L6-v2')
question_embeddings = embedder.encode(questions, convert_to_tensor=True)

tokenizer = AutoTokenizer.from_pretrained('facebook/blenderbot-400M-distill')
generative_model = pipeline("text-generation", model="facebook/blenderbot-400M-distill")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/127k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/62.9k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/16.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/310k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/730M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/730M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/347 [00:00<?, ?B/s]

Device set to use cpu


In [7]:
def retrieve_answer(user_query):
    query_embedding = embedder.encode(user_query, convert_to_tensor=True)
    similarity_scores = util.pytorch_cos_sim(query_embedding, question_embeddings)
    best_match_idx = torch.argmax(similarity_scores).item()
    return answers[best_match_idx]

In [8]:
def generate_response(user_query):
    retrieved_answer = retrieve_answer(user_query)
    prompt = f"User: {user_query}\nBot: {retrieved_answer}"
    generated_response = generative_model(prompt, max_length=100, do_sample=True, truncation=True)[0]['generated_text']
    return generated_response

In [9]:
def chatbot():
    print("Fitness Chatbot (Type 'exit' to quit)")
    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            break
        response = generate_response(user_input)
        print(f"Bot: {response}")

In [11]:
chatbot()

Fitness Chatbot (Type 'exit' to quit)
You: lose weight how?
Bot: User: lose weight how?
Bot: To lose weight, maintain a calorie deficit by eating fewer calories than you burn. Combine strength training and cardio for best results.
You: How to build muscles fast?
Bot: User: How to build muscles fast?
Bot: Focus on compound movements like squats, deadlifts, and bench presses. Eat protein-rich foods and progressively overload your muscles.
You: What are the best supplements for muscle building?
Bot: User: What are the best supplements for muscle building?
Bot: Whey protein, creatine monohydrate, and branched-chain amino acids (BCAAs) are effective for muscle growth and recovery.
You: Exit
