In [1]:
import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)
from datasets import load_dataset
from peft import LoraConfig, PeftModel

from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
from langchain.document_loaders import AsyncChromiumLoader

from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline

  from .autonotebook import tqdm as notebook_tqdm
2024-01-08 18:34:04.350448: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-08 18:34:04.350479: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-08 18:34:04.351115: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-08 18:34:04.355690: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [1]:
!pip install faster-whisper

Collecting faster-whisper
  Downloading faster_whisper-1.0.1-py3-none-any.whl.metadata (14 kB)
Collecting av==11.* (from faster-whisper)
  Downloading av-11.0.0-cp39-cp39-win_amd64.whl.metadata (4.7 kB)
Collecting ctranslate2<5,>=4.0 (from faster-whisper)
  Downloading ctranslate2-4.1.0-cp39-cp39-win_amd64.whl.metadata (10 kB)
Collecting onnxruntime<2,>=1.14 (from faster-whisper)
  Downloading onnxruntime-1.17.1-cp39-cp39-win_amd64.whl.metadata (4.4 kB)
Downloading faster_whisper-1.0.1-py3-none-any.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   - -------------------------------------- 0.1/1.5 MB 1.1 MB/s eta 0:00:02
   -- ------------------------------------- 0.1/1.5 MB 1.1 MB/s eta 0:00:02
   --- ------------------------------------ 0.1/1.5 MB 1.1 MB/s eta 0:00:02
   ----- ---------------------------------- 0.2/1.5 MB 980.4 kB/s eta 0:00:02
   ------ --------------------------------- 0.2/1.5 MB 958.6 kB/s eta 0:00:02
   ------- -------------------

In [2]:
#!pip install -q -U torch datasets transformers tensorflow langchain playwright html2text sentence_transformers faiss-cpu
#!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7

In [3]:
# Checkpoint to load; the tokenizer and the quantized weights are both fetched by this name.
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'
#model_name= "HuggingFaceH4/zephyr-7b-beta"

# NOTE(review): trust_remote_code=True allows code shipped in the model repository to run
# locally; presumably unnecessary for this checkpoint -- confirm and drop if so.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Reuse the EOS token for padding and pad on the right-hand side.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# --- 4-bit quantization settings ---
use_4bit = True
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
use_nested_quant = False  # (double quantization)

# Resolve the dtype name ("float16") to the actual torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16.
# The capability probe raises when no CUDA device is present, so only run it
# when CUDA is actually available.
if compute_dtype == torch.float16 and use_4bit and torch.cuda.is_available():
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)


# Load the causal LM with the quantization config defined above.
mistral_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)

Your GPU supports bfloat16: accelerate training with bf16=True


Loading checkpoint shards: 100%|██████████| 3/3 [00:06<00:00,  2.29s/it]


In [20]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline


def _build_text_generation_pipeline(temperature, max_new_tokens=200, repetition_penalty=1.1):
    """Create a text-generation pipeline around the shared model and tokenizer.

    Parameters:
    temperature (float): Sampling temperature forwarded to generation.
    max_new_tokens (int): Upper bound on the number of generated tokens.
    repetition_penalty (float): Repetition penalty forwarded to generation.

    Returns:
    The transformers pipeline object built with these settings.
    """
    return pipeline(
        model=mistral_model,
        tokenizer=tokenizer,
        task="text-generation",
        # Without do_sample=True decoding is greedy and `temperature` is ignored.
        do_sample=True,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        return_full_text=True,
        max_new_tokens=max_new_tokens,
    )


# Lower temperature for rewriting the follow-up question into a standalone query.
standalone_query_generation_pipeline = _build_text_generation_pipeline(temperature=0.2)
standalone_query_generation_llm = HuggingFacePipeline(pipeline=standalone_query_generation_pipeline)

# Higher temperature for the final answer.
response_generation_pipeline = _build_text_generation_pipeline(temperature=0.6)
response_generation_llm = HuggingFacePipeline(pipeline=response_generation_pipeline)

In [5]:
#!playwright install 
#!playwright install-deps 

In [6]:
import nest_asyncio

# Allow nested use of the event loop; presumably needed so the async loader
# can run inside the notebook's already-running loop.
nest_asyncio.apply()

# Articles to index, expressed as a site root plus per-article paths.
_SITE_ROOT = "https://www..com"
_ARTICLE_PATHS = (
    "/blog/the-need-for-speed/",
    "/blog/improve-your-green-reading-with-these-easy-steps/",
    "/blog/make-the-most-out-of-your--practice/",
    "/products/indoor/features/",
    "/blog/strokes-gained-/",
    "/blog/get-in-the-hole/",
    "/blog/visual-cues/",
    "/blog/your-trackman-for-/",
    "/blog/putter-fitting-with-martin-stecher/",
)
articles = [_SITE_ROOT + article_path for article_path in _ARTICLE_PATHS]

# Fetch every article page; `docs` holds the loaded documents.
loader = AsyncChromiumLoader(articles)
docs = loader.load()

In [7]:
# Converts HTML to plain text 
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=0) # @TODO: Make this configurable and minimize the chunk size
chunked_documents = text_splitter.split_documents(docs_transformed)

# Load chunked documents into the FAISS index
db = FAISS.from_documents(chunked_documents, HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever(k = 1)

Created a chunk of size 704, which is longer than the specified 600
Created a chunk of size 671, which is longer than the specified 600
Created a chunk of size 751, which is longer than the specified 600
Created a chunk of size 725, which is longer than the specified 600
Created a chunk of size 827, which is longer than the specified 600
Created a chunk of size 820, which is longer than the specified 600
Created a chunk of size 778, which is longer than the specified 600
Created a chunk of size 704, which is longer than the specified 600
Created a chunk of size 917, which is longer than the specified 600
Created a chunk of size 763, which is longer than the specified 600
Created a chunk of size 946, which is longer than the specified 600
Created a chunk of size 676, which is longer than the specified 600
Created a chunk of size 1006, which is longer than the specified 600
Created a chunk of size 749, which is longer than the specified 600
Created a chunk of size 636, which is longer th

In [8]:
# LANGCHAIN Prompt Template - CONVERSATIONAL LLM

from langchain.schema import format_document
from langchain_core.messages import get_buffer_string
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain.memory import ConversationBufferMemory
from langchain.prompts.prompt import PromptTemplate
from langchain_core.prompts.chat import ChatPromptTemplate

from operator import itemgetter

In [9]:
# Prompt text that asks the model to rewrite a follow-up question as a standalone
# question. It has two placeholders: {chat_history} and {question}.
# NOTE(review): the second example shows the first history line without a role
# prefix while the next line uses "AI:"; presumably the real chat history string
# carries role prefixes on both -- confirm the example matches the actual format.
# NOTE(review): the examples end with "Standalone Question:" but the final slot
# uses "Standalone question:" (different capitalisation) -- confirm this is intended.
_template = """
[INST] 
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language, that can be used to query a FAISS index. This query will be used to retrieve documents with additional context. 

Let me share a couple examples that will be important. 

If you do not see any chat history, you MUST return the "Follow Up Input" as is:

```
Chat History:

Follow Up Input: what are fundamentals of golf?
Standalone Question:
what are fundamentals of golf?
```

If this is the second question onwards, you should properly rephrase the question like this:

```
Chat History:
what are fundamentals of golf?
AI: 
Fundamentals of golf are speed, line, and green reading.

Follow Up Input: Can you explain them?
Standalone Question:
Can you explain the fundamentals of golf?
```

Now, with those examples, here is the actual chat history and input question.

Chat History:
{chat_history}

Follow Up Input: {question}
Standalone question:
[your response here]
[/INST] 
"""
# Build the prompt template object from the text above.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

In [10]:
# Prompt text for the final answer. It has two placeholders: {context} (the
# retrieved document text) and {question}.
template = """
[INST] 
You are an AI assistant that helps users to improve their  skills. Answer the question based only on the following context:
{context}

Question: {question}
[/INST] 
"""
# Use a plain-string PromptTemplate, like CONDENSE_QUESTION_PROMPT: the prompt is
# piped into a text-completion LLM, and a chat prompt would be stringified with a
# "Human: " role prefix in front of the [INST] block.
ANSWER_PROMPT = PromptTemplate.from_template(template)

In [11]:
# Default per-document prompt: renders just the document's page content.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")


def _combine_documents(
    docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"
):
    """Format every document with `document_prompt` and join the results.

    Parameters:
    docs: Iterable of documents to render.
    document_prompt: Prompt used to format each document.
    document_separator (str): String placed between the rendered documents.

    Returns:
    str: The rendered documents joined by `document_separator`.
    """
    rendered_parts = []
    for document in docs:
        rendered_parts.append(format_document(document, document_prompt))
    return document_separator.join(rendered_parts)

In [21]:
# Instantiate ConversationBufferMemory
# Conversation memory: reads the "question" key as input, stores the "answer"
# key as output, and returns history as message objects.
memory = ConversationBufferMemory(
    input_key="question", output_key="answer", return_messages=True
)

# Step 1 - load memory.
# Adds a "chat_history" key to the incoming payload, taken from the "history"
# entry of the memory variables.
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

# Step 2 - condense the follow-up into a standalone question.
_condense_inputs = {
    "question": itemgetter("question"),
    "chat_history": lambda payload: get_buffer_string(payload["chat_history"]),
}
standalone_question = {
    "standalone_question": _condense_inputs
    | CONDENSE_QUESTION_PROMPT
    | standalone_query_generation_llm,
}

# Step 3 - retrieve documents for the standalone question and carry it forward
# as the "question".
retrieved_documents = {
    "docs": itemgetter("standalone_question") | retriever,
    "question": itemgetter("standalone_question"),
}

# Step 4 - build the inputs of the answer prompt.
final_inputs = {
    "context": lambda payload: _combine_documents(payload["docs"]),
    "question": lambda payload: payload["question"],
}

# Step 5 - generate the answer, returning the question and context next to it.
answer = {
    "answer": final_inputs | ANSWER_PROMPT | response_generation_llm,
    "question": itemgetter("question"),
    "context": final_inputs["context"],
}

# Full pipeline: memory -> standalone question -> retrieval -> answer.
final_chain = loaded_memory | standalone_question | retrieved_documents | answer

In [22]:
import time
def call_conversational_rag(question, chain, memory):
    """
    Run one conversational RAG turn: invoke the chain, record the turn in memory, and print the elapsed time.

    Parameters:
    question (str): The question to send to the chain.
    chain: Object exposing `invoke(inputs)`; it is called with `{"question": question}` and must
        return a mapping containing an "answer" key.
    memory: Object exposing `save_context(inputs, outputs)`; it receives the inputs and
        `{"answer": result["answer"]}`.

    Returns:
    The value returned by `chain.invoke`, unchanged.
    """
    # perf_counter is a monotonic clock, so the measured duration cannot go
    # negative or jump if the system wall clock is adjusted mid-call.
    start = time.perf_counter()

    # Prepare the input for the chain
    inputs = {"question": question}

    # Invoke the chain to get an answer
    result = chain.invoke(inputs)

    # Save the current question and its answer to memory for future context
    memory.save_context(inputs, {"answer": result["answer"]})

    # The reported time covers both the chain call and the memory update.
    elapsed = time.perf_counter() - start
    print("response time :", elapsed)
    # Return the result
    return result

In [14]:
# Send a question through the chain; the helper also records the turn in `memory`.
# The call is the cell's last expression, so the whole result is displayed.
question = "how can i improve my ?"
call_conversational_rag(question, final_chain, memory)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


response time : 12.468083381652832


{'answer': 'To improve your  skills in golf, consider focusing on the following areas:\n1. Alignment: Ensure your body, club, and putter face are all aligned correctly towards your target.\n2. Tempo: Maintain a consistent tempo throughout your stroke for better control and accuracy.\n3. Contact: Make solid contact with the ball by keeping your putter face square at impact.\n4. Green Reading: Understand the contours and slopes of the green to make informed decisions about the speed and direction of your putts.\n5. Practice: Regularly practice  drills and techniques to build muscle memory and confidence.\n\nAdditionally, you can use reference points to prepare yourself for various options on the green. Find a specific slope percentage and practice hitting putts to that target regularly. This will help you replicate those putts on the course and improve your confidence in your green reading abilities. Another effective method is to map out your putt using',
 'question': "To help you impro

In [15]:
# Ask another question through the same chain and memory, then show only the answer text.
question = "what can you recommend me to improve my  skills? i have no idea about ."
text = call_conversational_rag(question, final_chain, memory)
text['answer']

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


response time : 13.167700290679932


"For someone new to  in golf, here are some recommendations to improve your skills:\n\n1. Proper alignment: Learn how to align your body, club, and putter face correctly towards your target. You can use reference points such as a line on the ground or a specific slope percentage to help with alignment.\n2. Consistent tempo: Develop a consistent tempo throughout your stroke for better control and accuracy. You can practice this by focusing on the length and rhythm of your swing.\n3. Solid contact: Practice making solid contact with the ball by keeping your putter face square at impact. You can use drills such as hitting balls to specific targets to help improve your contact.\n4. Basic green reading: Start learning the basics of interpreting contours and slopes on the green to make informed decisions about the speed and direction of your putts. You can practice this by observing the putt's path and adjusting your aim accordingly."

In [32]:
# Ask for a training plan through the same chain and memory; show only the answer text.
question = "can you help me to get better? Can you prepare a training plan for me"
text = call_conversational_rag(question, final_chain, memory)
text['answer']

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


response time : 14.089851379394531


"Based on the context provided, here's how I would answer the question:\n\nTo create a personalized training plan for improving your  skills, consider the following steps:\n\n1. Assess your current abilities: Evaluate your strengths and weaknesses in , including aim, distance control, green reading, and mental focus. You can do this by observing your  performance on the course or during practice sessions.\n2. Set specific, measurable goals: Establish clear objectives for your  improvement. For example, you might aim to reduce the number of three-putts you have per round, improve your average number of putts per round, or achieve a certain  percentage. Make sure your goals are specific, measurable, achievable, relevant, and time-bound.\n3. Find reference points: Identify specific slope percentages on the practice green and work on aiming accurately at those targets. This will help you build confidence in your green"

In [27]:
# Ask about ladder drills through the same chain and memory; show only the answer text.
question = "i heard that ladder drills are good for speed control. what do you think ? "
text = call_conversational_rag(question, final_chain, memory)
text['answer']

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


response time : 10.564454078674316


"To answer your question, yes, ladder drills can be effective for improving speed control in golf . By setting up a series of putts with increasing lengths, you can focus on maintaining consistent stroke length and tempo throughout the drill. This will help you train yourself to control your speed more effectively and develop a better feel for the distance required for various putts. Ultimately, this can lead to improved performance on the green.\n\nHowever, it's important to note that ladder drills may not completely eliminate the need for guessing and trial and error scenarios, especially when it comes to identifying whether an error was due to initial ball direction or speed control. To address this issue, using  tools such as a  System can provide valuable feedback and help improve your accuracy and consistency in both areas.\n\nAdditionally, practicing your speed control on the same green and under the same conditions can help ensure more accurate results and make it easier to rep

In [26]:
# Ask about speed-control exercises through the same chain and memory; show only the answer text.
question = "what is the best exercise for speed control "
text = call_conversational_rag(question, final_chain, memory)
text['answer']

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


response time : 6.744657039642334


'To answer your question using the context provided, we cannot directly provide information about specific drills for improving speed control in golf  from the text. However, we can suggest looking into the "Features" section of the "Indoor" and "Outdoor" products related to  practice, such as the P7, P7 Plus, P8, P10, P12, C1, CX1, C2, CX2, and Moving Green, as they may offer features or tools designed to help improve speed control in golf . Additionally, checking out the "Blog" section for articles on strokes gained  and other  techniques and drills could also be beneficial.'