In [1]:
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import (
    StreamingStdOutCallbackHandler
)
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter,CharacterTextSplitter
from huggingface_hub import hf_hub_download
from langchain.llms import LlamaCpp
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
main_template = """You are a personal assistant . Help users to learn  fundamentals and techniques.
### Instruction: {question}
### Response:
"""

prompt_main = PromptTemplate(template=main_template, input_variables=["question"])
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

In [3]:
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --upgrade --force-reinstall llama-cpp-python --no-cache-dir

'CMAKE_ARGS' is not recognized as an internal or external command,
operable program or batch file.


In [3]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader("_RAG.pdf")
documents = loader.load()

In [6]:
(repo_id, model_file_name) = ("",
                                  "ggml-model18-q4.gguf")

model_path = hf_hub_download(repo_id=repo_id,
                                 filename=model_file_name,
                                 repo_type="model")
llm = LlamaCpp(
        model_path=model_path,
        n_gpu_layers=512,
        n_batch=30,
        n_ctx=6000,
        max_tokens=200,
        temperature=0,
        # callback_manager=callback_manager,
        verbose=True,
        streaming=True,
        )

llm_chain_main = LLMChain(prompt=prompt_main, llm=llm)

loader = PyPDFLoader("_RAG.pdf")
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
docs = text_splitter.split_documents(documents)
hf_embedding = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")

db = FAISS.from_documents(docs, hf_embedding)
db.save_local("_FAISS")
# load from local
db = FAISS.load_local("_FAISS/", embeddings=hf_embedding)

llama_model_loader: loaded meta data with 23 key-value pairs and 291 tensors from /home//.cache/huggingface/hub/models--berkouille--Mistral_Golf_GGUF/snapshots/8648d6b1eab8fa3845331a7df007c81f334aa190/ggml-model18-q4.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = 7B
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count

In [5]:
!pip install faiss-gpu

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [7]:
template_1 = '''Context: {context}
Based on the Context, provide an answer as a  coach for following question
### Instruction:{question}
'''

questions = ["What is the best way to control speed on the green?",
             "Explain main aspects of .",
             "How can i get better at  ?",
             "Explain ladder drills, start drills, rolling balls, hole out challange and challange zone. What are they used for. ",
             "what is green reading and how to improve it with ? ",
             "what is distance accuracy and how to improve it with ? ",
             "What are the games in ?",
             "how do i learn how to putt?",
             "tell me a random fact about .",
             "how can i choose a correct putter for myself?",
             "I am beginner where should i start ?"]

for question in questions:
    search = db.similarity_search(question, k=2)
    prompt = PromptTemplate(input_variables=["context", "question"], template= template_1)
    final_prompt = prompt.format(question=question, context=search)
    rag_out = llm_chain_main.run(final_prompt)
    print(rag_out)
    time.sleep(1)

"""query = "I am pretty bad at speed control, can you help me?"
search = db.similarity_search(query, k=2)
prompt = PromptTemplate(input_variables=["context", "question"], template= template_1)
final_prompt = prompt.format(question=query, context=search)
rag_out = llm_chain_main.run(final_prompt)
rag_out"""

  warn_deprecated(

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =       4.48 ms /    39 runs   (    0.11 ms per token,  8709.25 tokens per second)
llama_print_timings: prompt eval time =     431.14 ms /   200 tokens (    2.16 ms per token,   463.89 tokens per second)
llama_print_timings:        eval time =     382.38 ms /    38 runs   (   10.06 ms per token,    99.38 tokens per second)
llama_print_timings:       total time =     882.18 ms /   238 tokens


Speed control is crucial on the green. To improve, focus on your line, distance, and speed. Practice with short putts, progressively increasing distance as you gain confidence.
    


Llama.generate: prefix-match hit

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =      22.52 ms /   200 runs   (    0.11 ms per token,  8881.78 tokens per second)
llama_print_timings: prompt eval time =    2575.74 ms /  1193 tokens (    2.16 ms per token,   463.17 tokens per second)
llama_print_timings:        eval time =    2150.34 ms /   199 runs   (   10.81 ms per token,    92.54 tokens per second)
llama_print_timings:       total time =    5121.76 ms /  1392 tokens


 1. Stance: In  practice, it's crucial to establish a consistent and comfortable stance. This involves setting the feet shoulder-width apart, aligning them parallel to the target line, and ensuring a slight bend in the knees. A stable and balanced stance provides a solid foundation for a controlled  stroke. 
 
 2. Line: The line is all about the intended path the ball should take to reach the hole. During  practice, golfers focus on aligning the putter face to this desired line. This involves careful aim and precision to ensure that the ball starts on the correct path. 
 
 3. Speed: Speed control is a critical aspect of  practice, emphasizing how hard or soft a golfer strikes the ball. Practicing various distances and green speeds helps golfers develop a consistent feel for how much force to apply to achieve the desired putt length. It involves refining touch and judgment to adapt to different green conditions


Llama.generate: prefix-match hit

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =       5.52 ms /    49 runs   (    0.11 ms per token,  8880.03 tokens per second)
llama_print_timings: prompt eval time =    2499.95 ms /  1167 tokens (    2.14 ms per token,   466.81 tokens per second)
llama_print_timings:        eval time =     528.76 ms /    48 runs   (   11.02 ms per token,    90.78 tokens per second)
llama_print_timings:       total time =    3153.59 ms /  1215 tokens


 Improving your  skills involves consistent practice, proper technique, and mental focus. We can work on your aiming, distance control, and green reading. I'll also provide feedback and motivation to help you reach your goals.
    


Llama.generate: prefix-match hit

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =       7.77 ms /    67 runs   (    0.12 ms per token,  8618.47 tokens per second)
llama_print_timings: prompt eval time =    1379.89 ms /   634 tokens (    2.18 ms per token,   459.46 tokens per second)
llama_print_timings:        eval time =     714.23 ms /    66 runs   (   10.82 ms per token,    92.41 tokens per second)
llama_print_timings:       total time =    2233.29 ms /   700 tokens


Ladder drills focus on distance control, Straight putts hone alignment, Star drills improve green reading, Rolling Balls demonstrate ideal ball motion, Random Putts enhance pressure putt skills, Hole Out Challange tests distance accuracy, and Challange Zone aims to improve speed control under pressure.
    


Llama.generate: prefix-match hit

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =       8.69 ms /    78 runs   (    0.11 ms per token,  8979.97 tokens per second)
llama_print_timings: prompt eval time =    1632.15 ms /   764 tokens (    2.14 ms per token,   468.09 tokens per second)
llama_print_timings:        eval time =     822.63 ms /    77 runs   (   10.68 ms per token,    93.60 tokens per second)
llama_print_timings:       total time =    2609.74 ms /   841 tokens


Green reading is the ability to accurately determine the line and speed of a putt based on the green's topography.  helps improve green reading by providing visual cues such as contour lines, slope arrows, and slope percentage. These visuals help golfers understand the green's slope and break better, leading to more accurate putts. 
    


Llama.generate: prefix-match hit

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =       5.72 ms /    51 runs   (    0.11 ms per token,  8909.85 tokens per second)
llama_print_timings: prompt eval time =    2665.78 ms /  1222 tokens (    2.18 ms per token,   458.40 tokens per second)
llama_print_timings:        eval time =     550.63 ms /    50 runs   (   11.01 ms per token,    90.81 tokens per second)
llama_print_timings:       total time =    3350.40 ms /  1272 tokens


 Distance accuracy refers to the consistency with which you hit your putts at the intended speed. 's real-time feedback and drills can help improve your distance accuracy by providing visual cues and tracking your progress. 
    


Llama.generate: prefix-match hit

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =      21.46 ms /   200 runs   (    0.11 ms per token,  9320.10 tokens per second)
llama_print_timings: prompt eval time =    2638.54 ms /  1216 tokens (    2.17 ms per token,   460.86 tokens per second)
llama_print_timings:        eval time =    2162.59 ms /   199 runs   (   10.87 ms per token,    92.02 tokens per second)
llama_print_timings:       total time =    5202.26 ms /  1415 tokens


  offers various games such as PuttPong, Maze, Shooter, Splash, Tic-Tac-Put, PuttPong, Maze, Shooter, Splash, Tic-Tac-Put, PuttPong, Maze, Shooter, Splash, Tic-Tac-Put, PuttPong, Maze, Shooter, Splash, Tic-Tac-Put, PuttPong, Maze, Shooter, Splash, Tic-Tac-Put, PuttPong, Maze, Shooter, Splash, Tic-Tac-Put, PuttPong, Maze, Shooter, Splash, Tic-Tac-Put, PuttPong, Maze, Shooter, Splash, Tic-Tac-Put,


Llama.generate: prefix-match hit

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =       5.92 ms /    51 runs   (    0.12 ms per token,  8611.96 tokens per second)
llama_print_timings: prompt eval time =    2661.55 ms /  1222 tokens (    2.18 ms per token,   459.13 tokens per second)
llama_print_timings:        eval time =     546.87 ms /    50 runs   (   10.94 ms per token,    91.43 tokens per second)
llama_print_timings:       total time =    3348.70 ms /  1272 tokens


Learning how to putt effectively involves practicing key skills such as distance control, green reading, and maintaining a consistent stroke. Utilize 's visual cues, drills, and feedback to improve your  performance.
    


Llama.generate: prefix-match hit

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =       3.78 ms /    34 runs   (    0.11 ms per token,  9001.85 tokens per second)
llama_print_timings: prompt eval time =    1535.39 ms /   715 tokens (    2.15 ms per token,   465.68 tokens per second)
llama_print_timings:        eval time =     350.94 ms /    33 runs   (   10.63 ms per token,    94.03 tokens per second)
llama_print_timings:       total time =    1968.69 ms /   748 tokens


Did you know that the average distance for a putt on the PGA Tour is just over 17 feet? That's pretty impressive accuracy!
    


Llama.generate: prefix-match hit

llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =      17.83 ms /   159 runs   (    0.11 ms per token,  8917.05 tokens per second)
llama_print_timings: prompt eval time =    2630.80 ms /  1205 tokens (    2.18 ms per token,   458.04 tokens per second)
llama_print_timings:        eval time =    1722.99 ms /   158 runs   (   10.90 ms per token,    91.70 tokens per second)
llama_print_timings:       total time =    4684.70 ms /  1363 tokens


Choosing the correct putter is crucial for your golf game. Here are some key factors to consider when selecting a putter: 1. Length: Determine the ideal putter length that suits your height and posture. 2. Grip: Test different grip sizes to find one that provides comfort and control. 3. Head Design: Experiment with various head designs to assess their impact on alignment and confidence. 4. Hosel Configuration: Consider the hosel configuration (open or closed) based on your aiming preferences. 5. Testing: Don't hesitate to ask for help from  staff or use the  technology to test different putters and find the one that suits your  style. 
    


Llama.generate: prefix-match hit


As a  coach, I would recommend starting with short putts to build confidence and accuracy. We can also focus on alignment, green reading, and distance control using 's visual cues and feedback.
    



llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =       5.34 ms /    47 runs   (    0.11 ms per token,  8793.26 tokens per second)
llama_print_timings: prompt eval time =    2608.63 ms /  1195 tokens (    2.18 ms per token,   458.09 tokens per second)
llama_print_timings:        eval time =     509.83 ms /    46 runs   (   11.08 ms per token,    90.23 tokens per second)
llama_print_timings:       total time =    3244.77 ms /  1241 tokens


'query = "I am pretty bad at speed control, can you help me?"\nsearch = db.similarity_search(query, k=2)\nprompt = PromptTemplate(input_variables=["context", "question"], template= template_1)\nfinal_prompt = prompt.format(question=query, context=search)\nrag_out = llm_chain_main.run(final_prompt)\nrag_out'

In [11]:
# test the speed
question = input("Enter the question: ")
search = db.similarity_search(question, k=2)
prompt = PromptTemplate(input_variables=["context", "question"], template= template_1)
final_prompt = prompt.format(question=question, context=search)
rag_out = llm_chain_main.run(final_prompt)
print(rag_out)

Llama.generate: prefix-match hit


The Challenge Zone is an exercise designed to improve distance control on the greens. The goal is to play as many consecutive putts as possible, with each putt being shorter than the previous one. This challenge helps golfers hone their speed control skills, which are essential for successful .
    



llama_print_timings:        load time =      65.71 ms
llama_print_timings:      sample time =       6.83 ms /    62 runs   (    0.11 ms per token,  9080.26 tokens per second)
llama_print_timings: prompt eval time =       0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time =     654.96 ms /    62 runs   (   10.56 ms per token,    94.66 tokens per second)
llama_print_timings:       total time =     754.97 ms /    63 tokens


: 

 ===== as retriever  =======



In [83]:
text_splitter = CharacterTextSplitter(chunk_size=500, 
                                      chunk_overlap=20)
chunked_documents = text_splitter.split_documents(documents)
db = FAISS.from_documents(chunked_documents, 
                          HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'))

retriever = db.as_retriever()

Created a chunk of size 811, which is longer than the specified 500
Created a chunk of size 609, which is longer than the specified 500


In [85]:
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template_1,
)

llm_chain_1 = LLMChain(llm=llm, prompt=prompt)
#llm_chain.invoke({"context": "", "question": "what are the games in ?"})

In [86]:
from langchain.schema.runnable import RunnablePassthrough

rag_chain = ( 
 {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain_1
)

result = rag_chain.invoke(query)
result['text']

Llama.generate: prefix-match hit


'### Response: Improving  skills involves practicing key fundamentals and techniques. We can create a customized  training plan to help you enhance your accuracy, distance control, and mental focus on the greens.\n    '