<a href="https://colab.research.google.com/github/etuckerman/surf_NLP/blob/main/KSF_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Step 1: Install necessary packages
!pip install -q pandas scikit-learn numpy sentence-transformers tqdm langchain langchain_community transformers bitsandbytes accelerate

In [17]:
# Step 2: Import necessary libraries
import pandas as pd
import re
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import pickle
from tqdm import tqdm
from google.colab import files
import os
import torch
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.schema.runnable import RunnableSequence
from langchain.chains import LLMChain


In [3]:
# Step 3: Check for files in Google Colab
if os.path.exists('messages.csv'):
    # Load the CSV file if it exists
    df = pd.read_csv('messages.csv')
    print("File 'messages.csv' found and loaded.")
else:
    # If the file does not exist, prompt the user to upload it
    print("File 'messages.csv' not found. Please upload the file.")
    uploaded = files.upload()
    df = pd.read_csv(next(iter(uploaded.keys())))
    print("File 'messages.csv' uploaded and loaded.")

# Data Cleaning
df['Message'] = df['Message'].astype(str)
encoded_emoji_pattern = re.compile(r'[^\x00-\x7F]+')
df['Message'] = df['Message'].apply(lambda x: re.sub(encoded_emoji_pattern, '', x))

short_responses = ['ok', 'lol', 'haha', 'yes', 'no', 'sure', 'right', 'cool', 'nah', 'yep', 'nope', 'yeah', 'k', 'hm']
df = df[~df['Message'].str.lower().isin(short_responses)]
df = df[df['Message'].str.contains(r'\b\w{3,}\b')]
df = df[df['Message'].str.strip().str.len() > 2]
df = df.reset_index(drop=True)

File 'messages.csv' found and loaded.


In [4]:
# Step 4: Initialize the SentenceTransformer model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2', device='cuda')


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [5]:
# Step 5: Check if embeddings have been previously generated
if os.path.exists('embeddings.pkl'):
    with open('embeddings.pkl', 'rb') as f:
        df['Embedding'] = pickle.load(f)
    print("Embeddings loaded from 'embeddings.pkl'.")
else:
    # Function for batch processing of embeddings
    def batch_encode(model, texts, batch_size=32):
        embeddings = []
        for i in range(0, len(texts), batch_size):
            batch = texts[i:i + batch_size]
            embeddings.extend(model.encode(batch))
        return embeddings

    # Generate embeddings using batch processing
    df['Embedding'] = batch_encode(model, df['Message'].tolist())

    # Save embeddings to a file after computation
    with open('embeddings.pkl', 'wb') as f:
        pickle.dump(df['Embedding'].tolist(), f)
    print("Embeddings computed and saved to 'embeddings.pkl'.")

Embeddings loaded from 'embeddings.pkl'.


In [6]:
# Step 6: Find the most relevant answer using index as recency
def find_answer(query, df, recency_bias=0.01):
    query_embedding = model.encode(query)

    # Initialize tqdm progress bar
    tqdm.pandas(desc="Finding best answer")

    # Calculate cosine similarity between the query and all messages, progress_apply() for tqdm tracking
    df['Similarity'] = df['Embedding'].progress_apply(lambda x: cosine_similarity([query_embedding], [x])[0][0])

    # Apply recency bias based on index
    df['Recency_Score'] = np.exp(-recency_bias * (len(df) - df.index))

    # Combine similarity and recency scores
    df['Score'] = df['Similarity'] * df['Recency_Score']

    # Find the message with the highest score
    best_match = df.loc[df['Score'].idxmax()]
    return best_match['Message']

# Example usage
query = "how do i strafe?"
answer = find_answer(query, df)
print("Best Answer:", answer)

Finding best answer: 100%|██████████| 658280/658280 [06:20<00:00, 1728.18it/s]

Best Answer: Also you kind of look a bit shaky and reactionary when you board a ramp especially on the blind angles. Get really comfortable with all the ramp boards and flicks in saveloc so you know exactly how the flow of the map is and try to not so abruptly stop but kind of consistently strafe when you're in the air, either towards the side of the next ramp or just weave back and forth if it's straigh in front of you





In [11]:
# Step 7: Set up the model and tokenizer with 4-bit quantization
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

model_name = "mistralai/Mistral-7B-v0.1"
model_4bit = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
# Step 8: Create the text generation pipeline

pipeline_inst = pipeline(
    "text-generation",
    model=model_4bit,
    tokenizer=tokenizer,
    use_cache=True,
    device_map="auto",
    max_length=2500,
    do_sample=True,
    top_k=5,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Set up LangChain with the new API
llm = HuggingFacePipeline(pipeline=pipeline_inst)

  warn_deprecated(


In [31]:
# Step 9: Define the template for generating refined responses
template = """
<s>[INST] Context: The user is looking for help or advice related to the Counter-Strike: Source (CS:S) surfing community.
The user may have a specific question about surfing techniques, server settings, community events, or any other aspect of the CS:S surfing scene.
The relevant message retrieved from the CSV file should provide some background information or a starting point for the user's query.

User Query: {query}
Relevant Message: {answer}
Context: {context}

Based on the user's query and the relevant message from the CS:S surfing community, please generate a helpful and informative response.
</s>

<out>
Refined Answer: [/INST]
</out>
"""

# Example context usage
context = """
The user is looking for help or advice related to the Counter-Strike: Source (CS:S) surfing community.
The user may have a specific question about surfing techniques, server settings, community events, or any other aspect of the CS:S surfing scene.
The relevant message retrieved from the CSV file, {answer}, should provide some background information or a starting point for the user's query.
"""


In [32]:
# Step 10: Generate refined answers
def generate_refined_answer(query, answer, context):
    prompt = PromptTemplate(template=template, input_variables=["query", "answer", "context"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    response = llm_chain.run({"query": query, "answer": answer, "context": context})
    return response

In [33]:
refined_answer = generate_refined_answer(query, answer, context)
print("Refined Answer:", refined_answer)

Refined Answer: 
<s>[INST] Context: The user is looking for help or advice related to the Counter-Strike: Source (CS:S) surfing community. 
The user may have a specific question about surfing techniques, server settings, community events, or any other aspect of the CS:S surfing scene. 
The relevant message retrieved from the CSV file should provide some background information or a starting point for the user's query.

User Query: how do i strafe?
Relevant Message: Also you kind of look a bit shaky and reactionary when you board a ramp especially on the blind angles. Get really comfortable with all the ramp boards and flicks in saveloc so you know exactly how the flow of the map is and try to not so abruptly stop but kind of consistently strafe when you're in the air, either towards the side of the next ramp or just weave back and forth if it's straigh in front of you
Context: 
The user is looking for help or advice related to the Counter-Strike: Source (CS:S) surfing community. 
The us

In [None]:
print