<a href="https://colab.research.google.com/github/etuckerman/AI-Powered_CS-S_Surfing_Community_Assistant/blob/main/KSF_LLM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
# prompt: huggingface login using huggingface_token.txt
with open('huggingface_token.txt', 'r') as f:
  HUGGINGFACE_TOKEN = f.read().strip()

!huggingface-cli login --token $HUGGINGFACE_TOKEN


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [1]:
!pip install -q pandas scikit-learn numpy sentence-transformers tqdm langchain langchain_community transformers bitsandbytes accelerate nltk gradio
!pip install --upgrade langchain gradio

# Import necessary packages
import pandas as pd
import re
import numpy as np
import pickle
import os
from tqdm.auto import tqdm
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain import HuggingFacePipeline
from sentence_transformers import SentenceTransformer, util
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from google.colab import files
import gradio as gr
import nltk
from sklearn.metrics.pairwise import cosine_similarity


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m49.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.5/137.5 MB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m73.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.7/318.7 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# --------- Data Preprocessing Section ---------

# Load CSV data with upload fallback
def load_or_upload_file(file_name):
    if os.path.exists(file_name):
        return pd.read_csv(file_name), f"File '{file_name}' found and loaded."
    else:
        print(f"File '{file_name}' not found. Please upload the file.")
        uploaded = files.upload()
        df = pd.read_csv(next(iter(uploaded.keys())))
        return df, f"File '{file_name}' uploaded and loaded."

# Clean messages in the dataset
def clean_messages(df):
    nltk.download('stopwords')
    nltk.download('wordnet')

    df['Message'] = df['Message'].astype(str)
    df['Message'] = df['Message'].apply(lambda x: re.sub(r'[^\x00-\x7F]+|http\S+|@\S+', '', x))
    df = df[df['Message'].str.strip().str.len() > 2]

    short_responses = set(['ok', 'lol', 'haha', 'yes', 'no', 'sure', 'right', 'cool', 'nah', 'yep', 'nope', 'yeah', 'k', 'hm'])
    df = df[~df['Message'].str.lower().isin(short_responses)]

    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    df['Message'] = df['Message'].apply(lambda x: ' '.join([lemmatizer.lemmatize(word) for word in x.split() if word.lower() not in stop_words]))

    return df.reset_index(drop=True)

# Extract question-answer pairs from cleaned messages
def extract_qa_pairs(df, model, window_size=50):
    messages = df['Message'].tolist()
    message_embeddings = model.encode(messages)
    qa_pairs = []

    for i in range(len(df) - window_size):
        message = messages[i]

        if message.endswith('?') or any(word in message.lower() for word in ['who', 'what', 'where', 'when', 'why', 'how']):
            potential_answers = messages[i + 1: min(i + 1 + window_size, len(df))]
            if potential_answers:
                question_embedding = message_embeddings[i]
                answer_embeddings = message_embeddings[i + 1: min(i + 1 + window_size, len(df))]
                similarities = cosine_similarity([question_embedding], answer_embeddings)[0]
                best_answer_index = np.argmax(similarities) if len(potential_answers) > 1 else 0
                best_answer = potential_answers[best_answer_index]
                if not best_answer.endswith('?'):
                    qa_pairs.append({'Question': message, 'Answer': best_answer})
    return qa_pairs

# Load player data and clean it
def preprocess_player_data(df):
    df['First online'] = pd.to_datetime(df['First online'])
    df['Last online'] = pd.to_datetime(df['Last online'])
    return df


In [3]:
# --------- Chatbot Functionality Section ---------

# Save/load embeddings for faster processing
def save_or_load_embeddings(df, model):
    if os.path.exists('qa_embeddings.pkl'):
        with open('qa_embeddings.pkl', 'rb') as f:
            df['Question_Embedding'] = pickle.load(f)
            df['Answer_Embedding'] = pickle.load(f)
        print("QA Embeddings loaded from 'qa_embeddings.pkl'.")
    else:
        tqdm.pandas()
        df['Question_Embedding'] = df['Question'].progress_apply(lambda x: model.encode(x))
        df['Answer_Embedding'] = df['Answer'].progress_apply(lambda x: model.encode(x))
        with open('qa_embeddings.pkl', 'wb') as f:
            pickle.dump(df['Question_Embedding'].tolist(), f)
            pickle.dump(df['Answer_Embedding'].tolist(), f)
        print("QA Embeddings computed and saved to 'qa_embeddings.pkl'.")

# Find the most relevant QA pair based on a user query
def find_relevant_qa_pair(user_query, messages_qa, model):
    query_embedding = model.encode(user_query)
    messages_qa['Question_Similarity'] = messages_qa['Question_Embedding'].progress_apply(lambda x: cosine_similarity([query_embedding], [x])[0][0])
    messages_qa['Answer_Similarity'] = messages_qa['Answer_Embedding'].progress_apply(lambda x: cosine_similarity([query_embedding], [x])[0][0])
    messages_qa['Similarity'] = (messages_qa['Question_Similarity'] + messages_qa['Answer_Similarity']) / 2
    best_match = messages_qa.loc[messages_qa['Similarity'].idxmax()]
    return best_match['Question'], best_match['Answer']

# Load Hugging Face model
def setup_llama_model():
    model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map='auto', offload_buffers=True)
    return model, tokenizer

# Generate a response using the LLaMA model
def generate_response(user_query, messages_qa, model, tokenizer, fixed_context):
    question, relevant_message = find_relevant_qa_pair(user_query, messages_qa, model)
    prompt = (f"User Query: {user_query}\n"
              f"Relevant Message: {relevant_message}\n"
              f"Context: {fixed_context}\n"
              f"Answer:")
    model_input = tokenizer(prompt, return_tensors="pt").to('cuda' if torch.cuda.is_available() else 'cpu')
    with torch.no_grad():
        outputs = model.generate(**model_input, max_length=200, num_return_sequences=1, temperature=0.7, top_p=0.9)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.strip()


In [23]:
import re

def detect_player_name(query, player_df):
    for name in player_df['Name']:
        if re.search(rf'\b{name}\b', query, re.IGNORECASE):
            return player_df[player_df['Name'].str.contains(name, case=False)]
    return None

def generate_response(user_query, df, player_df):
    # Check if player name is mentioned
    player_info = detect_player_name(user_query, player_df)

    if player_info is not None:
        player_info_str = player_info.to_string(index=False)
        response = f"Player Information:\n{player_info_str}"
    else:
        # Existing code to find relevant Q&A
        relevant_qa = find_relevant_qa_pair(user_query, df)
        if relevant_qa:
            response = relevant_qa['Answer']
        else:
            response = "Sorry, I don't have an answer to that."

    return response


In [4]:
# --------- Main Section to Run Preprocessing & Interface ---------

# Preprocess data files
df, message = load_or_upload_file('messages.csv')
df = clean_messages(df)
player_df, player_message = load_or_upload_file('playerdata.csv')
player_df = preprocess_player_data(player_df)

# Extract QA pairs and embeddings
model = SentenceTransformer('all-mpnet-base-v2', device='cuda')
qa_pairs = extract_qa_pairs(df, model)
messages_qa = pd.DataFrame(qa_pairs)
save_or_load_embeddings(messages_qa, model)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

QA Embeddings loaded from 'qa_embeddings.pkl'.


OSError: You are trying to access a gated repo.
Make sure to have access to it at https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct.
401 Client Error. (Request ID: Root=1-66d9e832-3dd686f8059ae19a23f523f1;38faff80-b22f-497f-bb9a-c28742716ccd)

Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3.1-8B-Instruct is restricted. You must be authenticated to access it.

In [13]:
# # Load LLaMA model
# llama_model, tokenizer = setup_llama_model()

# # Fixed context for the chatbot
# FIXED_CONTEXT = (
#     "Counter-Strike Surf is a popular custom game mode within the Counter-Strike series..."
# )

# # Setup Gradio Interface
# def gradio_interface():
#     iface = gr.Interface(
#         fn=lambda query: generate_response(query, messages_qa, llama_model, tokenizer, FIXED_CONTEXT),
#         inputs=[gr.Textbox(label="User Query", placeholder="Enter your query here...")],
#         outputs="text",
#         title="Counter-Strike Surf QA Chatbot"
#     )
#     return iface

# # Launch the interface
# gradio_interface().launch()


tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]



Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://2037bd53f522d3c0e1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [24]:
import transformers
import torch
import pandas as pd

# Initialize the LLaMA model pipeline
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

def detect_player_name(query, player_df):
    # Check if the query contains a player's name
    for name in player_df['Name']:
        if name.lower() in query.lower():
            return player_df[player_df['Name'].str.lower() == name.lower()]
    return None

def generate_response(user_query):
    # Step 1: Find the relevant question-answer pair from the DataFrame
    relevant_qa_pair = messages_qa[messages_qa['Question'].str.contains(user_query, case=False, na=False)]

    if relevant_qa_pair.empty:
        question_answer_pair = ("No relevant question found", "Sorry, I don't have an answer to that.")
    else:
        question_answer_pair = (relevant_qa_pair.iloc[0]['Question'], relevant_qa_pair.iloc[0]['Answer'])

    # Step 2: Check if the user query contains a player's name
    player_info = detect_player_name(user_query, player_df)
    if player_info is not None:
        player_info_str = player_info.to_string(index=False)
        context = f"User query: {user_query}. Relevant Q&A: {question_answer_pair[0]} - {question_answer_pair[1]}. Player data: {player_info_str}"
    else:
        context = f"User query: {user_query}. Relevant Q&A: {question_answer_pair[0]} - {question_answer_pair[1]}"

    # Prepare the input messages for the model
    messages = [
        {"role": "system", "content": "You are an intelligent assistant helping with game-related questions. The questions and answers are retrieved from the KSF community Discord. Use this information to form a correct answer to the user. Players in this game are called 'surfers'."},
        {"role": "user", "content": f"{context}"}
    ]

    # Generate response using LLaMA
    outputs = pipeline(
        messages,
        max_new_tokens=256,
    )

    response = outputs[0]["generated_text"]

    return response

# Example usage
user_query = "How good is Caff at surfing?"
response = generate_response(user_query)
print(response)




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


[{'role': 'system', 'content': "You are an intelligent assistant helping with game-related questions. The questions and answers are retrieved from the KSF community Discord. Use this information to form a correct answer to the user. Players in this game are called 'surfers'."}, {'role': 'user', 'content': "User query: How good is Caff at surfing?. Relevant Q&A: No relevant question found - Sorry, I don't have an answer to that.. Player data: Name   Country  Global rank    Points  Map completion points  Stage completion points  Bonus completion points  Top 10 points  Stage record points  Bonus record points  Group completion points  Completions               First online                Last online  Time surfing  Time spectating  WRs  Rank 2  Rank 3  Rank 4  Rank 5  Rank 6  Rank 7  Rank 8  Rank 9  Rank 10  Group 1  Group 2  Group 3  Group 4  Group 5  Group 6\nCaff Australia            1 1192450.0                61309.7                     9146                  26122.7       868081.0     