In [1]:
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
!pip install transformers torch flask flask-ngrok
!pip install pyngrok pandas sentence-transformers langchain



Collecting unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-gwsvwh6t/unsloth_d004b21605414671b8004364b68ce2f7
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-gwsvwh6t/unsloth_d004b21605414671b8004364b68ce2f7
[31mERROR: Operation cancelled by user[0m[31m
[0mTraceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/base_command.py", line 169, in exc_logging_wrapper
    status = run_func(*args)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/cli/req_command.py", line 242, in wrapper
    return func(self, options, args)
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/commands/install.py", line 377, in run
    requirement_set = resolver.resolve(
  File "/usr/local/lib/python3.10/dist-packages/pip/_internal/resolution/resolvelib/resolver.py", line 73, in resolve
    co

In [None]:
import os
import threading
from typing import Any, List, Optional

import numpy as np
import pandas as pd
from flask import Flask, request, jsonify
from pyngrok import ngrok
from unsloth import FastLanguageModel
from sentence_transformers import SentenceTransformer
# NOTE(review): `langchain.LangChain` and `langchain.chains.SimpleChain` do not
# appear in LangChain's documented API, and the `LLM` base class is normally
# imported from `langchain.llms.base` — confirm these three imports resolve.
from langchain import LangChain
from langchain.chains import SimpleChain
from langchain.prompts import PromptTemplate
from langchain.llms import LLM

# Flask application that serves the /predict endpoint defined further down.
app = Flask(__name__)
# Kept as a string: it is passed as-is to ngrok.connect() and interpolated into
# the printed URL, and converted with int() only when handed to app.run().
port = "5000"

# Load the model and tokenizer
model_name = "agrimreaper48/first-model"  # Your model name with LoRA
max_seq_length = 2048  # forwarded to from_pretrained as max_seq_length
dtype = None  # forwarded unchanged; presumably lets Unsloth choose the dtype — TODO confirm
load_in_4bit = True  # forwarded to from_pretrained as load_in_4bit

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

# Initialize sentence transformer for document embeddings
# (the same encoder is reused later to embed incoming queries).
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

# Load and preprocess player stats CSV
# The path is relative, so the file must be in the kernel's working directory.
df = pd.read_csv('player_stats.csv')

def preprocess_player_stats(row):
    """Render one player-stats record as a single English description.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) indexable by the
            column names used below.

    Returns:
        str: The sentences describing the player, joined by single spaces.

    Raises:
        KeyError: If ``row`` lacks any of the referenced columns.
    """
    identity = (f"Player {row['PLAYER NAME']} from team {row['PLAYER TEAM']} "
                f"plays as {row['PLAYER POSITION']} against {row['OPP']}.")
    status = f"Status: {row['STATUS']}."
    projection = f"Projected points: {row['PROJ']}."
    passing = (f"Passing stats: {row['PASSING C/A']} completions, {row['PASSING YDS']} yards, "
               f"{row['PASSING TD']} touchdowns, {row['PASSING INT']} interceptions.")
    rushing = (f"Rushing stats: {row['RUSHING CAR']} carries, {row['RUSHING YDS']} yards, "
               f"{row['RUSHING TD']} touchdowns.")
    receiving = (f"Receiving stats: {row['RECEIVING REC']} receptions, {row['RECEIVING YDS']} yards, "
                 f"{row['RECEIVING TD']} touchdowns.")
    misc = (f"Miscellaneous: {row['MISC 2PC']} two-point conversions, {row['MISC FUML']} fumbles, "
            f"{row['MISC TD']} touchdowns, Total points: {row['TOTAL']}.")

    # Each sentence already ends with a period; a single space separates them.
    sentences = [identity, status, projection, passing, rushing, receiving, misc]
    return " ".join(sentences)

# Create a list of player descriptions
# (preprocess_player_stats is applied row-wise, so the list follows df's row order).
player_texts = df.apply(preprocess_player_stats, axis=1).tolist()

# Generate embeddings for player stats
# The chain below maps the best-matching embedding index back to df.iloc[index],
# so these embeddings must stay aligned with df's rows.
player_embeddings = sentence_model.encode(player_texts)

# Define LangChain components
class CustomLLM(LLM):
    """LangChain LLM wrapper around a locally loaded model/tokenizer pair.

    LangChain's documented custom-LLM contract requires subclasses to provide
    ``_call`` and ``_llm_type``; the previous version only defined ``call`` and
    assigned undeclared attributes in ``__init__`` without initialising the
    base class. ``call`` is kept as a thin alias for existing callers.

    Args:
        model: Model object exposing ``generate(**inputs, ...)``.
        tokenizer: Tokenizer that is callable on a list of strings and exposes
            ``batch_decode``.
    """

    # Declared as fields so the base class accepts them as instance state.
    model: Any = None
    tokenizer: Any = None

    def __init__(self, model, tokenizer, **kwargs):
        # Keep the positional (model, tokenizer) constructor while handing the
        # values to the base-class initialiser as keyword arguments.
        super().__init__(model=model, tokenizer=tokenizer, **kwargs)

    @property
    def _llm_type(self) -> str:
        """Short identifier for this LLM implementation."""
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate up to 64 new tokens for ``prompt`` and return the decoded text.

        Args:
            prompt: Text to feed to the model.
            stop: Accepted for interface compatibility; not applied here.
            run_manager: Accepted for interface compatibility; unused.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            str: The first decoded sequence, with special tokens removed.
        """
        inputs = self.tokenizer([prompt], return_tensors='pt').to('cuda')
        outputs = self.model.generate(**inputs, max_new_tokens=64, use_cache=True)
        # NOTE(review): the whole output sequence is decoded; for decoder-only
        # models this typically includes the prompt text — confirm intended.
        result = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        return result

    def call(self, prompt):
        """Backward-compatible alias for ``_call(prompt)``."""
        return self._call(prompt)

# Wrap the loaded model and tokenizer; this instance is passed to the chain below.
custom_llm = CustomLLM(model, tokenizer)

# Define a simple LangChain chain
class PlayerPredictionChain(SimpleChain):
    """Find the player whose stats description best matches a free-text query.

    The query is embedded with ``sentence_model`` and compared by cosine
    similarity against ``player_embeddings``. The best-matching row index is
    then looked up in the module-level ``df``, so ``player_embeddings`` must be
    in the same row order as ``df``. The ``llm`` is only passed to the base
    class; the lookup itself does not generate text.

    NOTE(review): ``SimpleChain`` does not appear in LangChain's documented
    chains — confirm the base class resolves.

    Args:
        llm: LLM object forwarded to the base-class initialiser.
        sentence_model: Encoder exposing ``encode(text)``.
        player_embeddings: 2-D array-like, one embedding per player row.
        player_texts: The descriptions the embeddings were computed from.
    """

    def __init__(self, llm, sentence_model, player_embeddings, player_texts):
        super().__init__(llm)
        self.sentence_model = sentence_model
        self.player_embeddings = player_embeddings
        self.player_texts = player_texts

    def _call(self, prompt):
        """Return a sentence naming the player most similar to ``prompt``.

        Raises:
            ValueError: If there are no player embeddings to search.
        """
        query_embedding = np.asarray(self.sentence_model.encode(prompt), dtype=float).ravel()
        embeddings = np.asarray(self.player_embeddings, dtype=float)
        if embeddings.ndim != 2 or embeddings.shape[0] == 0:
            raise ValueError("No player embeddings are available to search.")

        # Cosine similarity computed directly with NumPy; the previous code
        # called `cosine_similarity`, which is never imported in this file.
        norms = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_embedding)
        norms[norms == 0] = 1.0  # zero vectors have a zero dot product -> similarity 0
        player_similarities = embeddings @ query_embedding / norms

        # Plain int so the positional lookup does not depend on NumPy scalar handling.
        most_similar_player_index = int(np.argmax(player_similarities))
        most_relevant_player = df.iloc[most_similar_player_index]
        return (f"Based on your query, the most relevant player is {most_relevant_player['PLAYER NAME']} "
                f"from team {most_relevant_player['PLAYER TEAM']} with {most_relevant_player['TOTAL']} total points.")

    def call(self, prompt):
        """Public entry point used by the ``/predict`` route; delegates to ``_call``."""
        return self._call(prompt)

# Module-level chain instance; the /predict route below calls it for each request.
player_prediction_chain = PlayerPredictionChain(custom_llm, sentence_model, player_embeddings, player_texts)

@app.route('/predict', methods=['POST'])
def predict():
    """Handle ``POST /predict``: build a query from the JSON body and run the chain.

    Expected JSON object:
        instruction (required): main query text.
        input (optional): extra context, defaults to ``""``.
        output (optional): extra text, defaults to ``""``.

    Returns:
        ``{"response": <text>}`` on success, or ``{"error": <message>}`` with
        HTTP 400 when the body is not a JSON object or lacks ``instruction``.
    """
    # silent=True yields None instead of an error for a missing/invalid JSON
    # body, so the client gets an explicit 400 with a message.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object.'}), 400
    if 'instruction' not in data:
        return jsonify({'error': "Missing required field 'instruction'."}), 400

    instruction = data['instruction']
    # A prediction request should not have to supply these, so they are optional.
    input_text = data.get('input', '')
    output_text = data.get('output', '')

    alpaca_prompt = "{instruction} {input} {output}"  # Format the prompt
    # strip() removes the padding left behind when optional fields are empty.
    query = alpaca_prompt.format(instruction=instruction, input=input_text, output=output_text).strip()

    # Use LangChain to get prediction
    response_text = player_prediction_chain.call(query)

    return jsonify({'response': response_text})

# Start the Flask server in a new thread
def run_server():
    """Run the Flask development server on ``port`` (blocking call).

    The reloader is disabled because this runs in a background thread. Debug
    mode is off because the server is published through a public ngrok tunnel
    and the interactive debugger must not be reachable from the internet.
    """
    app.run(port=int(port), use_reloader=False, debug=False)

# Open the public tunnel to the local port and report the resulting URL.
public_url = ngrok.connect(port).public_url
print(f"ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:{port}\"")

# Serve from a background thread so the notebook cell returns immediately.
threading.Thread(target=run_server).start()
