In [None]:
# ========== CUSTOM LLM CHATBOT – 30 % SUPPORT-TICKET DROP ========== #
#@title 6️⃣ Fine-tuned on company data
!pip install openai langchain pinecone-client tiktoken
!pip install sentence-transformers fastapi uvicorn -q

import os, json, textwrap, time, pandas as pd, numpy as np
from openai import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import CSVLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI as LangOpenAI
import pinecone
from google.colab import drive, userdata
# Mount Google Drive so the KB, training file and artefacts written below
# land under /content/drive/MyDrive.
drive.mount(mountpoint='/content/drive')

# --------------------------------------------------
# 1. CREATE / UPLOAD COMPANY KNOWLEDGE BASE
# --------------------------------------------------
# (If you already have a KB folder in Drive, skip this)
# Seed FAQ used as the retrieval knowledge base: blank-line separated Q/A pairs.
kb="""
Q: How do I reset my password?
A: Go to Settings → Security → Reset Password.  You’ll receive an email within 5 min.

Q: Where is my order?
A: Track at https://ourshop.com/track using the order number in your confirmation email.

Q: Refund policy?
A: 30-day money-back guarantee.  Contact support@ourshop.com with order number.

Q: Supported payment methods?
A: Visa, Mastercard, Amex, PayPal, Apple Pay, Google Pay.

Q: How to cancel subscription?
A: Settings → Billing → Cancel.  No charges after current cycle ends.
"""
# The text contains non-ASCII characters (→, ’) and is read back as UTF-8 when
# the retriever is built, so pin the encoding instead of relying on the
# platform default.
with open('/content/drive/MyDrive/kb_faq.txt', 'w', encoding='utf-8') as f:
    f.write(kb)
print('✅ KB created at /content/drive/MyDrive/kb_faq.txt')

# --------------------------------------------------
# 2. PREPARE DATASET FOR FINE-TUNING
# --------------------------------------------------
# 100 training examples in OpenAI *chat* fine-tuning format.
# gpt-3.5-turbo is a chat model, so every JSONL line must be
# {"messages": [system, user, assistant]}; the legacy
# {"prompt": ..., "completion": ...} layout is only for completion models.
# The legacy " →" prompt separator and "\nEND" stop marker are dropped: the
# chat endpoint below sends the raw user question, and the model would
# otherwise learn to emit "END" in its answers.
_FT_SYSTEM_PROMPT = "You are OurShop support."
_faq_pairs = [
    ("How do I reset password?",
     "Navigate to Settings → Security → Reset Password.  Check email within 5 minutes."),
    ("Where is my order?",
     "Track at https://ourshop.com/track using your order number."),
    ("Refund policy?",
     "30-day money-back guarantee.  Email support@ourshop.com with order number."),
    ("Cancel subscription",
     "Settings → Billing → Cancel.  You will not be charged after the current billing cycle."),
]
tuning_data = [
    {"messages": [
        {"role": "system", "content": _FT_SYSTEM_PROMPT},
        {"role": "user", "content": question},
        {"role": "assistant", "content": answer},
    ]}
    for question, answer in _faq_pairs
] * 25  # 4*25=100
with open('/content/drive/MyDrive/tuning.jsonl', 'w', encoding='utf-8') as f:
    # One JSON object per line (JSONL), as required by the upload endpoint.
    for entry in tuning_data:
        f.write(json.dumps(entry) + '\n')
print('✅ tuning.jsonl saved')

# --------------------------------------------------
# 3. VECTOR INDEX (PINECONE) – FREE TIER
# --------------------------------------------------
def _get_secret(name, placeholder):
    """Return the secret *name* from Colab secrets, else the environment.

    ``userdata.get`` raises (rather than returning ``None``) when the secret
    is not defined or the notebook has not been granted access, so those
    errors are caught here to let the environment-variable fallback run.
    *placeholder* is returned only when neither source provides a value.
    """
    try:
        value = userdata.get(name)
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        value = None
    return value or os.getenv(name, placeholder)


OPENAI_API_KEY = _get_secret('OPENAI_API_KEY', 'sk-xxxxxxxx')
PINECONE_API_KEY = _get_secret('PINECONE_API_KEY', 'xxxxxxxx')
pinecone.init(api_key=PINECONE_API_KEY, environment='gcp-starter')
index_name = 'supportkb'
# Create the index only on first run; dimension=1536 must match the size of
# the vectors produced by the embedding model used to populate it.
if index_name not in pinecone.list_indexes():
    pinecone.create_index(index_name, dimension=1536, metric='cosine')
print('✅ Pinecone index ready')

# --------------------------------------------------
# 4. BUILD RETRIEVER
# --------------------------------------------------
# The KB is free-form "Q:/A:" text, not CSV, so load it as plain text.
# (CSVLoader would treat the leading blank line as the header row and split
# answers on commas.)
from langchain.document_loaders import TextLoader

loader = TextLoader('/content/drive/MyDrive/kb_faq.txt', encoding='utf8')
docs = loader.load()
# Split into overlapping chunks so each embedded passage stays small.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
texts = text_splitter.split_documents(docs)
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
# Embed every chunk and upsert it into the Pinecone index created above.
vectorstore = Pinecone.from_texts(
    [t.page_content for t in texts],
    embeddings,
    index_name=index_name,
)
print('✅ Vector store populated')

# --------------------------------------------------
# 5. FINE-TUNE GPT-3.5-TURBO
# --------------------------------------------------
client = OpenAI(api_key=OPENAI_API_KEY)
# Upload the training file; the context manager closes the handle afterwards.
with open('/content/drive/MyDrive/tuning.jsonl', 'rb') as training_file:
    file_resp = client.files.create(file=training_file, purpose='fine-tune')
# In the v1 SDK, fine-tuning jobs live under `client.fine_tuning.jobs`.
ft_resp = client.fine_tuning.jobs.create(
    training_file=file_resp.id,
    model='gpt-3.5-turbo',
    suffix='support',
)
print('🔄 Fine-tune job launched – takes ~15 min')
print('Job ID:', ft_resp.id)

# (wait loop) Poll every 30 s until the job reaches a terminal state.
while True:
    job = client.fine_tuning.jobs.retrieve(ft_resp.id)
    status = job.status
    print(status)
    if status == 'succeeded':
        break
    if status in ('failed', 'cancelled'):
        # Without this the loop would spin forever on a job that will never succeed.
        raise RuntimeError(
            f'Fine-tune job {ft_resp.id} ended with status {status!r}: '
            f'{getattr(job, "error", None)}'
        )
    time.sleep(30)
fine_tuned_model = job.fine_tuned_model
print('✅ Fine-tuned model:', fine_tuned_model)

# --------------------------------------------------
# 6. CREATE CHATBOT END-POINT
# --------------------------------------------------
from fastapi import FastAPI
from pydantic import BaseModel
# FastAPI application that exposes the chatbot over HTTP.
app = FastAPI()


# Request body for POST /chat: a single free-text user message.
class Msg(BaseModel):
    user_input: str
@app.post("/chat")
def chat(msg:Msg):
    # Retrieval-augmented generation: fetch the 3 KB chunks most similar to
    # the question, place them in the system prompt, and let the fine-tuned
    # model answer. Returns {"answer": <model reply text>}.
    question = msg.user_input
    hits = vectorstore.similarity_search(question, k=3)
    context = '\n'.join(hit.page_content for hit in hits)
    system_prompt = f"You are OurShop support.  Use only the following knowledge base to answer.\n\n{context}"
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ]
    response = client.chat.completions.create(
        model=fine_tuned_model,
        messages=messages,
    )
    return {"answer": response.choices[0].message.content}

# --------------------------------------------------
# 7. LOCAL DEMO
# --------------------------------------------------
def demo_chat(question):
    """Call the chat handler directly with *question* and print its answer."""
    request = Msg(user_input=question)
    reply = chat(request)
    print("🤖:", reply['answer'])
# Smoke-test the bot with one question covered by the KB and one that is not.
for _demo_question in (
    "How do I reset my password?",
    "Can I pay with Bitcoin?",
):
    demo_chat(_demo_question)

# --------------------------------------------------
# 8. SAVE ARTEFACTS
# --------------------------------------------------
# Record which model and index this run produced. Only the first 8 characters
# of the API key are stored, never the full key.
artefacts = {
    'model_name': fine_tuned_model,
    'index_name': index_name,
    'openai_key': OPENAI_API_KEY[:8] + '...',
}
# Use a context manager so the file is flushed and closed before the cell
# ends; a bare open() handle is never closed explicitly.
with open('/content/drive/MyDrive/llm_chatbot.json', 'w') as f:
    json.dump(artefacts, f)
print('✅ Chatbot artefacts saved → Drive/llm_chatbot.json')