In [1]:
pip install trulens

Note: you may need to restart the kernel to use updated packages.




In [None]:
from llama_cpp import Llama
import gradio as gr
import os
from trulens_eval import Feedback, TruLlama
from trulens_eval.feedback import Feedback
from trulens_eval.feedback.provider import Provider
from trulens_eval.tru_custom_app import instrument
import numpy as np
from textblob import TextBlob
import nltk
import json
from datetime import datetime
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import re
from deepeval import evaluate, assert_test
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

# Download required NLTK data
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

# Define file paths
PERSONALITIES_FILE = "D:\All_Coding_stuff\Sora\Production\Personalities\Personalities.txt"
CONVERSATIONS_DIR = "conversations"
CONTEXT_FILE = "conversation_history.txt"
PROMPT_FILE = "custom_prompt.txt"
EVAL_FILE = "model_evaluations.txt"

class PromptManager:
    def __init__(self):
        self.default_prompt = """
You are Sora, Arnav Mishra's personal assistant, he is your boss. Arnav Mishra is a human. Arnav is busy till 10:00 AM. Be respectful to everyone.
        """.strip()
        
    def load_prompt(self):
        """Load custom prompt from file or return default if file doesn't exist"""
        if os.path.exists(PROMPT_FILE):
            try:
                with open(PROMPT_FILE, 'r', encoding='utf-8') as file:
                    prompt = file.read().strip()
                return prompt if prompt else self.default_prompt
            except Exception as e:
                print(f"Error loading prompt: {e}")
                return self.default_prompt
        return self.default_prompt
    
    def save_prompt(self, prompt):
        """Save a new custom prompt to file"""
        try:
            with open(PROMPT_FILE, 'w', encoding='utf-8') as file:
                file.write(prompt)
            return True
        except Exception as e:
            print(f"Error saving prompt: {e}")
            return False
    
    def update_prompt(self, new_prompt):
        """Update the custom prompt"""
        return self.save_prompt(new_prompt)
    
    def reset_prompt(self):
        """Reset prompt to default"""
        return self.save_prompt(self.default_prompt)

class PersonalityManager:
    def __init__(self):
        self.personalities_file = PERSONALITIES_FILE
        self.personalities = self.load_personalities()
        self.save_personalities()
        
    def load_personalities(self):
        try:
            if os.path.exists(self.personalities_file):
                with open(self.personalities_file, 'r', encoding='utf-8') as file:
                    return json.load(file)
        except Exception as e:
            print(f"Error loading personalities: {e}")
            
        return {
            "default": {
                "name": "Arnav Mishra",
                "role": "Boss",
                "description": "Creator and boss of Sora",
                "relationships": {
                    "Sora": {
                        "role": "AI Assistant",
                        "description": "Personal AI assistant to Arnav Mishra"
                    }
                },
                "preferences": {
                    "schedule": {
                        "busy_until": "10:00 AM"
                    }
                }
            }
        }
    
    def save_personalities(self):
        try:
            os.makedirs(os.path.dirname(self.personalities_file), exist_ok=True)
            with open(self.personalities_file, 'w', encoding='utf-8') as file:
                json.dump(self.personalities, file, indent=4)
            return True
        except Exception as e:
            print(f"Error saving personalities: {e}")
            return False
    
    def get_relationships(self):
        try:
            return self.personalities["default"].get("relationships", {})
        except Exception as e:
            print(f"Error getting relationships: {e}")
            return {}
    
    def add_relationship(self, person_name, role, description=""):
        try:
            if "default" not in self.personalities:
                self.personalities["default"] = {
                    "name": "Arnav Mishra",
                    "relationships": {}
                }
            
            if "relationships" not in self.personalities["default"]:
                self.personalities["default"]["relationships"] = {}
            
            self.personalities["default"]["relationships"][person_name] = {
                "role": role,
                "description": description
            }
            return self.save_personalities()
        except Exception as e:
            print(f"Error adding relationship: {e}")
            return False

class ConversationManager:
    def __init__(self):
        if not os.path.exists(CONVERSATIONS_DIR):
            os.makedirs(CONVERSATIONS_DIR)
        
    def start_new_conversation(self):
        conversation_id = len(os.listdir(CONVERSATIONS_DIR)) + 1
        filename = f"conversation_{conversation_id}.txt"
        filepath = os.path.join(CONVERSATIONS_DIR, filename)
        with open(filepath, 'w', encoding='utf-8') as file:
            file.write(f"Conversation started at: {datetime.now()}\n\n")
        return conversation_id
    
    def save_message(self, conversation_id, message, response):
        filename = f"conversation_{conversation_id}.txt"
        filepath = os.path.join(CONVERSATIONS_DIR, filename)
        with open(filepath, 'a', encoding='utf-8') as file:
            file.write(f"User: {message}\nAssistant: {response}\n\n")

class LocalFeedbackProvider(Provider):
    def __init__(self):
        super().__init__()
        
    def sentiment_analysis(self, text: str) -> float:
        blob = TextBlob(text)
        return (blob.sentiment.polarity + 1) / 2
    
    def complexity_score(self, text: str) -> float:
        words = word_tokenize(text)
        if not words:
            return 0.0
        
        avg_word_length = sum(len(word) for word in words) / len(words)
        sentences = text.split('.')
        avg_sentence_length = len(words) / max(len(sentences), 1)
        
        complexity = (avg_word_length / 10 + avg_sentence_length / 20) / 2
        return min(max(complexity, 0.0), 1.0)
    
    def consistency_check(self, text: str) -> float:
        sentences = text.split('.')
        if len(sentences) <= 1:
            return 1.0
        
        sentiments = [TextBlob(sent).sentiment.polarity for sent in sentences if sent.strip()]
        if not sentiments:
            return 0.0
        
        variance = np.var(sentiments)
        consistency = 1 / (1 + variance)
        return float(consistency)
    
    def response_length_quality(self, text: str) -> float:
        words = word_tokenize(text)
        word_count = len(words)
        
        min_words = 20
        max_words = 150
        optimal_range = (50, 100)
        
        if word_count < min_words:
            return word_count / min_words
        elif word_count > max_words:
            return max_words / word_count
        elif optimal_range[0] <= word_count <= optimal_range[1]:
            return 1.0
        else:
            return 0.8

class ModelEvaluator:
    def __init__(self):
        self.feedback_provider = LocalFeedbackProvider()
        
        # Initialize basic NLTK resources with error handling
        try:
            nltk.download('stopwords', quiet=True)
            nltk.download('averaged_perceptron_tagger', quiet=True)
        except Exception as e:
            print(f"Warning: Error downloading NLTK data: {e}")
            
        # Initialize stopwords with fallback
        try:
            self.stop_words = set(stopwords.words('english'))
        except:
            self.stop_words = set(['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', 
                                "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 
                                'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 
                                'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 
                                'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 
                                'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 
                                'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 
                                'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 
                                'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 
                                'with', 'about', 'against', 'between', 'into', 'through', 'during', 
                                'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 
                                'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 
                                'then', 'once'])
    
    def evaluate_response(self, query: str, response: str) -> dict:
        try:
            # Get basic metrics using LocalFeedbackProvider
            sentiment_score = self.feedback_provider.sentiment_analysis(response)
            complexity_score = self.feedback_provider.complexity_score(response)
            consistency_score = self.feedback_provider.consistency_check(response)
            length_quality = self.feedback_provider.response_length_quality(response)
            
            # Simple word-based relevancy calculation
            query_words = set(w.lower() for w in query.split() if w.lower() not in self.stop_words)
            response_words = set(w.lower() for w in response.split() if w.lower() not in self.stop_words)
            
            # Remove punctuation
            query_words = {word.strip('.,!?()[]{}:;"\'') for word in query_words}
            response_words = {word.strip('.,!?()[]{}:;"\'') for word in response_words}
            
            # Calculate word overlap
            word_overlap = len(query_words.intersection(response_words)) / len(query_words) if query_words else 0
            
            # Calculate final relevancy score
            relevancy_score = (word_overlap + sentiment_score) / 2
            
            return {
                "relevancy": {
                    "score": round(relevancy_score, 3),
                    "reason": self._generate_reason(
                        relevancy_score,
                        sentiment_score,
                        complexity_score,
                        consistency_score,
                        length_quality
                    )
                },
                "additional_metrics": {
                    "sentiment": round(sentiment_score, 3),
                    "complexity": round(complexity_score, 3),
                    "consistency": round(consistency_score, 3),
                    "length_quality": round(length_quality, 3)
                }
            }
            
        except Exception as e:
            print(f"Error in evaluation: {e}")
            return {
                "relevancy": {
                    "score": 0.0,
                    "reason": "Error occurred during evaluation"
                },
                "additional_metrics": {
                    "sentiment": 0.0,
                    "complexity": 0.0,
                    "consistency": 0.0,
                    "length_quality": 0.0
                }
            }

    def _generate_reason(self, relevancy, sentiment, complexity, consistency, length_quality):
        """Generate a human-readable explanation of the scores"""
        reasons = []
        
        if relevancy >= 0.8:
            reasons.append("Response is highly relevant to the query")
        elif relevancy >= 0.5:
            reasons.append("Response is moderately relevant to the query")
        else:
            reasons.append("Response could be more relevant to the query")
            
        if sentiment >= 0.7:
            reasons.append("Appropriate emotional tone")
        
        if consistency >= 0.8:
            reasons.append("Good internal consistency")
        elif consistency < 0.5:
            reasons.append("Could improve response consistency")
            
        if length_quality >= 0.8:
            reasons.append("Good response length")
        elif length_quality < 0.5:
            reasons.append("Response length could be adjusted")
            
        if complexity >= 0.7:
            reasons.append("Well-balanced complexity")
        elif complexity < 0.3:
            reasons.append("Response could be more detailed")
            
        return " | ".join(reasons)
def load_gguf_model(model_path, n_ctx=2048, n_gpu_layers=0):
    try:
        model = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_gpu_layers=n_gpu_layers
        )
        return model
    except Exception as e:
        print(f"Error loading model: {str(e)}")
        return None

def get_personality_context():
    try:
        relationships = personality_manager.get_relationships()
        context = "Personal Context:\n"
        
        context += f"- Main User: {personality_manager.personalities['default']['name']}\n"
        
        if relationships:
            context += "\nKnown Relationships:\n"
            for person, details in relationships.items():
                context += f"- {person}: {details['role']}"
                if details.get('description'):
                    context += f" ({details['description']})"
                context += "\n"
        
        if 'preferences' in personality_manager.personalities['default']:
            prefs = personality_manager.personalities['default']['preferences']
            context += "\nPreferences:\n"
            if 'schedule' in prefs:
                context += f"- Schedule: Busy until {prefs['schedule']['busy_until']}\n"
        
        return context
    except Exception as e:
        print(f"Error generating personality context: {e}")
        return "Error loading personality context"

def chat_with_model(message, history):
    global current_conversation_id
    
    if current_conversation_id is None:
        current_conversation_id = conversation_manager.start_new_conversation()
    
    try:
        personality_context = get_personality_context()
        system_prompt = prompt_manager.load_prompt()
        
        conversation = f"{system_prompt}\n{personality_context}\nUser: {message}\nAssistant:"

        response = llm.create_completion(
            conversation,
            max_tokens=150,
            temperature=0.7,
            stop=["User:", "Assistant:"]
        )

        response_text = response['choices'][0]['text'].strip()
        
        # Save the conversation
        conversation_manager.save_message(current_conversation_id, message, response_text)
        
        try:
            # Get the evaluation results
            evaluation = evaluator.evaluate_response(message, response_text)
            
            if evaluation:
                eval_summary = "\n\nEvaluation Metrics:\n"
                eval_summary += f"Relevancy Score: {evaluation['relevancy']['score']}\n"
                eval_summary += f"Sentiment: {evaluation['additional_metrics']['sentiment']}\n"
                eval_summary += f"Complexity: {evaluation['additional_metrics']['complexity']}\n"
                eval_summary += f"Consistency: {evaluation['additional_metrics']['consistency']}\n"
                eval_summary += f"Length Quality: {evaluation['additional_metrics']['length_quality']}\n"
                eval_summary += f"\nAnalysis: {evaluation['relevancy']['reason']}"
                response_text += eval_summary
        except Exception as e:
            print(f"Evaluation error: {e}")
            # Continue without evaluation if it fails
            
        return response_text
    
    except Exception as e:
        print(f"Error in chat: {e}")
        return "An error occurred while processing your request."


# Initialize all components
prompt_manager = PromptManager()
personality_manager = PersonalityManager()
conversation_manager = ConversationManager()
evaluator = ModelEvaluator()
current_conversation_id = None

# Load the model
model_path = r"llama-3-8b-Instruct-bnb-4bit-scrapegraph-companion-unsloth.Q4_K_M.gguf"
llm = load_gguf_model(model_path)

# Create Gradio interface
demo = gr.ChatInterface(
    fn=chat_with_model,
    title="Enhanced GGUF Model Chatbot",
    description="Chat with your local GGUF model with personality awareness and conversation history",
    examples=["Tell me about Arnav's friends", "What is your role as Sora?", "Who are the people you know?"],
    theme="default"
)

# Launch the interface
if __name__ == "__main__":
    # Add some initial relationships for testing
    personality_manager.add_relationship(
        "Alice", 
        "Colleague",
        "Works with Arnav on AI projects"
    )
    personality_manager.add_relationship(
        "Bob",
        "Friend",
        "College friend and regular gaming partner"
    )
    
    demo.launch(share=True)  # Set share=False to disable public URL

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\mishr\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\mishr\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\mishr\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
llama_model_loader: loaded meta data with 23 key-value pairs and 291 tensors from llama-3-8b-Instruct-bnb-4bit-scrapegraph-companion-unsloth.Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = llama-3

* Running on local URL:  http://127.0.0.1:7862

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.


llama_perf_context_print:        load time =    8587.73 ms
llama_perf_context_print: prompt eval time =    8587.28 ms /   186 tokens (   46.17 ms per token,    21.66 tokens per second)
llama_perf_context_print:        eval time =   10184.20 ms /    32 runs   (  318.26 ms per token,     3.14 tokens per second)
llama_perf_context_print:       total time =   18822.09 ms /   218 tokens


Error in evaluation: 
**********************************************************************
  Resource [93mpunkt_tab[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('punkt_tab')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtokenizers/punkt_tab/english/[0m

  Searched in:
    - 'C:\\Users\\mishr/nltk_data'
    - 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\nltk_data'
    - 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\share\\nltk_data'
    - 'C:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\\lib\\nltk_data'
    - 'C:\\Users\\mishr\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
**********************************************************************

