In [1]:
from sentence_transformers import SentenceTransformer, util
import joblib
import numpy as np
import pandas as pd


In [2]:
embedder = SentenceTransformer('all-MiniLM-L6-v2')
model = joblib.load('questions-categorizer-v2-KNeighborsClassifier.model.joblib')

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [3]:
# Initialize fixed size dataframe to setup cache (LFU)
num_rows = 4  # Size of the cache
columns = ['Question', 'Response', 'Access Count']
# Create a list of dictionaries with default values
data = [{'Question': '', 'Response': '', 'Access Count': 0} for _ in range(num_rows)]
# Create a DataFrame from the list of dictionaries
cache_for_VM = pd.DataFrame(data, columns=columns)

print(cache_for_VM)


  Question Response  Access Count
0                               0
1                               0
2                               0
3                               0


In [4]:
# New values for the row
new_values = {'Question': 'How does TLB caching improve virtual memory performance?', 'Response': 'Resp 23', 'Access Count': 1}

print(new_values['Question'])

# Change the values using .loc[]
cache_for_VM.loc[2] = new_values

print(cache_for_VM.iloc[2])

How does TLB caching improve virtual memory performance?
Question        How does TLB caching improve virtual memory pe...
Response                                                  Resp 23
Access Count                                                    1
Name: 2, dtype: object


In [5]:
# encoded cache files
embeddings_cache_for_VM = embedder.encode(cache_for_VM['Question'])
print(embeddings_cache_for_VM)

[[-0.11883842  0.04829867 -0.00254811 ...  0.1264095   0.04654902
  -0.01571722]
 [-0.11883842  0.04829867 -0.00254811 ...  0.1264095   0.04654902
  -0.01571722]
 [ 0.00430944  0.05077931 -0.00995335 ... -0.06675059 -0.03632696
  -0.06660837]
 [-0.11883842  0.04829867 -0.00254811 ...  0.1264095   0.04654902
  -0.01571722]]


In [6]:
def give_the_response(new_request, category):
  encoded_new_request = embedder.encode(new_request)

  response = cache_handler(new_request, encoded_new_request, category)
  return response


In [7]:
def cache_handler(request, encoded_request, category):
  if category == "vm":
    cos_sim = util.cos_sim(embeddings_cache_for_VM, encoded_request)
    if (max(cos_sim) > 0.75):
      print("* * * * * * * From Cache * * * * * * * ")
      print(cos_sim.argmax().item())
      return cache_for_VM.iloc[cos_sim.argmax().item()][1]
    else:
      return call_API(request)
  # add other categories


In [124]:
def call_API(request):
  print("* * * * * * * Calling API * * * * * * * ")
  mock_resp = "API response for -> " + request
  #Add response and question to the cache
  new_value = {'Question': 'How does virtual memory support memory protection and isolation between processes?', 'Response': 'Resp 15', 'Access Count': 0}
  cacheVM.add_record(new_value)
  return mock_resp
    

In [9]:
test_sentence = "can you please tell me how can we improve performances of cache using TLB?"

In [10]:
category = "vm"
response_for_test_sentence = give_the_response(test_sentence, category)
print(response_for_test_sentence)

* * * * * * * From Cache * * * * * * * 
2
Resp 23


In [65]:
class CacheLFU:
    def __init__(self, category):
        """
        Create a new cache object.

        Args:
            category: The category of the cache.  

        Returns:
            None.
        """
        self.category = category
        self.size = 4
        # Initialize fixed size dataframe
        columns = ['Question', 'Response', 'Access Count']
        # Create a list of dictionaries with default values
        data = [{'Question': '', 'Response': '', 'Access Count': 0} for _ in range(self.size)]
        # Create a DataFrame from the list of dictionaries
        self.cache_df = pd.DataFrame(data, columns=columns)
        #encode questions
        self.embeddings_cache = embedder.encode(self.cache_df['Question'])
        print(self.category, "cache initialized")
        print(len(self.embeddings_cache))   
        
    def add_record(self, new_values):
        """
        Add a new record to the cache.

        Args:
            new_values (dict): The new record values. Ex - {'Question': 'How does TLB caching improve virtual memory performance?', 'Response': 'Resp 23', 'Access Count': 0}

        Returns:
            None.
        """

        # Check if the cache is full.
        if len(self.cache_df) == self.size:
            # Get the least accessed question in the cache.
            least_accessed_question = self.cache_df[self.cache_df['Access Count'] == self.cache_df['Access Count'].min()].iloc[0]
            print("Least accessed question:", least_accessed_question)
            removing_record_index = least_accessed_question.name
            print("Removing record with index", removing_record_index)
            
            self.cache_df.loc[removing_record_index] = new_values
            
            # Encode the new question
            encoded_question = embedder.encode(new_values['Question'])
            
            # Update the embeddings cache.
            self.embeddings_cache[removing_record_index] = encoded_question
    
    def update_count(self, index):
        """
        Update the access count of a record.

        Args:
            index (int): The index of the record to be updated.

        Returns:
            None.
        """
        self.cache_df.loc[index, 'Access Count'] += 1

    def get_response(self, index):
        """
        Get the response for a record.

        Args:
            index (int): The index of the record to be retrieved.

        Returns:
            str: The response for the record.
        """
        return self.cache_df.loc[index, 'Response']


In [66]:
cacheVM = CacheLFU("VM")

VM cache initialized
4


In [67]:
def cache_handler_v2(request, encoded_request, category):
  if category == "vm":
    cos_sim = util.cos_sim(cacheVM.embeddings_cache, encoded_request)
    if (max(cos_sim) > 0.75):
      print("* * * * * * * From Cache * * * * * * * ")
      print(cos_sim.argmax().item())
      # Update the access count
      cacheVM.update_count(cos_sim.argmax().item())
      return cacheVM.get_response(cos_sim.argmax().item())
    else:
      return call_API(request)
  # add other categories


In [68]:
def give_the_response_v2(new_request, category):
  encoded_new_request = embedder.encode(new_request)

  response = cache_handler_v2(new_request, encoded_new_request, category)
  return response


In [129]:
test_sentence = "can you please tell me how virtual memory support memory protection and isolation between processes?"
category = "vm"
response_for_test_sentence = give_the_response_v2(test_sentence, category)
print(response_for_test_sentence)

* * * * * * * From Cache * * * * * * * 
2
Resp 15


In [130]:
print(cacheVM.cache_df)

                                            Question Response  Access Count
0       How does swapping affect system performance?  Resp 17             4
1  How does TLB caching improve virtual memory pe...  Resp 23            15
2  How does virtual memory support memory protect...  Resp 15             2
3  How does virtual memory help in managing memor...   Resp 9             4


In [99]:
print(cacheVM.embeddings_cache)

[[ 0.00736909  0.00921233 -0.01392672 ... -0.05852419 -0.09660058
  -0.09026206]
 [ 0.00430942  0.05077929 -0.00995335 ... -0.06675062 -0.03632697
  -0.06660838]
 [-0.11883846  0.04829861 -0.00254809 ...  0.12640952  0.04654904
  -0.01571725]
 [-0.11883838  0.04829865 -0.00254807 ...  0.12640947  0.04654909
  -0.01571732]]
