In [1]:
import requests
import json
from datetime import datetime as dt

import torch

# Choose the compute device: the "mps" backend when PyTorch reports it as
# available, otherwise the CPU. The name `mps_device` is used for the CPU
# fallback as well, so later cells can refer to it unconditionally.
mps_device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(
    "MPS is available!"
    if mps_device.type == "mps"
    else "MPS is not available on this device."
)

from storybot.models import Entry

MPS is available!


In [2]:
file_dir = "./data"


def _load_json(file_name, base_dir=file_dir):
    """Read ``<base_dir>/<file_name>`` and return the decoded JSON object.

    The file is opened explicitly as UTF-8 instead of the platform default
    encoding: the message texts contain non-ASCII punctuation (typographic
    apostrophes), which a non-UTF-8 locale would decode incorrectly.

    Args:
        file_name: Name of the JSON file inside ``base_dir``.
        base_dir: Directory holding the data files; defaults to ``file_dir``.

    Returns:
        Whatever ``json.load`` produces for the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open("/".join([base_dir, file_name]), 'r', encoding='utf-8') as f:
        return json.load(f)


# Load conversations
conversations = _load_json('conversations.json')

# Load discussions
discussions = _load_json('discussions.json')

# Load activity data
activities = _load_json('activity.json')

In [11]:
# Display the first conversation record to inspect its structure
# (a dict whose 'messages_list' holds the individual message dicts).
conversations[0]

{'messages_list': [{'ref_conversation_id': 98696,
   'ref_user_id': 782,
   'transaction_datetime_utc': '2023-10-01T10:15:00Z',
   'screen_name': 'ChattyPenguin',
   'message': 'Hello StoryBot, I’m having a tough time with this app. My fingers aren’t what they used to be. Can you help me?'},
  {'ref_conversation_id': 98696,
   'ref_user_id': 1,
   'transaction_datetime_utc': '2023-10-01T10:20:00Z',
   'screen_name': 'StoryBot',
   'message': "Hello ChattyPenguin! I’m here to help. Can you tell me what issues you're experiencing with the app?"},
  {'ref_conversation_id': 98696,
   'ref_user_id': 782,
   'transaction_datetime_utc': '2023-10-01T10:25:00Z',
   'screen_name': 'ChattyPenguin',
   'message': "It's just so complicated! I keep hitting the wrong buttons. And sometimes it logs me out unexpectedly."},
  {'ref_conversation_id': 98696,
   'ref_user_id': 1,
   'transaction_datetime_utc': '2023-10-01T10:30:00Z',
   'screen_name': 'StoryBot',
   'message': "That sounds frustrating! Unf

In [15]:
feature_url = "http://127.0.0.1:8000/extract_sentiment"
db_url = "http://127.0.0.1:8001/entry"

headers = {
    "Content-Type": "application/json"
}

# Messages written by the bot itself carry this user id and are not scored.
STORYBOT_USER_ID = 1

# Upper bound (seconds) on every HTTP call so a stalled service cannot hang the cell.
REQUEST_TIMEOUT_S = 60


def _is_success(response):
    """Return True when `response` carries a 2xx HTTP status code."""
    return 200 <= response.status_code < 300


# Mimic a streaming conversation: every user message of the first two
# conversations is sent to the feature service, and the extracted features
# are then forwarded to the DB service.
for conv in conversations[:2]:
    for message in conv['messages_list']:
        user_id = message['ref_user_id']

        # Skip the bot's own turns before doing any further work for them.
        if user_id == STORYBOT_USER_ID:
            continue

        payload = {
            "message": message['message'],
            "metadata": {
                "user_id": user_id,
                "conversation_id": message['ref_conversation_id'],
                "screen_name": message['screen_name'],
                "timestamp": message['transaction_datetime_utc']
            }
        }

        # Step 1: feature extraction. Any failure here ends the handling of this
        # message; otherwise the DB step below would run with a missing response
        # (first iteration) or with the response of a previous message.
        try:
            feature_response = requests.post(
                feature_url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT_S
            )

            # Check if the request was successful (status code 2xx)
            if not _is_success(feature_response):
                print(f"Error: API request failed with status code {feature_response.status_code}")
                print(f"Response content: {feature_response.text}")
                continue

            features = feature_response.json()
            print("API Response:")
            print(json.dumps(features, indent=2))

        except requests.exceptions.RequestException as e:
            print(f"An error occurred during the request: {e}")
            continue

        # Step 2: persist the extracted features through the DB service.
        try:
            item = Entry(**features)
            db_response = requests.post(
                db_url, headers=headers, data=item.model_dump_json(), timeout=REQUEST_TIMEOUT_S
            )

            if _is_success(db_response):
                print("DB API Response:")
                print(json.dumps(db_response.json(), indent=2))
            else:
                print(f"Error: DB API request failed with status code {db_response.status_code}")
                print(f"Response content: {db_response.text}")

        except requests.exceptions.RequestException as e:
            print(f"An error occurred during the request: {e}")
            

API Response:
{
  "response": {
    "vader_scores": {
      "neg": 0.065,
      "neu": 0.819,
      "pos": 0.116,
      "compound": 0.296
    },
    "top_sentiment": {
      "label": "sadness",
      "score": 0.4427511990070343
    },
    "summary": {
      "summary_text": "Hello StoryBot, I\u2019m having a tough time with this app. My fingers aren\u2019t what they used to be. Can you help me?"
    },
    "metadata": {
      "user_id": 782,
      "conversation_id": 98696,
      "screen_name": "ChattyPenguin",
      "timestamp": "2023-10-01T10:15:00Z"
    }
  }
}
DB API Response:
{
  "response": {
    "id": 1
  }
}
API Response:
{
  "response": {
    "vader_scores": {
      "neg": 0.184,
      "neu": 0.816,
      "pos": 0.0,
      "compound": -0.5255
    },
    "top_sentiment": {
      "label": "surprise",
      "score": 0.7161477208137512
    },
    "summary": {
      "summary_text": "It's just so complicated! I keep hitting the wrong buttons. And sometimes it logs me out unexpectedly.

In [69]:
# Query the DB service for the top features of one user.
user_id = 782

# Alternative endpoint kept for reference: "http://127.0.0.1:8001/messages"
db_url = "http://127.0.0.1:8001/top-features"

headers = {"Content-Type": "application/json"}
payload = {"user_id": user_id}

# The user id travels as a JSON body; the raw response object is kept in
# `resp` and unpacked in a later cell.
resp = requests.post(url=db_url, json=payload, headers=headers)

In [70]:
# Replace the HTTP response object with the payload stored under its
# 'response' key. The type guard keeps the cell re-runnable: once `resp`
# already holds the payload, a second run leaves it untouched instead of
# failing on the missing `.json()` method.
if isinstance(resp, requests.Response):
    resp = resp.json()['response']
resp

{'afraid': 2.7047480922384253,
 'afraid afraid': 2.7047480922384253,
 'afraid attacked': 2.7047480922384253,
 'afraid comes': 2.7047480922384253,
 'afraid dark': 2.7047480922384253}

In [63]:
from dataclasses import dataclass, field
from typing import List, Dict
from collections import defaultdict

from sklearn.feature_extraction.text import TfidfVectorizer
import scipy.sparse as sp


@dataclass
class MessageProcessor:
    """Collect message texts from a conversation payload and rank their vocabulary.

    Attributes:
        conversation: Either an iterable of message dicts (each providing a
            ``'message'`` key) or a dict that wraps such an iterable under a
            ``'response'`` key. The wrapper is removed in ``__post_init__``.
        messages: The message texts taken from ``conversation``, in order.
        tfidf_matrix: TF-IDF matrix of ``messages``; ``None`` until
            ``set_tfidf_message_matrix`` has run.
        tfidf_vectorizer: The fitted vectorizer; ``None`` until
            ``set_tfidf_message_matrix`` has run.
    """

    conversation: List[dict] | dict
    messages: List[str] = field(default_factory=list)

    tfidf_matrix: sp.spmatrix | None = None
    tfidf_vectorizer: TfidfVectorizer | None = None

    def __post_init__(self):
        """Unwrap a ``{'response': ...}`` payload, then collect the message texts."""
        # strip the 'response' key off
        if isinstance(self.conversation, dict) and 'response' in self.conversation:
            self.conversation = self.conversation['response']

        self._process_messages()

    def _process_messages(self):
        """Append the ``'message'`` value of every conversation item to ``messages``."""
        self.messages.extend(msg['message'] for msg in self.conversation)

    def set_tfidf_message_matrix(self):
        """Fit a TF-IDF vectorizer on ``messages`` and store it with the resulting matrix.

        The vectorizer drops English stop words, builds unigrams and bigrams,
        and only treats purely alphabetic runs as tokens.
        """
        self.tfidf_vectorizer = TfidfVectorizer(
            stop_words='english',
            ngram_range=(1, 2),
            token_pattern=r'\b[a-zA-Z]+\b'
        )

        # One pass that learns the vocabulary/IDF weights and produces the matrix.
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(self.messages)

    def get_top_features(self, max_features: int = 5) -> List[tuple]:
        """Return the ``max_features`` vocabulary terms with the highest IDF score.

        The vectorizer is fitted on demand when ``set_tfidf_message_matrix``
        has not been called yet, so this method can be used directly after
        construction.

        Note:
            The ranking uses the vectorizer's IDF weights only, not the
            per-message TF-IDF values in ``tfidf_matrix``. Terms with equal
            IDF keep the order in which the vectorizer lists its features,
            because the sort is stable.

        Args:
            max_features: Maximum number of (term, score) pairs to return.

        Returns:
            A list of ``(feature_name, idf_score)`` tuples in descending
            score order.
        """
        if self.tfidf_vectorizer is None:
            self.set_tfidf_message_matrix()

        feature_names = self.tfidf_vectorizer.get_feature_names_out()

        # Get the IDF (Inverse Document Frequency) scores
        idf_scores = self.tfidf_vectorizer.idf_

        # Pair every feature with its IDF score and order by score, highest first.
        ranked = sorted(
            zip(feature_names, idf_scores),
            key=lambda pair: pair[1],
            reverse=True,
        )

        return ranked[:max_features]

    

In [64]:
# Build the processor and fit its TF-IDF model. The method that fits the
# vectorizer is `set_tfidf_message_matrix`.
# NOTE(review): MessageProcessor reads msg['message'] from every element, so
# `resp` must hold the list of message dicts at this point, not a
# {feature: score} mapping -- confirm which request cell is meant to run before this one.
ms = MessageProcessor(resp)
ms.set_tfidf_message_matrix()

In [65]:
# Fetch up to ten (feature, score) pairs from the processor and display them.
top = ms.get_top_features(max_features=10)
top

[('afraid', np.float64(2.7047480922384253)),
 ('afraid afraid', np.float64(2.7047480922384253)),
 ('afraid attacked', np.float64(2.7047480922384253)),
 ('afraid comes', np.float64(2.7047480922384253)),
 ('afraid dark', np.float64(2.7047480922384253)),
 ('afraid small', np.float64(2.7047480922384253)),
 ('amazing', np.float64(2.7047480922384253)),
 ('amazing sunsets', np.float64(2.7047480922384253)),
 ('app', np.float64(2.7047480922384253)),
 ('app fingers', np.float64(2.7047480922384253))]

In [66]:
# Turn the (feature, score) pairs into a {feature: float} mapping with plain
# Python floats. `dict(top)` accepts both the list of pairs and an
# already-converted mapping, so re-running this cell does not fail.
top = {token: float(val) for token, val in dict(top).items()}
top

{'afraid': 2.7047480922384253,
 'afraid afraid': 2.7047480922384253,
 'afraid attacked': 2.7047480922384253,
 'afraid comes': 2.7047480922384253,
 'afraid dark': 2.7047480922384253,
 'afraid small': 2.7047480922384253,
 'amazing': 2.7047480922384253,
 'amazing sunsets': 2.7047480922384253,
 'app': 2.7047480922384253,
 'app fingers': 2.7047480922384253}