In [1]:
import asyncio
import re
from typing import List, Optional
from datetime import datetime

from pydantic import BaseModel, Field

from vexa import VexaAPI
from qdrant_search import QdrantSearchEngine

from core import system_msg, user_msg, assistant_msg, generic_call_stream, count_tokens, BaseCall
from prompts import Prompts
from pydantic_models import ThreadName
from thread_manager import ThreadManager
from core import generic_call_
import pandas as pd

In [2]:

class SearchResult(BaseModel):
    output: str
    messages: List[dict]
    meeting_ids: List[str]
    full_context: str
    thread_id: str
    thread_name: str
    indexed_meetings: dict
    linked_output: str


class SearchAssistant:
    def __init__(self):
        self.search_engine = QdrantSearchEngine()
        self.thread_manager = None  # Initialize to None
        self.prompts = Prompts()
        self.model = "gpt-4o-mini"
        self.indexing_jobs = {}
        
    async def initialize(self):
       self.thread_manager = await ThreadManager.create()  # Use the async create method
       
    
    async def get_thread(self, thread_id: str):
        return await self.thread_manager.get_thread(thread_id)

    async def get_user_threads(self, user_id: str):
        return await self.thread_manager.get_user_threads(user_id)

    async def count_documents(self, user_id: str):
        return await self.analyzer.count_documents(user_id=user_id)

    async def get_messages_by_thread_id(self, thread_id: str):
        return await self.thread_manager.get_messages_by_thread_id(thread_id)

    async def delete_thread(self, thread_id: str) -> bool:
        return await self.thread_manager.delete_thread(thread_id)

    async def is_indexing(self, user_id: str) -> bool:
        return self.indexing_jobs.get(user_id, False)

    async def remove_user_data(self, user_id: str) -> int:
        return await self.analyzer.remove_user_data(user_id)

    # The following methods should be updated to be async if they involve I/O operations
    async def parse_refs(self, text):
        pattern = r'\[(\d+)\]'
        return list(set(re.findall(pattern, text)))

    async def get_indexed_meetings(self, meeting_ids, refs):
        indexed_meetings = {}
        for i, meeting_id in enumerate(meeting_ids):
            if str(i + 1) in refs:
                indexed_meetings[str(i + 1)] = meeting_id
        return indexed_meetings

    async def embed_links(self, text, url_dict):
        for key, url in url_dict.items():
            text = text.replace(f'[{key}]', f'[{key}]({url})')
        return text
    
    async def search(self, query: str) -> tuple[str, list]:
        # Get search results
        main_results = await self.search_engine.search(
            query_text=query,
            limit=200,
            min_score=0.4,
        )

        speaker_results = await self.search_engine.search_by_speaker(
            speaker_query=query,
            limit=200,
            min_score=0.49
        )

        # Process results into DataFrames
        main_df = pd.DataFrame(main_results) if main_results else pd.DataFrame()
        speaker_df = pd.DataFrame(speaker_results) if speaker_results else pd.DataFrame()

        # Select relevant columns and combine results
        columns = ['topic_name', 'speaker_name', 'summary', 'details','meeting_id','timestamp']
        score_columns = ['score', 'vector_scores', 'exact_matches']

        if len(main_df) > 0:
            main_df = main_df[columns + score_columns]
            main_df['source'] = 'main'
        else:
            main_df = pd.DataFrame(columns=columns + score_columns + ['source'])

        if len(speaker_df) > 0:
            speaker_df = speaker_df[columns + ['score']]  # Speaker search has simpler scoring
            speaker_df['source'] = 'speaker'
        else:
            speaker_df = pd.DataFrame(columns=columns + ['score', 'source'])

        # Combine, deduplicate and sort results
        self.results = pd.concat([main_df, speaker_df]).drop_duplicates(subset=columns).reset_index(drop=True)
        if not self.results.empty:
            self.results = self.results.sort_values('score', ascending=False)
            
        return self.results


In [3]:
def prep_context(search_results):
    columns = ['topic_name', 'speaker_name', 'summary', 'details','meeting_id']
    context = search_results[columns].to_markdown(index=False) if not search_results.empty else "No relevant context found."
    return context

In [4]:
import numpy as np

In [5]:
import pandas as pd

def normalize_series(series: pd.Series) -> pd.Series:

    min_value = series.min()
    max_value = series.max()
    normalized = (series - min_value) / (max_value - min_value)
    return normalized * 0.5 + 0.5  # Scale to range [0.5, 1]

In [35]:
prompts = Prompts()

In [36]:
self = SearchAssistant()
await self.initialize()


In [37]:
query = 'vexa'
thread_id = None


In [38]:
search_results = await self.search('vexa')

In [39]:
if thread_id:
    thread = await self.thread_manager.get_thread(thread_id)
    if not thread:
        raise ValueError(f"Thread with id {thread_id} not found")
    messages = thread.messages
    thread_name = thread.thread_name
else:
    messages = []
    thread_name = None

In [48]:
def prep_context(search_results: pd.DataFrame) -> str:
    search_results['relevance_score'] = normalize_series(search_results['score']).round(2)
    search_results = search_results.sort_values('timestamp').reset_index(drop=True)
    search_results['datetime'] = pd.to_datetime(search_results['timestamp']).dt.strftime('%A %Y-%m-%d %H:%M')

    meetinds_df = search_results[['meeting_id']].drop_duplicates().reset_index(drop=True)
    meetinds_df['meeting_index'] = meetinds_df.index + 1
    prepared_df = search_results.drop(columns=['timestamp', 'vector_scores', 'exact_matches', 'source', 'score']).merge(meetinds_df, on='meeting_id').drop(columns=['meeting_id'])
    
    meetings = meetinds_df.to_dict(orient='records')
    
    return prepared_df.to_markdown(index=False) if not prepared_df.empty else "No relevant context found.",  {meeting['meeting_index']: meeting['meeting_id'] for meeting in meetings}




In [67]:
context, indexed_meetings = prep_context(search_results)

In [68]:
url_dict = {k: f'https://dashboard.vexa.ai/#{v}' for k, v in indexed_meetings.items()}

In [69]:

context_msg = system_msg(f"Context: {context}")

# Generate response
messages = [
    system_msg(self.prompts.perplexity),
    *messages,
    context_msg
]


In [70]:
response = await generic_call_stream(messages)

Vexa is an AI meeting assistant designed to enhance the efficiency of meetings by providing real-time transcription and contextual support. It captures spoken dialogue during meetings, allowing participants to follow along with a live transcript. Vexa also features an Assistant tab that offers summaries and additional assistance, making it a comprehensive tool for managing meeting discussions[1][5][10].

The product is currently in the testing phase, with plans for a paid subscription model to generate revenue. User feedback is being actively sought to refine its features and improve user experience. Vexa aims to assist users in capturing and understanding information in real-time, particularly in diverse professional settings[1][9][54].


In [87]:
output = ""
async for chunk in generic_call_(messages,streaming=True):
    output += chunk
   # yield chunk

In [96]:
async def embed_links(text, url_dict):
    # First, add a space between consecutive reference numbers
    text = re.sub(r'\]\[', '] [', text)
    
    # Then replace each reference with its link
    for key, url in url_dict.items():
        text = text.replace(f' [{key}]', f'[{key}]({url})')
    return text

In [97]:
linked_output = await embed_links(output, url_dict)

In [98]:
print(linked_output)

Vexa is an AI meeting assistant designed to facilitate meetings by providing real-time transcription and assistance. It captures spoken dialogue during meetings, allowing participants to follow along with a live written record. Vexa also features an Assistant tab that provides access to summaries, questions, and additional support during discussions[1](https://dashboard.vexa.ai/#c5828919-ac2d-4fe9-bd7a-8eaa91f743c8) [5](https://dashboard.vexa.ai/#ba7c1f92-a033-4302-bc60-a57ef802d232) [10](https://dashboard.vexa.ai/#1b8005e2-bf6f-461f-813e-2955c2e8d46a). 

The product is currently in the testing phase, focusing on enhancing user experience and functionality based on feedback. It aims to improve corporate communication by structuring and returning relevant information in real-time, making it particularly useful for users interacting with diverse accents and industries[1](https://dashboard.vexa.ai/#c5828919-ac2d-4fe9-bd7a-8eaa91f743c8) [9](https://dashboard.vexa.ai/#e1d2fbb4-8f91-42bd-99d

In [99]:
from IPython.display import Markdown
Markdown(linked_output)

Vexa is an AI meeting assistant designed to facilitate meetings by providing real-time transcription and assistance. It captures spoken dialogue during meetings, allowing participants to follow along with a live written record. Vexa also features an Assistant tab that provides access to summaries, questions, and additional support during discussions[1](https://dashboard.vexa.ai/#c5828919-ac2d-4fe9-bd7a-8eaa91f743c8) [5](https://dashboard.vexa.ai/#ba7c1f92-a033-4302-bc60-a57ef802d232) [10](https://dashboard.vexa.ai/#1b8005e2-bf6f-461f-813e-2955c2e8d46a). 

The product is currently in the testing phase, focusing on enhancing user experience and functionality based on feedback. It aims to improve corporate communication by structuring and returning relevant information in real-time, making it particularly useful for users interacting with diverse accents and industries[1](https://dashboard.vexa.ai/#c5828919-ac2d-4fe9-bd7a-8eaa91f743c8) [9](https://dashboard.vexa.ai/#e1d2fbb4-8f91-42bd-99d2-438d13778da9) [24](https://dashboard.vexa.ai/#f0d5f231-b866-4cb4-b59d-73899f2e0dc9). 

Vexa operates as a Chrome extension, which minimizes its presence during calls, and it is being marketed through various strategies, including influencer partnerships and social media campaigns[1](https://dashboard.vexa.ai/#c5828919-ac2d-4fe9-bd7a-8eaa91f743c8) [54](https://dashboard.vexa.ai/#70cd7290-801a-4caf-9786-359cc6e16c60).

In [None]:
messages.append(user_msg(query))
messages.append(assistant_msg(msg=linked_output, service_content=output))

In [29]:
async def parse_refs(text):
    pattern = r'\[(\d+)\]'
    refs = list(set(re.findall(pattern, text)))
    
    return [int(r) for r in refs]

In [30]:
await parse_refs(response)

[24, 54, 5, 12]

In [7]:

async def chat(self, user_id: str, query: str, user_name: str='', thread_id: Optional[str] = None, model: Optional[str] = None, temperature: Optional[float] = None, debug: bool = False):
    if thread_id:
        thread = await self.thread_manager.get_thread(thread_id)
        if not thread:
            raise ValueError(f"Thread with id {thread_id} not found")
        messages = thread.messages
        thread_name = thread.thread_name
    else:
        messages = []
        thread_name = None

    query_ = ' '.join([m.content for m in messages]) + ' ' + query
    queries = await self.analyzer.generate_search_queries(query_, user_id=user_id, user_name=user_name)
    
    summaries = await self.analyzer.get_summaries(user_id=user_id, user_name=user_name)
    full_context, meeting_ids = await self.analyzer.build_context(queries, summaries, include_all_summaries=False, user_id=user_id, user_name=user_name, k=20)

    pref = "Based on the following context, answer the question:" if len(messages) == 0 else "Follow-up request:"
    user_info = f"The User is {user_name}"
    messages_context = [
        system_msg(self.prompts.perplexity + f'. {user_info}'), 
        user_msg(f"Context:\n{full_context}"),
    ] + messages + [user_msg(f"{pref} {query}. Always supply references to meetings as [1][2][3] etc.")]

    model_to_use = model or self.model

    output = ""
    async for chunk in generic_call_(messages_context, model=model_to_use, temperature=temperature, streaming=True):
        output += chunk
        yield chunk
    
    indexed_meetings = await self.get_indexed_meetings(meeting_ids, await self.parse_refs(output))
    url_dict = {k: f'https://dashboard.vexa.ai/#{v}' for k, v in indexed_meetings.items()}
    linked_output = await self.embed_links(output, url_dict)
    
    messages.append(user_msg(query))
    messages.append(assistant_msg(msg=linked_output, service_content=output))

    if not thread_id:
        messages_str = ';'.join([m.content for m in messages if m.role == 'user'])
        thread_name = await ThreadName.call([user_msg(messages_str)])
        thread_name = thread_name[0].thread_name
        thread_id = await self.thread_manager.upsert_thread(user_id=user_id, thread_name=thread_name, messages=messages)
    else:
        await self.thread_manager.upsert_thread(user_id=user_id, messages=messages, thread_id=thread_id)

    result = {
        "thread_id": thread_id,
        "linked_output": linked_output
    }

    if debug:
        result.update({
            "output": output,
            "summaries": summaries,
            "full_context": full_context,
            "meeting_ids": meeting_ids,
            "queries": queries,
        })

    yield result

