In [1]:

from typing import List, Optional, AsyncGenerator
from datetime import datetime
from pydantic import Field
from core import BaseCall, Msg, system_msg, user_msg
import pandas as pd
from sampling import WeightedSampler, fetch_joined_data
from query_planner import QueryPlan, SearchQuery
from core import count_tokens
from IPython.display import Markdown

# Fetch the data
# Uses `await` at cell scope, so this only runs where top-level await is
# supported (e.g. an IPython/Jupyter kernel), not as a plain script.
df = await fetch_joined_data()

# Initialize sampler with the data
# The sampler is given a copy of `df` without the 'other_speakers' column;
# `df` itself keeps that column because `drop` is not called in place.
# NOTE(review): the "Building vector index..." output of this cell presumably
# comes from the WeightedSampler constructor — confirm.
sampler = WeightedSampler(
    df=df.drop(columns=['other_speakers']),
    date_column='meeting_timestamp',
    decay_factor=0.2  # NOTE(review): presumably the recency decay rate — confirm against WeightedSampler
)

from typing import List, Dict
from pydantic import Field, validator
from core import BaseCall

# Structured response schema: an answer text plus the indices of the numbered
# context entries that the answer relied on.
# NOTE(review): documented with `#` comments on purpose — if BaseCall is a
# pydantic model (presumably, given the pydantic `Field` usage), a class
# docstring could end up in the generated schema. Confirm before adding one.
class SearchResponse(BaseCall):
    # Answer text. The description string is handed to `Field`; presumably it
    # is surfaced to the model as guidance for this field — confirm in BaseCall.
    response: str = Field(
        description="A comprehensive analysis drawing from the provided context. Direct and concise answer."
    )
    # Integer indices into the numbered context. `default_factory=list` gives
    # every instance its own empty list when the field is omitted.
    relevant_sources: List[int] = Field(
        description="List ALL THE INDICES the relevant sources from the context",
        default_factory=list
    )


Building vector index...


Batches:   0%|          | 0/100 [00:00<?, ?it/s]

In [2]:
def get_similar_entries(
    sampler: WeightedSampler,
    relevant_df: pd.DataFrame,
    n_samples: int = 100,
    recency_weight: float = 0.1,
    similarity_weight: float = 0.8,
    filter_weight: float = 0.1,
) -> pd.DataFrame:
    """Find entries similar to the relevant sources.

    One query string is built per row of ``relevant_df`` by joining that
    row's non-missing ``topic_name``, ``summary`` and ``details`` values with
    single spaces. The list of query strings is passed to ``sampler.sample``
    in ``'combined'`` mode, and rows whose index label also appears in
    ``relevant_df.index`` are removed from the result.

    Args:
        sampler: Object exposing ``sample(query=..., n_samples=..., mode=...,
            recency_weight=..., similarity_weight=..., filter_weight=...)``.
        relevant_df: Source rows; must contain the columns ``topic_name``,
            ``summary`` and ``details``.
        n_samples: Number of samples requested from the sampler. The returned
            frame can be shorter because the original sources are filtered
            out afterwards.
        recency_weight: Forwarded to ``sampler.sample`` (default 0.1).
        similarity_weight: Forwarded to ``sampler.sample`` (default 0.8).
        filter_weight: Forwarded to ``sampler.sample`` (default 0.1).

    Returns:
        The sampler's result without the rows that share an index label with
        ``relevant_df``.

    Note:
        The exclusion step compares index labels, so ``relevant_df`` must
        carry the same index labels the sampler uses in its results. A frame
        whose index was reset (labels == positions) may exclude the wrong
        rows — NOTE(review): verify against the caller.
    """
    text_columns = ('topic_name', 'summary', 'details')

    # A row whose three text fields are all missing yields an empty string,
    # which is kept so the number of queries matches the number of rows.
    query_texts = [
        ' '.join(row[column] for column in text_columns if pd.notna(row[column]))
        for _, row in relevant_df.iterrows()
    ]

    # Sample using combined similarity
    similar_results = sampler.sample(
        query=query_texts,
        n_samples=n_samples,
        mode='combined',
        recency_weight=recency_weight,
        similarity_weight=similarity_weight,
        filter_weight=filter_weight,
    )

    # Remove the original sources from results
    is_original_source = similar_results.index.isin(relevant_df.index)
    return similar_results[~is_original_source]


In [3]:
def create_context_string(sampled_df: pd.DataFrame) -> str:
    """Render the sampled rows as a numbered, newline-separated context block.

    Each row becomes one entry of the form
    ``[position] YYYY-MM-DD, speaker_name, topic_name, details``, where
    ``position`` is the row's 0-based position in ``sampled_df`` (not its
    index label). An empty frame produces an empty string.
    """
    entries = []
    position = 0
    for _, record in sampled_df.iterrows():
        meeting_day = record['meeting_timestamp'].strftime('%Y-%m-%d')
        speaker = record['speaker_name']
        topic = record['topic_name']
        details = record['details']
        entries.append(f"[{position}] {meeting_day}, {speaker}, {topic}, {details}")
        position += 1
    return "\n".join(entries)

In [16]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import block at the top so a fresh run resolves every dependency up front.
from prompts import Prompts
# Prompt container instance; its `perplexity` attribute is read where the
# `messages` list is assembled.
prompts = Prompts()


In [4]:
def create_messages(context_str: str, user_query: str) -> list:
    """Assemble the three-message prompt for a report-style answer.

    The returned list holds, in order:
      1. a system message with the assistant persona, rules and report structure,
      2. a user message embedding ``context_str`` under a "Numbered Context" header,
      3. a user message carrying ``user_query`` prefixed with ``user request: ``.
    """
    # Continuation lines of this literal keep their leading spaces: the
    # indentation is part of the prompt text.
    system_prompt = """You are Vexa, a helpful search assistant trained by Vexa AI. Deliver a detailed, well-structured 
        report in a professional business style.

        RULES:
        - Clear sections
        - Specific details, dates, speaker attributions
        - Relevant statistics or patterns
        - Synthesize across meetings
        - Rate context_sufficiency
        - Select significant sources
        - State missing or unclear info

        REPORT STRUCTURE:
        1. Key Findings
        2. Detailed Analysis
        3. Notable Quotes
        4. Patterns or Trends
        5. Gaps or Limitations"""

    prompt_messages = [system_msg(system_prompt)]

    # Context block: header, the numbered notes, then the closing instruction.
    prompt_messages.append(
        user_msg(f"""Below are numbered meeting notes with Date, Speaker, Topic, and Details.

Numbered Context:
{context_str}
Provide a comprehensive analysis using only these notes. Include specific details, dates, and speaker attributions.""")
    )

    # The actual question goes last, as its own user message.
    prompt_messages.append(user_msg(f'user request: {user_query}'))

    return prompt_messages

In [5]:

# Sample recent data
# Requests 1000 rows in 'recency' mode with all weight on recency and none on
# similarity, then orders them oldest-to-newest by meeting timestamp.
# reset_index(drop=True) throws away the index labels returned by the sampler,
# so from here on each row's label equals its position in `sampled_df`.
# NOTE(review): `sampled_df` is reassigned to `similar_entries` in a later
# cell; a distinct name per stage would make re-runs safer.
sampled_df = sampler.sample(
    n_samples=1000,
    mode='recency',
    recency_weight=1.0,
    similarity_weight=0.0
).sort_values('meeting_timestamp', ascending=True).reset_index(drop=True)

In [6]:
# Natural-language question; it is sent to the model as the final user message.
user_query = "user feedback of the specific users of vexa, be specific, list specific users"

In [51]:
# Switch the context source from the recency sample to the similarity-expanded set.
# NOTE(review): `similar_entries` is only assigned in a cell further down the
# notebook (In [14]), so on a fresh Restart & Run All this line raises
# NameError — move this cell after the one that defines `similar_entries`.
sampled_df = similar_entries

In [7]:
# Render the current `sampled_df` as numbered context lines, then show the
# value returned by count_tokens for that string as the cell output.
context_str = create_context_string(sampled_df)
count_tokens(context_str)

46666

In [18]:
# Prompt = system prompt taken from `prompts.perplexity`, the rendered context,
# and the user request, in that order.
# NOTE(review): this builds the list by hand instead of calling
# create_messages(), so that helper's report instructions are not used here.
messages = [system_msg(prompts.perplexity),user_msg(f'context: {context_str}'),user_msg(f'user request: {user_query}')]

In [19]:
from core import generic_call_stream

In [23]:
# Send the prompt and keep the result in `response` (top-level await, so this
# needs a kernel that supports awaiting at cell scope).
# NOTE(review): the following cell passes `response` to Markdown(), so it is
# presumably plain text — confirm generic_call_stream's return type.
response = await generic_call_stream(messages)


The search results do not provide specific user feedback or names of individual users of Vexa. However, there are mentions of user engagement and general feedback collection strategies, such as reaching out to both active and inactive users to understand their experiences and gather insights for product development. 

If you need detailed feedback from specific users, it may be necessary to conduct user interviews or surveys directly targeting your user base.


  response = await generic_call_stream(messages)


In [24]:
# Display the answer with Markdown formatting applied.
Markdown(response)

The search results do not provide specific user feedback or names of individual users of Vexa. However, there are mentions of user engagement and general feedback collection strategies, such as reaching out to both active and inactive users to understand their experiences and gather insights for product development. 

If you need detailed feedback from specific users, it may be necessary to conduct user interviews or surveys directly targeting your user base.

In [14]:

# Select the context rows the model cited (by position in `sampled_df`) and
# request 500 entries similar to them.
# NOTE(review): `.relevant_sources` is a field of SearchResponse, but the only
# visible assignment to `response` comes from generic_call_stream and is
# rendered with Markdown(). This cell presumably relied on an earlier
# structured call that is no longer in the notebook — confirm and restore it.
# NOTE(review): `sampled_df` had its index reset, so `relevant_sources.index`
# holds positions within the sample; get_similar_entries excludes rows by
# matching those labels against the sampler's result index — verify both use
# the same index space.
relevant_sources = sampled_df.iloc[response.relevant_sources]
similar_entries = get_similar_entries(sampler, relevant_sources, n_samples=500)

In [15]:
# Inspect the prompt messages currently held in `messages`.
# NOTE(review): this dumps the full context into the cell output; consider
# showing only a slice before sharing the notebook.
messages

[Msg(role='system', content='You are Vexa, a helpful search assistant trained by Vexa AI. Deliver a detailed, well-structured \n        report in a professional business style.\n\n        RULES:\n        - Clear sections\n        - Specific details, dates, speaker attributions\n        - Relevant statistics or patterns\n        - Synthesize across meetings\n        - Rate context_sufficiency\n        - Select significant sources\n        - State missing or unclear info\n\n        REPORT STRUCTURE:\n        1. Key Findings\n        2. Detailed Analysis\n        3. Notable Quotes\n        4. Patterns or Trends\n        5. Gaps or Limitations', stage=None, service_content=None),
 Msg(role='user', content="Below are numbered meeting notes with Date, Speaker, Topic, and Details.\n\nNumbered Context:\n[0] 2024-08-26, Olga Nemirovskaya, договор, Olga mentions the need to sign a contract with a partner for marketing services, indicating its importance for their operations.\n[1] 2024-08-26, Olg

In [50]:
# Remove the second message (index 1) from the prompt and display it.
# NOTE(review): this mutates `messages` in place, so re-running the cell
# removes another message each time (and eventually raises IndexError).
messages.pop(1)

Msg(role='user', content="Below are numbered meeting notes with Date, Speaker, Topic, and Details.\n\nNumbered Context:\n[0] 2024-08-26, Olga Nemirovskaya, Community engagement with DVG, Olga has contacted members of the DVG community for collaboration but noted a lack of responses. She plans to follow up if there is no reply by tomorrow.\n[1] 2024-08-26, Olga Nemirovskaya, блогеры, Olga discusses the payment arrangements for bloggers as part of their marketing strategy.\n[2] 2024-08-26, Dmitriy Grankin, платформа, The team discusses various features and user experiences related to the platform they are working on.\n[3] 2024-08-26, Dmitriy Grankin, Need for comprehensive analytics, Dmitriy Grankin emphasized the importance of having a robust analytics system to understand user sources and campaign effectiveness. This task may require development resources to integrate analytics into the backend.\n[4] 2024-08-26, Olga Nemirovskaya, Product School, Olga notes that she submitted informati