In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2


In [9]:
import pandas as pd
from core import system_msg, user_msg,assistant_msg


In [82]:
from sampling import fetch_joined_data
df = await fetch_joined_data()
df = df.sort_values(['meeting_timestamp'])
from elastic_search import VectorSearchEngine
search_engine = VectorSearchEngine(device=0)
search_engine.create_index(df)

Using device: cuda:0
Creating field-specific indices...
Processing topic_name...


  0%|          | 0/400 [00:00<?, ?it/s]

Processing summary...


  0%|          | 0/400 [00:00<?, ?it/s]

Processing details...


  0%|          | 0/400 [00:00<?, ?it/s]

In [144]:
from typing import List, Optional
from pydantic import BaseModel, Field
from core import BaseCall, system_msg, user_msg
import pandas as pd
from datetime import datetime
from typing import List, Optional, Set
from pydantic import BaseModel, Field
from core import BaseCall, system_msg, user_msg, assistant_msg
import pandas as pd
from datetime import datetime

class FollowUpSearch(BaseModel):
    # One suggested follow-up query together with the reason for running it.
    # All four fields are required (no defaults).

    query: str = Field(
        ...,
        description="Follow-up search query",
    )

    rationale: str = Field(
        ...,
        description="Reason for this follow-up search",
    )

    # Integer constrained to the inclusive range 1..5.
    priority: int = Field(
        ...,
        ge=1,
        le=5,
        description="Priority level (1-5)",
    )

    focus_areas: List[str] = Field(
        ...,
        description="Specific areas or topics to focus on",
    )

class ReportSection(BaseModel):
    # A titled piece of report text with a confidence score and the indices
    # of the sources backing it. All four fields are required (no defaults).

    title: str = Field(
        ...,
        description="Section title",
    )

    content: str = Field(
        ...,
        description="Section content",
    )

    # Float constrained to the inclusive range 0..1.
    confidence: float = Field(
        ...,
        ge=0,
        le=1,
        description="Confidence score for this section",
    )

    sources: List[int] = Field(
        ...,
        description="Source indices that support this section",
    )


class SearchReport(BaseCall):
    """Generates structured report and follow-up searches from search results"""

    report_sections: List[ReportSection] = Field(
        ...,
        description="Organized sections of the report"
    )

    key_findings: List[str] = Field(
        ...,
        description="Main insights extracted from the search results"
    )

    information_gaps: List[str] = Field(
        ...,
        description="Identified gaps in current information"
    )

    # `max_length` is the pydantic v2 spelling of the list-size cap; the v1
    # name `max_items` is deprecated there.
    follow_up_searches: List[FollowUpSearch] = Field(
        ...,
        max_length=5,
        description="Suggested follow-up searches to fill information gaps"
    )

    context_quality: float = Field(
        ...,
        ge=0,
        le=1,
        description="Overall quality score of available context"
    )

    @classmethod
    async def extract(cls,
                     df: pd.DataFrame,
                     original_query: str,
                     previous_findings: Optional[List[str]] = None,
                     explored_queries: Optional[Set[str]] = None,
                     use_cache: bool = False) -> "SearchReport":
        """Assemble the research prompt from search results and request a report.

        Args:
            df: Search results. Must contain the columns ``meeting_timestamp``,
                ``speaker_name``, ``topic_name``, ``summary`` and ``details``;
                they are rendered with ``DataFrame.to_string()`` into the prompt.
            original_query: The query that produced ``df``.
            previous_findings: Findings from earlier iterations; when non-empty
                they are appended under a "Previous Findings:" heading.
                ``None`` is treated as an empty list.
            explored_queries: Queries that were already issued; listed in the
                prompt in sorted order. ``None`` is treated as an empty set.
            use_cache: Forwarded unchanged to ``cls.call``.

        Returns:
            The result of ``cls.call`` for the assembled messages.
            NOTE(review): confirm whether ``cls.call`` can return ``None`` on
            failure; callers may need to guard against that.

        Raises:
            KeyError: If ``df`` lacks any of the required columns.
        """
        if explored_queries is None:
            explored_queries = set()
        if previous_findings is None:
            previous_findings = []

        # Format DataFrame for context
        context = df[['meeting_timestamp', 'speaker_name', 'topic_name', 'summary', 'details']].to_string()

        # Build cumulative context from previous findings
        cumulative_context = (
            "\n\nPrevious Findings:\n" + "\n".join(previous_findings)
            if previous_findings
            else ""
        )

        # Sets iterate in an arbitrary, per-process order; sorting keeps the
        # prompt text identical across runs for the same inputs.
        explored_text = ', '.join(sorted(explored_queries)) if explored_queries else 'None'

        messages = [
            system_msg("""You are an analytical search assistant conducting an iterative research process. Your task is to:
            1. Analyze search results and create a structured report
            2. Identify information gaps
            3. Suggest strategic follow-up searches that build upon previous findings
            4. Rate information quality and confidence
            5. Ensure follow-up searches explore new angles while avoiding redundancy
            
            Be precise, evidence-based, and never speculate beyond available data."""),

            user_msg(f"""Original Query: {original_query}

Search Results Context:
{context}
{cumulative_context}

Previously Explored Queries: {explored_text}

Generate a comprehensive report with strategic follow-up searches that build upon our current understanding.""")
        ]

        return await cls.call(messages, model="default", use_cache=use_cache)

async def conduct_iterative_research(
    search_engine,
    initial_query: str,
    max_iterations: int = 5,
    min_context_quality: float = 0.8,
    k: int = 200,
    min_similarity: float = 0.49,
    exact_match_boost: float = 0.3,
) -> List["SearchReport"]:
    """
    Conduct iterative research by following up on gaps and building context.

    Queries are processed first-in, first-out starting from ``initial_query``.
    Each iteration searches, asks ``SearchReport.extract`` for a report, and
    queues the report's follow-up queries for later iterations.

    Args:
        search_engine: Object exposing ``search(query=..., k=...,
            min_similarity=..., exact_match_boost=..., return_scores=...)``;
            its result is passed to ``SearchReport.extract`` as ``df``.
        initial_query: First query to run.
        max_iterations: Upper bound on search/report rounds, including rounds
            that produced no report.
        min_context_quality: Stop as soon as a report's ``context_quality``
            reaches this value.
        k: Forwarded to ``search_engine.search``.
        min_similarity: Forwarded to ``search_engine.search``.
        exact_match_boost: Forwarded to ``search_engine.search``.

    Returns:
        The reports that were generated, in the order they were produced.
        Rounds where no report came back (``None``) are omitted.
    """
    reports = []
    explored_queries = {initial_query}
    cumulative_findings = []
    todo_queries = [initial_query]

    # Count rounds separately from reports so that a round without a report
    # still consumes budget and cannot extend the run.
    iteration = 0
    while iteration < max_iterations and todo_queries:
        iteration += 1
        current_query = todo_queries.pop(0)

        # Execute search
        results = search_engine.search(
            query=current_query,
            k=k,
            min_similarity=min_similarity,
            exact_match_boost=exact_match_boost,
            return_scores=True,
        )

        # Generate report with cumulative context
        report = await SearchReport.extract(
            df=results,
            original_query=current_query,
            previous_findings=cumulative_findings,
            explored_queries=explored_queries,
        )

        # Skip rounds where extraction yielded nothing instead of failing
        # later on attribute access.
        if report is None:
            print(f"No report generated for query {current_query!r}; skipping.")
            continue

        reports.append(report)

        # Update cumulative findings
        cumulative_findings.extend(report.key_findings)

        # Queue new follow-ups. Marking each query as explored immediately
        # also removes duplicates that occur within the same report.
        for search in report.follow_up_searches:
            if search.priority >= 3 and search.query not in explored_queries:  # Only follow high-priority leads
                explored_queries.add(search.query)
                todo_queries.append(search.query)

        # Check if we've reached sufficient context quality
        if report.context_quality >= min_context_quality:
            print(f"Reached sufficient context quality: {report.context_quality}")
            break

        print(f"Completed iteration {iteration}/{max_iterations}. "
              f"Context quality: {report.context_quality}")

    return reports

# Example usage:
reports = []
todo_queries = []
todo_queries.append('vexa')

# Initial search
q = todo_queries.pop(0)
results = search_engine.search(
    query=q,
    k=200,
    min_similarity=0.49,
    exact_match_boost=0.3,
    return_scores=True
)

# Start iterative research
reports = await conduct_iterative_research(search_engine, q)

# Access cumulative findings
all_findings = []
for r in reports:
    all_findings.extend(r.key_findings)


Initializing Search...
Initial search complete - found 119 results

Starting Iterative Research for: vexa

Iteration 1/5
Current Query: vexa
Remaining queries in queue: 0

Search Results:
- Found 119 matches
- Similarity range: 0.495 to 1.436

=== Generating Report for Query: vexa ===
Previous findings count: 0
Explored queries count: 1
Results data shape: (119, 11)

Report Summary:


AttributeError: 'NoneType' object has no attribute 'report_sections'

In [93]:
# Reset the manual research state: no reports yet, one seed query queued.
reports = []
todo_queries = ['vexa']

In [120]:
# Take the next queued query and show which one is being run.
q = todo_queries.pop(0)
print(q)

search_options = dict(
    k=200,
    min_similarity=0.49,
    exact_match_boost=0.3,  # Moderate boost for exact matches
    return_scores=True,
)
results = search_engine.search(query=q, **search_options)

# Display only the columns of interest, including the similarity score.
display_columns = [
    'topic_name',
    'speaker_name',
    'summary',
    'details',
    'meeting_timestamp',
    'similarity_score',
]
results[display_columns]

Timeline for Vexa's paid plan rollout


Unnamed: 0,topic_name,speaker_name,summary,details,meeting_timestamp,similarity_score
1654,Plans for rolling out a paid plan for Vexa,Dmitry Grankin,Dmitry Grankin discussed plans to introduce a ...,The paid plan aims to generate revenue to cove...,2024-08-30 10:29:05.146,0.678391
1488,paid plan,Ahmed Abdelaziz,A paid plan is a subscription model that allow...,Ahmed inquires about the rollout of a paid pla...,2024-08-30 10:29:05.146,0.538884
2146,Vexa,Dmitriy Grankin,Vexa is a venture-backed startup focused on de...,Vexa aims to provide tools that highlight key ...,2024-09-04 15:00:03.180,0.500262
2449,Introduction of Vexa startup and its current p...,Dmitriy Grankin,"Dmitriy Grankin introduced Vexa, a startup foc...",Vexa is incorporated in the USA and has been i...,2024-09-06 16:22:35.780,0.498155
2502,Vexa,Olga Nemirovskaya,Vexa is a product being discussed in the meeti...,The team is considering how to improve user ac...,2024-09-06 19:03:43.560,0.494625
1652,Discussion about Vexa's updates and features,Dmitry Grankin,Dmitry Grankin provided updates on Vexa's feat...,Vexa now delivers transcripts in real-time and...,2024-08-30 10:29:05.146,0.494579
676,Vexa,Dmitry Grankin,"Vexa is a product discussed in the meeting, fo...",Vexa is currently in the testing phase with a ...,2024-09-30 10:01:22.780,0.493481


In [121]:
report = await SearchReport.extract(results,q)
reports.append(report)
todo_queries.extend([r.query for r in report.follow_up_searches])

In [122]:
# Display the follow-up queries currently waiting in the queue.
todo_queries

['User acquisition strategies for Vexa',
 "Feedback on Vexa's interface and usability",
 "Sustainability of Vexa's pricing model",
 'Comparison of Vexa and VEX features and pricing',
 'User demographics for audio transcription products',
 'Effectiveness of influencer marketing in tech startups',
 'Technological advancements in audio transcription',
 'User feedback on Vexa and similar products',
 'User demographics for Vexa',
 'User acquisition and retention metrics for Vexa',
 'Competitive analysis of Vexa in the meeting assistant market',
 'User feedback and reviews on Vexa',
 'Marketing strategies for tech startups in Brazil',
 'Vexa user retention rates statistics',
 "Effectiveness of Vexa's marketing strategies",
 'User demographics and behavior for Vexa',
 'User feedback on Vexa interface improvements',
 'Competitor analysis for user retention in similar products',
 'Vexa paid plan rollout timeline',
 "Features included in Vexa's paid version",
 'User feedback implementation in Ve

In [128]:
# One list of key findings per collected report, in collection order.
[report.model_dump()['key_findings'] for report in reports]



[['Vexa is a real-time meeting assistant focused on enhancing productivity through transcription and contextual support.',
  'The product is currently in the testing phase and aims to improve its market presence through user feedback and marketing strategies.',
  'User feedback highlights both positive experiences with the interface and concerns regarding usability and visibility.',
  'Marketing efforts are primarily through influencer partnerships and social media, but user acquisition remains low due to limited outreach.',
  'Future plans include a paid plan rollout and the development of collaborative features.'],
 ['Vexa is planning to introduce a paid subscription model to generate revenue.',
  'The proposed pricing includes a lifetime deal at approximately $200 per user.',
  'Current free plan features may be adjusted to encourage upgrades to paid plans.',
  'User feedback indicates a need for interface improvements and better marketing strategies.'],
 ['Vexa is in the pre-seed s

In [133]:
from core import generic_call_stream

In [138]:
r = await generic_call_stream(
    messages=[
        system_msg("summarize findings"),
        user_msg(' '.join([' '.join(r.model_dump()['key_findings']) for r in reports]))
    ],
)

Vexa is a real-time meeting assistant currently in the testing phase, aimed at enhancing productivity through transcription and contextual support. User feedback has been mixed, with positive comments on the interface but concerns regarding usability and visibility. The product is facing challenges in user acquisition due to limited marketing efforts, primarily relying on influencer partnerships and social media, despite having good user retention rates.

Vexa plans to introduce a paid subscription model, proposing a lifetime deal at approximately $200 per user, while considering adjustments to current free plan features to encourage upgrades. The company is in the pre-seed stage and is focused on refining its product, with plans for collaborative features and improved marketing strategies to boost user acquisition.

Current user numbers are low, with around 200 free users, but gradual growth is anticipated as user feedback is implemented. Key areas for improvement include the interfac

  r = await generic_call_stream(


In [140]:
# NOTE(review): this import binds the name `display` to the IPython.display
# module; presumably that shadows the notebook's callable display() helper
# for any later cell — confirm before relying on display(obj).
from IPython import display
# Render the summary text `r` as Markdown rather than as a raw string.
display.Markdown(r)

Vexa is a real-time meeting assistant currently in the testing phase, aimed at enhancing productivity through transcription and contextual support. User feedback has been mixed, with positive comments on the interface but concerns regarding usability and visibility. The product is facing challenges in user acquisition due to limited marketing efforts, primarily relying on influencer partnerships and social media, despite having good user retention rates.

Vexa plans to introduce a paid subscription model, proposing a lifetime deal at approximately $200 per user, while considering adjustments to current free plan features to encourage upgrades. The company is in the pre-seed stage and is focused on refining its product, with plans for collaborative features and improved marketing strategies to boost user acquisition.

Current user numbers are low, with around 200 free users, but gradual growth is anticipated as user feedback is implemented. Key areas for improvement include the interface and overall usability, which are critical for enhancing the user experience. Vexa competes with similar products like VEX and is actively seeking user feedback to inform its development and marketing strategies. Overall, Vexa's future growth will depend on addressing user concerns, refining its features, and effectively marketing its offerings.

In [139]:
# Show the raw, unrendered summary held in `r` (the result of generic_call_stream).
r

"Vexa is a real-time meeting assistant currently in the testing phase, aimed at enhancing productivity through transcription and contextual support. User feedback has been mixed, with positive comments on the interface but concerns regarding usability and visibility. The product is facing challenges in user acquisition due to limited marketing efforts, primarily relying on influencer partnerships and social media, despite having good user retention rates.\n\nVexa plans to introduce a paid subscription model, proposing a lifetime deal at approximately $200 per user, while considering adjustments to current free plan features to encourage upgrades. The company is in the pre-seed stage and is focused on refining its product, with plans for collaborative features and improved marketing strategies to boost user acquisition.\n\nCurrent user numbers are low, with around 200 free users, but gradual growth is anticipated as user feedback is implemented. Key areas for improvement include the int