In [1]:
%load_ext autoreload
%autoreload 2

import asyncio
import pandas as pd
from transcript_analyzer import TranscriptAnalyzer, extract_tag_content, generate_summary
from vexa import VexaAPI
import pandas as pd

# Create the Vexa API client using its default constructor arguments.
# Later cells call vexa.get_meetings() and vexa.get_transcription() on this object.
vexa = VexaAPI()
from core import generic_call, system_msg, user_msg

In [2]:
from pydantic import BaseModel, Field
from typing import List
from core import BaseCall

class Point(BaseModel):
    # One summary bullet together with the span of transcript quote indexes it covers.
    # NOTE(review): `#` comments are used instead of a class docstring on purpose —
    # pydantic exposes a model's docstring as its JSON-schema description, and the
    # schema is presumably what the LLM sees; confirm before adding a docstring.
    # Bullet text.
    c: str = Field(..., description="Point content mentioning speakers, facts, main ideas, and concise information")
    # First covered quote index; declared as str, cast with int() in create_exploded_points_df.
    s: str = Field(..., description="Start index")
    # Last covered quote index (the span is expanded inclusively); declared as str, cast with int() downstream.
    e: str = Field(..., description="End index")

class Summary(BaseCall):
    """points must cover all the qoutes in the transcript"""
    # Structured result requested through Summary.call(...) in the processing loop,
    # which reads `.summary`, `.meeting_name` and `.model_dump()['points']` from it.
    # NOTE(review): the docstring above and the Field descriptions are presumably
    # forwarded to the model as part of the schema, so they are left verbatim
    # (typos included) — confirm in core.BaseCall before rewording them.
    # Free-form reasoning text; not read by the code in this notebook.
    thinking: str = Field(..., description="full log of your thought process about how you are to create a summary and points that cover all the quotes in the transcript")
    # Short title, validated to at most 50 characters.
    meeting_name: str = Field(..., max_length=50, description="Explanotory consice dense name of the meeting, no generic words, 50 char max")
    # Overall summary, validated to at most 500 characters.
    summary: str = Field(..., max_length=500, description="Concise summary of the text with attention to company facts, names, people, dates, numbers, and facts")
    # Bullets, each carrying the quote-index span it covers (see Point).
    points: List[Point] = Field(..., description="Main points as bullets, each covering a specific qoute range in the transcript")

def create_exploded_points_df(output):
    """Expand each summary point into one row per transcript quote index it covers.

    Args:
        output: Indexable result whose first element exposes ``model_dump()``
            returning a dict with a ``'points'`` list. Each point is a dict with
            keys ``'c'`` (text), ``'s'`` (start index) and ``'e'`` (end index);
            ``'s'`` and ``'e'`` may be ints or numeric strings.

    Returns:
        pandas.DataFrame with columns ``'point'`` (the text from ``'c'``) and
        ``'range'`` (one quote index per row, start and end inclusive), with a
        fresh 0..n-1 index. When there are no points, an empty frame with those
        two columns is returned instead of raising.

    Raises:
        ValueError: if a start or end index cannot be converted with ``int()``.
    """
    points_data = output[0].model_dump()['points']

    # An empty list would produce a frame without the 'c'/'s'/'e' columns and the
    # steps below would raise; hand back an empty frame of the expected shape.
    if not points_data:
        return pd.DataFrame(columns=['point', 'range'])

    df_points = pd.DataFrame(points_data).rename(columns={'c': 'point'})

    # Build the inclusive index span for every point. dtype=object keeps each
    # span as a single list cell so explode() can fan it out into rows.
    # A span with start > end is an empty list, which explode() turns into a
    # single NaN row for that point.
    df_points['range'] = pd.Series(
        [
            list(range(int(start), int(end) + 1))
            for start, end in zip(df_points['s'], df_points['e'])
        ],
        index=df_points.index,
        dtype=object,
    )

    return (
        df_points
        .drop(columns=['s', 'e'])
        .explode('range')
        .reset_index(drop=True)
    )

In [3]:
def combine_initials_and_content(group):
    """Render a group of transcript rows as one ``"speaker: content"`` string.

    Args:
        group: DataFrame with ``'speaker'`` and ``'content'`` columns.

    Returns:
        The per-row ``"<speaker>: <content>"`` fragments joined by single
        spaces. Missing speakers or contents are rendered as empty strings.
    """
    speaker_labels = group['speaker'].fillna('').astype(str)
    spoken_text = group['content'].fillna('')
    fragments = speaker_labels + ': ' + spoken_text
    return ' '.join(fragments)

In [4]:
from prompts import Prompts
prompts = Prompts()

# System prompt = prompts.think followed by the summarization brief below.
# The literal contains no placeholders, so it is a plain string rather than an f-string.
# The text (indentation and blank lines included) is passed to system_msg() unchanged,
# so edit it deliberately.
system_prompt = prompts.think + """:   Create a concise summary of the following text as markdown with attention to company FACTS, names, people, dates, numbers, and facts. All FACTS must be preserved.
                    The output should be at most 10000 characters long, if the meeting is long enough.
                    Cover all the quote ID ranges in the transcript.
                    Structure:
                    1. Short summary (500 characters max)
                    2. Main points as bullets (500 characters each). Each point should:
                       - Mention speakers
                       - Include FACTS
                       - Present main ideas
                       - Provide concise information

                    
                    All the  quote IDs present in the text must be covered with quote ID ranges!
                    """

In [5]:
# Run parameters, named so they are easy to find and change.
GPU_DEVICE = 3       # NOTE(review): machine-specific device index — confirm it exists on the target host
MAX_MEETINGS = 200   # number of trailing entries of the meeting list to process; must be > 0 ([-0:] keeps everything)

analyzer = TranscriptAnalyzer(gpu_device=GPU_DEVICE)

# Keep only the last MAX_MEETINGS entries returned by the API.
# NOTE(review): presumably the list is ordered oldest-first, so this selects the
# most recent meetings — confirm the ordering of vexa.get_meetings().
meetings = vexa.get_meetings()[-MAX_MEETINGS:]




In [6]:
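# Manual reset switch: uncomment the line below to call analyzer.delete_collection() before re-indexing.
# NOTE(review): presumably destructive — confirm what the collection holds before running it.
#analyzer.delete_collection()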

In [7]:
for meeting in meetings:
    meeting_id = meeting['id']
    if not analyzer.check_meeting_session_id_exists(meeting_id):
        result = vexa.get_transcription(meeting_session_id=meeting_id,use_index=True)
        if result:
            df, formatted_input, start_datetime, speakers = result
            output = await Summary.call([system_msg(system_prompt),user_msg(formatted_input)],model='gpt-4o-mini')
            df_exploded = create_exploded_points_df(output)
            joined_df = df_exploded.join(df, on='range', rsuffix='_transcript')

            points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
            points_with_qoutes.name = 'qoutes'
            points_with_qoutes = points_with_qoutes.reset_index().to_dict(orient='records')
            summary = output[0].summary
            meeting_name = output[0].meeting_name
            
            if points_with_qoutes:
                chunks = [f"{summary}\n\n{p['point']}\n\n{p['qoutes']}" for p in points_with_qoutes]
                points = [p['point'] for p in points_with_qoutes]
                qoutes = [p['qoutes'] for p in points_with_qoutes]
                await analyzer.add_summary(meeting_name, summary, start_datetime, speakers, meeting_id)
                await analyzer.update_vectorstore_with_qoutes(chunks, points, qoutes, start_datetime, speakers, meeting_id)

  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').apply(combine_initials_and_content)
  points_with_qoutes = joined_df.groupby('point').appl