In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import pandas as pd

from vexa import VexaAPI
from core import system_msg, user_msg
from prompts import Prompts
from pydantic_models import MeetingExtraction, EntityExtraction, SummaryIndexesRefs, MeetingSummary
pd.options.display.max_colwidth = 100

import datetime

from psql_models import Speaker, Meeting, DiscussionPoint, get_session, engine,read_table_async
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload

import nest_asyncio
import asyncio

nest_asyncio.apply()

In [3]:
# from psql_models import init_db
# await init_db()


In [4]:
from sqlalchemy import select
from sqlalchemy.sql import exists

async def check_item_exists(meeting_id):
    """Report whether any DiscussionPoint row references the given meeting.

    Args:
        meeting_id: Meeting identifier; coerced with ``str()`` before it is
            compared against ``DiscussionPoint.meeting_id``.

    Returns:
        The scalar result of a ``SELECT EXISTS(...)`` query.
    """
    async with get_session() as session:
        exists_stmt = select(
            exists().where(DiscussionPoint.meeting_id == str(meeting_id))
        )
        return (await session.execute(exists_stmt)).scalar()


In [5]:
def flatten_context(context):
    """Expand each item's nested ``'objects'`` list into flat records.

    Args:
        context: Iterable of dicts. An item may carry an ``'objects'`` key
            holding an iterable of dicts.

    Returns:
        A list of dicts. An item without ``'objects'`` contributes itself.
        An item with ``'objects'`` contributes one record per nested object:
        the item's other keys merged with the object's keys, the object's
        values winning on collision. An empty ``'objects'`` list therefore
        contributes no records at all.
    """
    rows = []
    for entry in context:
        shared = {key: value for key, value in entry.items() if key != 'objects'}
        if 'objects' not in entry:
            rows.append(shared)
            continue
        rows.extend({**shared, **nested} for nested in entry['objects'])
    return rows

In [6]:
async def process_meeting_data(formatted_input, df):
    """Extract discussion points and entities for one meeting and attach transcript quotes.

    ``MeetingExtraction`` and ``EntityExtraction`` run concurrently on
    ``formatted_input``. Their results are stacked into one table whose
    positional index becomes ``summary_index``. ``SummaryIndexesRefs`` then
    supplies transcript ranges for each ``summary_index``, and the text in
    each range is rendered as ``"speaker: content"`` fragments joined by
    ``' | '``.

    Args:
        formatted_input: Transcript representation passed unchanged to the
            extractors.
        df: Transcript DataFrame with ``speaker`` and ``content`` columns.
            Reference ``s``/``e`` values are used as inclusive labels for
            ``df.loc``.

    Returns:
        A DataFrame with one row per ``summary_index`` and the columns
        ``topic_name``, ``topic_type``, ``summary``, ``details``,
        ``speaker``, ``referenced_text`` and ``model``. Entries with no
        references get an empty ``referenced_text``. If the final
        aggregation fails, the error is printed and an empty DataFrame is
        returned.
    """
    discussion_points_df, topics_df = await asyncio.gather(
        MeetingExtraction.extract(formatted_input),
        EntityExtraction.extract(formatted_input),
    )

    # rename() returns a new frame and assign() adds the column to that copy,
    # so the frames handed back by the extractors are never mutated in place.
    discussion_points_df = discussion_points_df.rename(
        columns={'item': 'topic_name', 'type': 'topic_type'}
    ).assign(model='MeetingExtraction')
    topics_df = topics_df.rename(
        columns={'entity': 'topic_name', 'type': 'topic_type'}
    ).assign(model='EntityExtraction')

    # Stack both extraction results; the fresh 0..n-1 index is the summary_index.
    summary_df = pd.concat([discussion_points_df, topics_df]).reset_index(drop=True)

    summary_refs = await SummaryIndexesRefs.extract(summary_df, formatted_input)

    # One row per (summary_index, start, end) transcript range.
    ref_df = pd.DataFrame(
        [
            (ref['summary_index'], r['s'], r['e'])
            for ref in summary_refs
            for r in ref['references']
        ],
        columns=['summary_index', 'start', 'end'],
    )

    # Left merge keeps entries that received no references; those rows carry
    # NaN in start/end.
    entities_with_refs = (
        summary_df.reset_index()
        .rename(columns={'index': 'summary_index'})
        .merge(ref_df, on='summary_index', how='left')
    )

    def get_text_range_with_speaker(row):
        """Render the transcript rows start..end as 'speaker: content' fragments."""
        start, end = row['start'], row['end']
        if pd.isna(start) or pd.isna(end):
            # No reference for this entry: nothing to quote.
            return ''
        # A NaN elsewhere in the column upcasts the bounds to float; cast them
        # back so the slice uses integer labels.
        text_range = df.loc[int(start):int(end)]
        return ' | '.join(
            f"{speaker}: {content}"
            for speaker, content in zip(text_range['speaker'], text_range['content'])
        )

    # Built row by row instead of DataFrame.apply(axis=1): on an empty frame
    # apply() returns a DataFrame, which cannot be assigned to one column.
    entities_with_refs['referenced_text'] = [
        get_text_range_with_speaker(row) for _, row in entities_with_refs.iterrows()
    ]

    def join_quotes(quotes):
        """Join the non-empty quotes of one entry with ' | '."""
        return ' | '.join(quote for quote in quotes if quote)

    # Collapse multiple references of the same entry into one row.
    try:
        final_df = entities_with_refs.groupby('summary_index').agg({
            'topic_name': 'first',
            'topic_type': 'first',
            'summary': 'first',
            'details': 'first',
            'speaker': 'first',
            'referenced_text': join_quotes,
            'model': 'first'
        }).reset_index()

        return final_df
    except Exception as e:
        # Best-effort contract kept from the original: report and return empty.
        print(f"Error processing meeting data: {e}")
        return pd.DataFrame()
    

In [7]:
async def save_meeting_data_to_db(final_df, meeting_id, transcript, meeting_datetime):
    """Persist one meeting and its discussion points in a single transaction.

    A ``Meeting`` row is created only if none exists for ``meeting_id``.
    Each row of ``final_df`` becomes a ``DiscussionPoint`` linked to that
    meeting and to a ``Speaker`` looked up by name, created if missing.

    Args:
        final_df: DataFrame with the columns ``summary_index``, ``summary``,
            ``details``, ``referenced_text``, ``speaker``, ``topic_name``,
            ``topic_type`` and ``model``.
        meeting_id: Identifier matched against ``Meeting.meeting_id``.
        transcript: Transcript object; stored as ``str(transcript)``.
        meeting_datetime: Meeting start time. An aware datetime is converted
            to naive UTC. A naive datetime is stored unchanged (presumably
            already UTC — TODO confirm against the caller).

    Raises:
        Exception: Any error is printed, the transaction is rolled back, and
            the error is re-raised.
    """
    async with AsyncSession(engine) as session:
        try:
            meeting_result = await session.execute(
                select(Meeting).where(Meeting.meeting_id == meeting_id)
            )
            meeting = meeting_result.scalar_one_or_none()

            if not meeting:
                # utcoffset() returns None for naive datetimes; subtracting
                # None would raise TypeError, so only shift when an offset exists.
                utc_offset = meeting_datetime.utcoffset()
                naive_datetime = meeting_datetime.replace(tzinfo=None)
                if utc_offset is not None:
                    naive_datetime -= utc_offset

                meeting = Meeting(
                    meeting_id=meeting_id,
                    transcript=str(transcript),
                    timestamp=naive_datetime
                )
                session.add(meeting)
                await session.flush()

            # Speakers already resolved in this call, keyed by name, so each
            # distinct speaker is queried at most once.
            speakers_by_name = {}

            for _, row in final_df.iterrows():
                speaker_name = row['speaker']
                speaker = speakers_by_name.get(speaker_name)

                if speaker is None:
                    speaker_result = await session.execute(
                        select(Speaker).where(Speaker.name == speaker_name)
                    )
                    speaker = speaker_result.scalar_one_or_none()

                    if not speaker:
                        speaker = Speaker(name=speaker_name)
                        session.add(speaker)
                        # Flush so the new speaker has an id to reference.
                        await session.flush()

                    speakers_by_name[speaker_name] = speaker

                session.add(DiscussionPoint(
                    summary_index=row['summary_index'],
                    summary=row['summary'],
                    details=row['details'],
                    referenced_text=row['referenced_text'],
                    meeting_id=meeting.meeting_id,
                    speaker_id=speaker.id,
                    topic_name=row['topic_name'],
                    topic_type=row['topic_type'],
                    model=row['model']
                ))

            # commit() flushes all pending discussion points in one go.
            await session.commit()
            print("Meeting data and discussion points saved successfully to the database.")
        except Exception as e:
            await session.rollback()
            print(f"Error saving to database: {e}")
            raise

In [13]:
vexa = VexaAPI()
await vexa.get_user_info()
meetings = await vexa.get_meetings()
meetings = meetings[-250:]


User information retrieved successfully.
Request URL: http://127.0.0.1:8001/api/v1/calls/all
Request Params: {'token': '<REDACTED>'}


In [14]:
# Use the first of the retained meetings for the single-meeting walkthrough.
meeting_id = meetings[0]['id']

In [15]:
trasncription = await vexa.get_transcription(meeting_session_id=meeting_id, use_index=True)
if trasncription:   
    df, formatted_input, start_datetime, speakers, transcript = trasncription

In [16]:
df

Unnamed: 0,index,speaker,speaker_id,content,html_content,html_content_short,keywords,timestamp,formatted_time,time_tuple,initials,chunk_number
0,0,Enigmatic Sage,TBD,"I don't have much to say, just I wonder if you changed.",,,[],2024-08-16T09:21:24.452317Z,00:00,"(0, 0)",ES,0
1,1,Enigmatic Sage,TBD,"We'll change something about the disappearing parts and the highlight is not working, right?",,,[],2024-08-16T09:21:32.768317Z,00:08,"(0, 8)",ES,0
2,2,Mystic Wizard,TBD,Right?,,,[],2024-08-16T09:21:33.788317Z,00:09,"(0, 9)",MW,0
3,3,Mystic Wizard,TBD,Mm-hmm.,,,[],2024-08-16T09:21:34.788317Z,00:10,"(0, 10)",MW,0
4,4,Mystic Wizard,TBD,So we cannot add…,,,[],2024-08-16T09:21:36.388317Z,00:11,"(0, 11)",MW,0
5,5,Enigmatic Sage,TBD,And the three parts of this… Sorry?,,,[],2024-08-16T09:21:39.008317Z,00:14,"(0, 14)",ES,0
6,6,Mystic Wizard,TBD,"Is that something you experience in the stage, in the deaf environment?",,,[],2024-08-16T09:21:48.058317Z,00:23,"(0, 23)",MW,0
7,7,Mystic Wizard,TBD,"Is that something you experience in the stage, in the dev environment?",,,[],2024-08-16T09:21:48.078317Z,00:23,"(0, 23)",MW,0
8,8,Enigmatic Sage,TBD,X-Dev environment.,,,[],2024-08-16T09:21:50.378317Z,00:25,"(0, 25)",ES,1
9,9,Enigmatic Sage,TBD,XDEV environment.,,,[],2024-08-16T09:21:50.438317Z,00:25,"(0, 25)",ES,1


In [49]:
for meeting in meetings:
    meeting_id = meeting['id']
    try:
        if not await check_item_exists(meeting_id):
            trasncription = await vexa.get_transcription(meeting_session_id=meeting_id, use_index=True)
            if trasncription:   
                df, formatted_input, start_datetime, speakers, transcript = trasncription
                final_df = await asyncio.wait_for(
                    process_meeting_data(formatted_input, df),
                    timeout=60
                )
                await save_meeting_data_to_db(final_df, meeting_id, transcript, start_datetime)
    except asyncio.TimeoutError:
        print(f"Timeout occurred while processing meeting {meeting_id}")
        continue
    except Exception as e:
        print(f"Error processing meeting {meeting_id}: {e}")
        continue

Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Meeting data and discussion points saved successfully to the database.
Timeou

In [10]:


# Read the DiscussionPoint model's rows through read_table_async.
# NOTE(review): this rebinds `df`, which earlier cells use for the transcript
# frame of a single meeting.
df = await read_table_async(DiscussionPoint)

In [11]:
df

Unnamed: 0,id,summary_index,summary,details,referenced_text,meeting_id,speaker_id,topic_name,topic_type,model
0,1,0,"The discussion focused on how technology should adapt to the user's needs, emphasizing the impor...",The technology should learn from user interactions to provide better service. This includes unde...,"Sergey Ryabenko: То, что она учится, ну то есть, ничего, ну это она учится. | Dmitriy Grankin: ...",2be605d6-d98d-475a-8969-838cb44a6fe9,1,Adapting technology to user needs,idea,MeetingExtraction
1,2,1,A proposed solution involves a system that not only learns from user interactions but also prese...,"The system should be able to store and retrieve information effectively, ensuring that users do ...","Sergey Ryabenko: Ну, во-первых, хранит информацию. | Dmitriy Grankin: но только надо хранить о...",2be605d6-d98d-475a-8969-838cb44a6fe9,2,System that learns and preserves knowledge,solution,MeetingExtraction
2,3,2,"The need for a task management feature was identified, which would help users manage their tasks...","This feature would provide users with a summary of tasks assigned during meetings, helping them ...","Sergey Ryabenko: Ну нет, таски – это одно из. | Sergey Ryabenko: То есть... | Sergey Ryabenko:...",2be605d6-d98d-475a-8969-838cb44a6fe9,1,Task management feature,task,MeetingExtraction
3,4,3,The importance of summarizing meetings effectively was emphasized to cut down on unnecessary inf...,"The goal is to create a system that can provide concise summaries of meetings, highlighting acti...","Sergey Ryabenko: Ага. Может, это cuts the fluff. | Sergey Ryabenko: И cut the fluff вот в этот...",2be605d6-d98d-475a-8969-838cb44a6fe9,2,Effective meeting summarization,goal,MeetingExtraction
4,5,4,The integration of knowledge management and task management was discussed as a way to enhance pr...,"This integration would allow for better tracking of project changes and updates, ensuring that a...",Sergey Ryabenko: knowledge knowledge management я все про knowledge по касту то есть у меня два...,2be605d6-d98d-475a-8969-838cb44a6fe9,1,Knowledge and task management integration,opportunity,MeetingExtraction
...,...,...,...,...,...,...,...,...,...,...
1268,1269,21,"Colombia is mentioned as a potential destination for living and working, particularly in the con...",Sergio expressed interest in Colombia as a place he initially wanted to move to before choosing ...,Sergio Goriachev: Это Колумбия.,2647ed9b-411e-46dc-ba53-c08ef2887479,39,Colombia,country,EntityExtraction
1269,1270,22,"Turkey is a country where Dmitriy lived before moving to Argentina, sharing his experiences and ...",Dmitriy discussed his time in Turkey and how it influenced his decision to move to Argentina.,"Sergio Goriachev: совсем с года в Турции, именно тогда я начал.",2647ed9b-411e-46dc-ba53-c08ef2887479,1,Turkey,country,EntityExtraction
1270,1271,23,Brazil is mentioned in the context of travel and potential business opportunities.,"Sergio discussed Brazil as a destination for travel and investment, particularly in real estate.","Dmitriy Grankin: Это когда берешь деньги, которые, в общем-то, плюс-минус всегда были доступны,...",2647ed9b-411e-46dc-ba53-c08ef2887479,39,Brazil,country,EntityExtraction
1271,1272,24,"France is mentioned as a country where Dmitriy lived and worked, sharing his experiences there.",Dmitriy compared his experiences in France with those in Portugal and other countries.,Sergio Goriachev: это конечно капец боль вот только жили в хорошей самом центре но там жесточай...,2647ed9b-411e-46dc-ba53-c08ef2887479,1,France,country,EntityExtraction


In [12]:
final_df

In [13]:
# NOTE(review): `objects_df` is not assigned anywhere in the visible notebook,
# so this cell raises NameError on a fresh run.
objects_df

NameError: name 'objects_df' is not defined

In [None]:
# NOTE(review): `Item` is never imported in the visible cells (psql_models
# supplies Speaker, Meeting and DiscussionPoint) — presumably a stale model
# name; confirm and replace it or drop this cell.
await read_table_async(Item)

NameError: name 'read_table_async' is not defined

In [None]:
# NOTE(review): the recorded error for this cell reports that an AsyncEngine
# has no `cursor`, i.e. `engine` is not usable as a pandas SQL connection.
# Presumably this should go through read_table_async with the model mapped to
# 'items' — confirm which model that is.
pd.read_sql_table('items',engine)

[autoreload of psql_models failed: Traceback (most recent call last):
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 273, in check
    superreload(m, reload, self.old_objects)
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 496, in superreload
    update_generic(old_obj, new_obj)
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 393, in update_generic
    update(a, b)
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 331, in update_class
    if (old_obj == new_obj) is True:
        ^^^^^^^^^^^^^^^^^^
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/sqlalchemy/sql/operators.py", line 582, in __eq__
    return self.operate(eq, other)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dima/anaconda3/envs/langchain/

AttributeError: 'AsyncEngine' object has no attribute 'cursor'

In [None]:
pd.read_sql_table('meetings',engine)

Unnamed: 0,id,meeting_id,transcript,timestamp
0,1,2be605d6-d98d-475a-8969-838cb44a6fe9,"[{'speaker': 'Dmitriy Grankin', 'speaker_id': 'TBD', 'content': ' И хотел бы тебе показать ленди...",2024-09-16 13:09:25.985999


In [None]:
pd.read_sql_table('speakers',engine)

Unnamed: 0,id,name
0,1,Sergey Ryabenko
1,2,Dmitriy Grankin


In [None]:
from pydantic import BaseModel, Field
from core import BaseCall
from typing import List



In [None]:
# Build a MeetingSummary from the formatted transcript and the extracted points.
# NOTE(review): `formatted_input` and `final_df` are whatever the processing
# loop assigned last, so the meeting summarised here depends on loop state.
r = await MeetingSummary.extract(formatted_input, final_df,use_cache=True)
print(r)

summary="The discussion focused on the new landing pages and their clarity and purpose for users (0). Concerns were raised about the system's adaptability to user needs, emphasizing the importance of using the term 'adapt' instead of 'learn' to avoid privacy issues (1). Data preservation and access control were identified as potential risks, with a need for users to manage their data (2). The necessity of task management features was highlighted, suggesting the system should provide reminders and action points after meetings (3). Summarizing meetings to eliminate unnecessary information was deemed crucial for effective communication (4). Knowledge management was recognized as essential for project management, ensuring that information remains current (5). The potential for automatic documentation updates based on discussions was proposed as a valuable feature (6). Sergey Ryabenko and Dmitriy Grankin emphasized the importance of a system that can adapt to user needs while managing tasks

In [None]:
from IPython.display import Markdown
# Render the summary text as Markdown rather than the plain printed form.
Markdown(r.summary)

The discussion focused on the new landing pages and their clarity and purpose for users (0). Concerns were raised about the system's adaptability to user needs, emphasizing the importance of using the term 'adapt' instead of 'learn' to avoid privacy issues (1). Data preservation and access control were identified as potential risks, with a need for users to manage their data (2). The necessity of task management features was highlighted, suggesting the system should provide reminders and action points after meetings (3). Summarizing meetings to eliminate unnecessary information was deemed crucial for effective communication (4). Knowledge management was recognized as essential for project management, ensuring that information remains current (5). The potential for automatic documentation updates based on discussions was proposed as a valuable feature (6). Sergey Ryabenko and Dmitriy Grankin emphasized the importance of a system that can adapt to user needs while managing tasks effectively (7)(8). Concepts such as knowledge management (9), task management (10), and project management (11) were discussed, with a focus on cutting unnecessary information (12). The idea of a personal chat GPT that tailors responses to individual user needs was suggested (13). The accumulation of knowledge over time (14) and the need for a task board to visualize responsibilities were also mentioned (15). Adaptive learning, where the system adjusts based on user interactions, was considered beneficial (16).

In [None]:
r.meeting_name

'Project Management System Discussion'