In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to local .py files take effect without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [2]:
import pandas as pd

from vexa import VexaAPI
from core import system_msg, user_msg
from prompts import Prompts
from pydantic_models import MeetingExtraction, EntityExtraction, SummaryIndexesRefs, MeetingSummary
pd.options.display.max_colwidth = 100

import datetime

from psql_models import Speaker, Meeting, DiscussionPoint, get_session, engine,read_table_async
from sqlalchemy import func
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm import selectinload

import nest_asyncio
import asyncio

# Patch asyncio so that an event loop can be re-entered while the notebook's own loop is already running.
nest_asyncio.apply()

In [3]:
# from psql_models import init_db
# await init_db()


In [4]:
from sqlalchemy import select
from sqlalchemy.sql import exists

async def check_item_exists(meeting_id):
    """Report whether any DiscussionPoint row is stored for the given meeting.

    Args:
        meeting_id: Meeting identifier. It is converted with ``str()`` before
            being compared against ``DiscussionPoint.meeting_id``.

    Returns:
        The scalar result of a ``SELECT EXISTS(...)`` query, i.e. truthy when
        at least one discussion point references the meeting.
    """
    async with get_session() as db:
        # EXISTS lets the database answer without returning any rows.
        has_points_stmt = select(
            exists().where(DiscussionPoint.meeting_id == str(meeting_id))
        )
        return (await db.execute(has_points_stmt)).scalar()


In [5]:
def flatten_context(context):
    """Expand each item's nested ``'objects'`` list into one flat dict per object.

    Args:
        context: Iterable of dicts. An item may carry an ``'objects'`` key
            holding an iterable of dicts.

    Returns:
        A list of dicts. For an item with ``'objects'``, one dict per object is
        produced, combining the item's other keys with the object's keys (the
        object's value wins on a key clash); an empty ``'objects'`` iterable
        therefore yields no rows. An item without ``'objects'`` is emitted once
        as a shallow copy.
    """
    rows = []
    for entry in context:
        shared = {key: value for key, value in entry.items() if key != 'objects'}
        if 'objects' not in entry:
            rows.append(shared)
            continue
        rows.extend({**shared, **child} for child in entry['objects'])
    return rows

In [6]:
async def process_meeting_data(formatted_input, df):
    """Extract discussion points and entities and attach the transcript text they cite.

    Args:
        formatted_input: Transcript representation handed to
            ``MeetingExtraction.extract``, ``EntityExtraction.extract`` and
            ``SummaryIndexesRefs.extract``.
        df: Transcript DataFrame with ``speaker`` and ``content`` columns. It is
            sliced by label with ``df.loc[start:end]`` using the reference
            bounds returned by ``SummaryIndexesRefs.extract``.

    Returns:
        A DataFrame with one row per ``summary_index`` and the columns
        ``topic_name``, ``topic_type``, ``summary``, ``details``, ``speaker``,
        ``referenced_text`` and ``model``.
    """
    # The two extractors do not depend on each other, so await them together.
    discussion_points_df, topics_df = await asyncio.gather(
        MeetingExtraction.extract(formatted_input),
        EntityExtraction.extract(formatted_input),
    )

    # Record which extractor produced each row.
    discussion_points_df['model'] = 'MeetingExtraction'
    topics_df['model'] = 'EntityExtraction'

    # Bring both frames onto the shared topic_name / topic_type schema and stack them.
    summary_df = pd.concat([
        discussion_points_df.rename(columns={'item': 'topic_name', 'type': 'topic_type'}),
        topics_df.rename(columns={'entity': 'topic_name', 'type': 'topic_type'}),
    ]).reset_index(drop=True)

    summary_refs = await SummaryIndexesRefs.extract(summary_df, formatted_input)

    # One (summary_index, start, end) record per cited transcript span.
    span_records = []
    for ref in summary_refs:
        for span in ref['references']:
            span_records.append((ref['summary_index'], span['s'], span['e']))
    ref_df = pd.DataFrame(span_records, columns=['summary_index', 'start', 'end'])

    # Left merge keeps summary rows even when no span was returned for them.
    entities_with_refs = (
        summary_df
        .reset_index()
        .rename(columns={'index': 'summary_index'})
        .merge(ref_df, on='summary_index', how='left')
    )

    def quote_span(row):
        """Render the transcript rows between start and end as 'speaker: content' pieces."""
        span_rows = df.loc[row['start']:row['end']]
        pieces = [
            f"{speaker}: {content}"
            for speaker, content in zip(span_rows['speaker'], span_rows['content'])
        ]
        return ' | '.join(pieces)

    entities_with_refs['referenced_text'] = entities_with_refs.apply(quote_span, axis=1)

    # Collapse multiple spans per summary row: keep the first value of the
    # descriptive columns and concatenate the quoted text of every span.
    aggregations = dict.fromkeys(
        ['topic_name', 'topic_type', 'summary', 'details', 'speaker'], 'first'
    )
    aggregations['referenced_text'] = ' | '.join
    aggregations['model'] = 'first'

    return entities_with_refs.groupby('summary_index').agg(aggregations).reset_index()

In [7]:
async def save_meeting_data_to_db(final_df, meeting_id, transcript, meeting_datetime):
    """Persist a meeting and its extracted discussion points in a single transaction.

    A ``Meeting`` row is created when none exists for ``meeting_id``. Speakers
    are looked up by name and created the first time a name is seen. One
    ``DiscussionPoint`` is inserted per row of ``final_df``.

    Args:
        final_df: DataFrame providing the columns ``summary_index``,
            ``summary``, ``details``, ``referenced_text``, ``speaker``,
            ``topic_name``, ``topic_type`` and ``model``.
        meeting_id: Value matched against ``Meeting.meeting_id``.
        transcript: Stored as ``str(transcript)`` on a newly created meeting.
        meeting_datetime: Meeting start time. A timezone-aware value is
            converted to naive UTC. A naive value is stored unchanged
            (assumed to already be UTC -- confirm with the caller).

    Raises:
        Exception: Any error raised while saving is re-raised after the
            transaction has been rolled back.
    """
    async with AsyncSession(engine) as session:
        try:
            meeting_lookup = await session.execute(
                select(Meeting).where(Meeting.meeting_id == meeting_id)
            )
            meeting = meeting_lookup.scalar_one_or_none()

            if meeting is None:
                # utcoffset() is None for naive datetimes; subtracting it
                # unconditionally would raise TypeError.
                utc_offset = meeting_datetime.utcoffset()
                naive_datetime = meeting_datetime.replace(tzinfo=None)
                if utc_offset is not None:
                    naive_datetime -= utc_offset

                meeting = Meeting(
                    meeting_id=meeting_id,
                    transcript=str(transcript),
                    timestamp=naive_datetime
                )
                session.add(meeting)
                await session.flush()

            # Speakers already resolved in this call, keyed by name, so a
            # repeated name does not trigger another SELECT.
            speakers_by_name = {}

            for _, row in final_df.iterrows():
                speaker_name = row['speaker']
                speaker = speakers_by_name.get(speaker_name)

                if speaker is None:
                    speaker_lookup = await session.execute(
                        select(Speaker).where(Speaker.name == speaker_name)
                    )
                    speaker = speaker_lookup.scalar_one_or_none()

                    if speaker is None:
                        speaker = Speaker(name=speaker_name)
                        session.add(speaker)
                        # Flush so the new speaker has its id before it is referenced below.
                        await session.flush()

                    speakers_by_name[speaker_name] = speaker

                session.add(DiscussionPoint(
                    summary_index=row['summary_index'],
                    summary=row['summary'],
                    details=row['details'],
                    referenced_text=row['referenced_text'],
                    meeting_id=meeting.meeting_id,
                    speaker_id=speaker.id,
                    topic_name=row['topic_name'],
                    topic_type=row['topic_type'],
                    model=row['model']
                ))
                await session.flush()

            await session.commit()
            print("Meeting data and discussion points saved successfully to the database.")
        except Exception as e:
            await session.rollback()
            print(f"Error saving to database: {e}")
            raise

In [14]:
vexa = VexaAPI()
await vexa.get_user_info()
meetings = await vexa.get_meetings()
meetings = meetings[-101:]


User information retrieved successfully.
Request URL: http://127.0.0.1:8001/api/v1/calls/all
Request Params: {'token': '<REDACTED>'}


In [18]:
for meeting in meetings[:10]:
    meeting_id = meeting['id']
    if not await check_item_exists(meeting_id):
        trasncription = await vexa.get_transcription(meeting_session_id=meeting_id, use_index=True)
        df, formatted_input, start_datetime, speakers,transcript = trasncription
        final_df = await process_meeting_data(formatted_input, df)
        await save_meeting_data_to_db(final_df, meeting_id, transcript, start_datetime)

In [16]:


# Read the DiscussionPoint table (presumably into a DataFrame, judging by the output displayed next).
# NOTE(review): this rebinds `df`, the same name the processing loop uses for the transcript frame.
df = await read_table_async(DiscussionPoint)

In [17]:
# Display the table read by the previous cell.
df

Unnamed: 0,id,summary_index,summary,details,referenced_text,meeting_id,speaker_id,topic_name,topic_type,model
0,1,0,"The discussion focused on how technology should adapt to the user's needs, emphasizing the impor...",The technology should learn from user interactions to provide better service. This includes unde...,"Sergey Ryabenko: То, что она учится, ну то есть, ничего, ну это она учится. | Dmitriy Grankin: ...",2be605d6-d98d-475a-8969-838cb44a6fe9,1,Adapting technology to user needs,idea,MeetingExtraction
1,2,1,A proposed solution involves a system that not only learns from user interactions but also prese...,"The system should be able to store and retrieve information effectively, ensuring that users do ...","Sergey Ryabenko: Ну, во-первых, хранит информацию. | Dmitriy Grankin: но только надо хранить о...",2be605d6-d98d-475a-8969-838cb44a6fe9,2,System that learns and preserves knowledge,solution,MeetingExtraction
2,3,2,"The need for a task management feature was identified, which would help users manage their tasks...","This feature would provide users with a summary of tasks assigned during meetings, helping them ...","Sergey Ryabenko: Ну нет, таски – это одно из. | Sergey Ryabenko: То есть... | Sergey Ryabenko:...",2be605d6-d98d-475a-8969-838cb44a6fe9,1,Task management feature,task,MeetingExtraction
3,4,3,The importance of summarizing meetings effectively was emphasized to cut down on unnecessary inf...,"The goal is to create a system that can provide concise summaries of meetings, highlighting acti...","Sergey Ryabenko: Ага. Может, это cuts the fluff. | Sergey Ryabenko: И cut the fluff вот в этот...",2be605d6-d98d-475a-8969-838cb44a6fe9,2,Effective meeting summarization,goal,MeetingExtraction
4,5,4,The integration of knowledge management and task management was discussed as a way to enhance pr...,"This integration would allow for better tracking of project changes and updates, ensuring that a...",Sergey Ryabenko: knowledge knowledge management я все про knowledge по касту то есть у меня два...,2be605d6-d98d-475a-8969-838cb44a6fe9,1,Knowledge and task management integration,opportunity,MeetingExtraction
...,...,...,...,...,...,...,...,...,...,...
173,174,10,Tilda is another design tool mentioned in the context of creating project designs.,"Dmitriy considers using Tilda for assembling their project designs, showing the collaboration be...",Dmitriy Grankin: Хорошо.,e941833d-5234-4f52-9a2a-d68f9ba45e13,1,Tilda,tool,EntityExtraction
174,175,11,Web3 is a topic mentioned by Olga in relation to her work and community engagement.,"Olga refers to Web3 as an area she is exploring, indicating its relevance to her projects and di...","Olga Nemirovskaya: Веб-3, просто на прибавление.",e941833d-5234-4f52-9a2a-d68f9ba45e13,4,Web3,concept,EntityExtraction
175,176,12,Beta testers are individuals Olga plans to invite to test their projects on Product Hunt.,Olga expresses the intention to invite beta testers to create buzz and gather feedback on their ...,Olga Nemirovskaya: сейчас площадь там по по нескольку раз посмотрим что из этого выйдет вот и з...,e941833d-5234-4f52-9a2a-d68f9ba45e13,4,Beta testers,group,EntityExtraction
176,177,13,"Notetakers are a subject of interest for Olga, who is following new posts about them.","Olga mentions looking at new posts about notetakers, indicating her interest in this area and it...",": Плохо. Ну ладно. Я, в общем, сейчас что делаю? Практически каждый день смотрю новые посты про...",e941833d-5234-4f52-9a2a-d68f9ba45e13,4,Notetakers,topic,EntityExtraction


In [None]:
# Inspect the `final_df` left over from the processing loop.
# NOTE(review): the saved output below has `topic`/`type` columns, whereas process_meeting_data
# returns `topic_name`/`topic_type`/`model` -- the output looks stale; re-run to refresh it.
final_df

Unnamed: 0,summary_index,topic,type,summary,details,speaker,referenced_text
0,0,,idea,"The team discussed the adaptability of landing pages and how they can be tailored to user needs,...","The landing pages should adapt to user interactions, enhancing user engagement and satisfaction....",Dmitriy Grankin,"Dmitriy Grankin: И хотел бы тебе показать лендинги, которые я что-то понять что понятно непонят..."
1,1,,concern,"There was a concern about the use of the term 'learn' in AI systems, as it may imply a level of ...",The discussion highlighted the need to avoid language that suggests the AI is learning in a way ...,Sergey Ryabenko,"Sergey Ryabenko: По-моему, это не совсем правильно, то есть вот использование слова learn. | Se..."
2,2,,task,The team identified the need for the AI system to preserve information discussed during meetings...,"The AI should be able to store and recall information from previous discussions, ensuring that u...",Dmitriy Grankin,"Sergey Ryabenko: Ну, во-первых, хранит информацию. | Dmitriy Grankin: но только надо хранить о..."
3,3,,idea,The conversation included the potential for the AI to manage tasks and provide reminders based o...,"The AI could generate a task list from meeting notes, sending reminders to users about their res...",Sergey Ryabenko,"Sergey Ryabenko: Ну нет, таски – это одно из. | Sergey Ryabenko: То есть... | Sergey Ryabenko:..."
4,4,,goal,"The team emphasized the need for the AI to summarize discussions effectively, removing fluff and...","The AI should be capable of distilling conversations into concise summaries, highlighting action...",Dmitriy Grankin,"Sergey Ryabenko: Ага. Может, это cuts the fluff. | : Что это такое? | Dmitriy Grankin: Это из..."
5,5,,opportunity,The discussion pointed towards the development of a holistic system that integrates knowledge ma...,A comprehensive system would allow users to manage tasks while also keeping track of knowledge a...,Sergey Ryabenko,"Dmitriy Grankin: Да, здесь смысл в том, что у тебя есть целостная система."
6,6,,concern,"The importance of knowledge management in project management was highlighted, particularly in ma...","Effective knowledge management is crucial for project success, as it ensures that all team membe...",Dmitriy Grankin,Sergey Ryabenko: knowledge knowledge management я все про knowledge по касту то есть у меня два...
7,7,,task,The team discussed the necessity of tracking the history of tasks to understand changes and deci...,Having a historical record of task discussions and decisions would allow teams to review the evo...,Sergey Ryabenko,Sergey Ryabenko: отдельно постараться исторически создать по каждой задачи исторически с | Dmit...
8,8,,idea,"The conversation included the significance of context in managing tasks effectively, ensuring th...","Understanding the context of tasks is essential for effective project management, as it helps pr...",Dmitriy Grankin,"Sergey Ryabenko: Угу. Смотри, в верхнее, while you talk over Google Meet, мне кажется, будет иг..."
9,9,Sergey Ryabenko,person,A speaker in the meeting discussing the functionalities and features of a system that learns and...,"Sergey expresses concerns about the use of the term 'learn' in the context of the system, sugges...",Sergey Ryabenko,"Sergey Ryabenko: То, что она учится, ну то есть, ничего, ну это она учится. | Sergey Ryabenko: ..."


In [None]:
# NOTE(review): `objects_df` is not assigned in any cell shown in this notebook, so this cell
# appears to rely on leftover kernel state and would raise NameError on a fresh kernel -- confirm.
objects_df

Unnamed: 0,id,name,type
0,1,Adapting technology to user needs,idea
1,2,System that learns and preserves knowledge,solution
2,3,Task management feature,task
3,4,Effective meeting summarization,goal
4,5,Knowledge and task management integration,opportunity
5,6,Change management in project management,concern
6,7,Sergey Ryabenko,person
7,8,Dmitriy Grankin,person
8,9,knowledge management,concept
9,10,task management,concept


In [None]:
# NOTE(review): `Item` is not among the names imported from psql_models in the import cell
# (Speaker, Meeting, DiscussionPoint); this cell presumably predates a model rename -- confirm and update.
await read_table_async(Item)

NameError: name 'read_table_async' is not defined

In [None]:
# NOTE(review): pandas.read_sql_table expects a synchronous SQLAlchemy connectable; with the async
# `engine` this fails with the AttributeError recorded below. Consider read_table_async instead.
pd.read_sql_table('items',engine)

[autoreload of psql_models failed: Traceback (most recent call last):
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 273, in check
    superreload(m, reload, self.old_objects)
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 496, in superreload
    update_generic(old_obj, new_obj)
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 393, in update_generic
    update(a, b)
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/IPython/extensions/autoreload.py", line 331, in update_class
    if (old_obj == new_obj) is True:
        ^^^^^^^^^^^^^^^^^^
  File "/home/dima/anaconda3/envs/langchain/lib/python3.11/site-packages/sqlalchemy/sql/operators.py", line 582, in __eq__
    return self.operate(eq, other)
           ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/dima/anaconda3/envs/langchain/

AttributeError: 'AsyncEngine' object has no attribute 'cursor'

In [None]:
pd.read_sql_table('meetings',engine)

Unnamed: 0,id,meeting_id,transcript,timestamp
0,1,2be605d6-d98d-475a-8969-838cb44a6fe9,"[{'speaker': 'Dmitriy Grankin', 'speaker_id': 'TBD', 'content': ' И хотел бы тебе показать ленди...",2024-09-16 13:09:25.985999


In [None]:
pd.read_sql_table('speakers',engine)

Unnamed: 0,id,name
0,1,Sergey Ryabenko
1,2,Dmitriy Grankin


In [None]:
from pydantic import BaseModel, Field
from core import BaseCall
from typing import List



In [None]:
# Build an overall summary from the transcript input and the extracted rows of the last processed meeting.
# use_cache=True is forwarded to MeetingSummary.extract (presumably reuses a cached response -- confirm).
r = await MeetingSummary.extract(formatted_input, final_df,use_cache=True)
print(r)

summary="The discussion focused on the new landing pages and their clarity and purpose for users (0). Concerns were raised about the system's adaptability to user needs, emphasizing the importance of using the term 'adapt' instead of 'learn' to avoid privacy issues (1). Data preservation and access control were identified as potential risks, with a need for users to manage their data (2). The necessity of task management features was highlighted, suggesting the system should provide reminders and action points after meetings (3). Summarizing meetings to eliminate unnecessary information was deemed crucial for effective communication (4). Knowledge management was recognized as essential for project management, ensuring that information remains current (5). The potential for automatic documentation updates based on discussions was proposed as a valuable feature (6). Sergey Ryabenko and Dmitriy Grankin emphasized the importance of a system that can adapt to user needs while managing tasks

In [None]:
# Render the summary text through IPython's Markdown display instead of printing the raw string.
from IPython.display import Markdown
Markdown(r.summary)

The discussion focused on the new landing pages and their clarity and purpose for users (0). Concerns were raised about the system's adaptability to user needs, emphasizing the importance of using the term 'adapt' instead of 'learn' to avoid privacy issues (1). Data preservation and access control were identified as potential risks, with a need for users to manage their data (2). The necessity of task management features was highlighted, suggesting the system should provide reminders and action points after meetings (3). Summarizing meetings to eliminate unnecessary information was deemed crucial for effective communication (4). Knowledge management was recognized as essential for project management, ensuring that information remains current (5). The potential for automatic documentation updates based on discussions was proposed as a valuable feature (6). Sergey Ryabenko and Dmitriy Grankin emphasized the importance of a system that can adapt to user needs while managing tasks effectively (7)(8). Concepts such as knowledge management (9), task management (10), and project management (11) were discussed, with a focus on cutting unnecessary information (12). The idea of a personal chat GPT that tailors responses to individual user needs was suggested (13). The accumulation of knowledge over time (14) and the need for a task board to visualize responsibilities were also mentioned (15). Adaptive learning, where the system adjusts based on user interactions, was considered beneficial (16).

In [None]:
# Show the meeting_name field of the MeetingSummary result.
r.meeting_name

'Project Management System Discussion'