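# Build Structured Dataset from Meeting Notes

This notebook uses OpenAI Structured Outputs to turn free-text meeting notes into a tabular dataset with one row per action item (description, due date, owner).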

In [42]:
# Import necessary libraries
from openai import OpenAI
import pandas as pd
import json

# Shared API client used by the extraction code further down.
# No key is passed here; per the OpenAI SDK documentation the client reads
# OPENAI_API_KEY from the environment, so credentials stay out of the notebook.
client = OpenAI()

## Meeting Notes Data

In [43]:
# Sample input: one record per meeting, holding its `meeting_id` and the raw
# free-text `meeting_notes`. The notes are spelled out line by line with explicit
# "\n" escapes so the leading indentation and the trailing spaces that are part
# of the text stay visible in the source.
df_meeting_notes = pd.DataFrame([
    {
        'meeting_id': '001',
        'meeting_notes': (
            "\n"
            "        Discussed project deadlines. John is responsible for creating the project timeline, and it's due by September 15th. \n"
            "        Sarah will handle client communication, and she needs to send the initial report by September 20th. \n"
            "        The budget report will be prepared by Michael, but there's no set deadline yet.\n"
            "        "
        ),
    },
    {
        'meeting_id': '002',
        'meeting_notes': (
            "\n"
            "        The website redesign is in progress. Emily will create the new layout by October 1st. \n"
            "        Tom will review the SEO strategy by September 30th. \n"
            "        We need to finalize the new logo, and James is in charge, but no date has been set.\n"
            "        "
        ),
    },
])
df_meeting_notes

Unnamed: 0,meeting_id,meeting_notes
0,1,\n Discussed project deadlines. John is...
1,2,\n The website redesign is in progress....


In [44]:
# Print the raw notes of the first meeting; print() renders the embedded
# newlines and indentation instead of the escaped repr.
print(df_meeting_notes.loc[0, 'meeting_notes'])


        Discussed project deadlines. John is responsible for creating the project timeline, and it's due by September 15th. 
        Sarah will handle client communication, and she needs to send the initial report by September 20th. 
        The budget report will be prepared by Michael, but there's no set deadline yet.
        


## Extract Action Items from Meeting Notes

In [55]:
class MeetingNotesProcessor:
    """Turn free-text meeting notes into one row per action item.

    The processor expects a DataFrame with a ``meeting_notes`` column. Each
    note is sent to the chat completions API through the module-level
    ``client`` with a strict JSON schema, and the returned action items are
    expanded into ``description``, ``due_date`` and ``owner`` columns.

    Attributes:
        df: The DataFrame handed to the constructor; replaced by the
            structured result once ``process_notes`` has run.
    """

    # Model snapshot used for the extraction call.
    MODEL = "gpt-4o-2024-08-06"

    # Instruction given to the model as the system message.
    SYSTEM_PROMPT = "Extract action items, due dates, and owners from meeting notes."

    # Columns every processed frame is guaranteed to contain.
    ACTION_ITEM_FIELDS = ("description", "due_date", "owner")

    # Structured-output contract: an object holding a list of action items.
    # ``due_date`` and ``owner`` are nullable because the notes may omit them.
    RESPONSE_FORMAT = {
        "type": "json_schema",
        "json_schema": {
            "name": "action_items",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "action_items": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "description": {
                                    "type": "string",
                                    "description": "Description of the action item."
                                },
                                "due_date": {
                                    "type": ["string", "null"],
                                    "description": "Due date for the action item, can be null if not specified."
                                },
                                "owner": {
                                    "type": ["string", "null"],
                                    "description": "Owner responsible for the action item, can be null if not specified."
                                }
                            },
                            "required": ["description", "due_date", "owner"],
                            "additionalProperties": False
                        },
                        "description": "List of action items from the meeting."
                    }
                },
                "required": ["action_items"],
                "additionalProperties": False
            }
        }
    }

    def __init__(self, dataframe):
        """Store the DataFrame whose ``meeting_notes`` column will be processed."""
        self.df = dataframe

    def extract_action_items(self, meeting_notes):
        """Extract the action items mentioned in one meeting's notes.

        Args:
            meeting_notes: Raw text of the notes. Blank text or a non-string
                value (e.g. a missing cell) yields an empty list without
                calling the API.

        Returns:
            A list of dicts with the keys ``description``, ``due_date`` and
            ``owner``; the latter two may be ``None``.

        Raises:
            ValueError: If the model returns no content (for example a
                refusal) or content that is not valid JSON.
        """
        # Nothing to extract from empty or missing notes; skip the API round trip.
        if not isinstance(meeting_notes, str) or not meeting_notes.strip():
            return []

        response = client.chat.completions.create(
            model=self.MODEL,
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": meeting_notes},
            ],
            response_format=self.RESPONSE_FORMAT,
        )

        message = response.choices[0].message
        # With structured outputs a refusal leaves `content` empty; fail with a
        # readable message instead of letting json.loads choke on None.
        if message.content is None:
            refusal = getattr(message, "refusal", None)
            raise ValueError(
                f"Model returned no structured content (refusal: {refusal!r})."
            )

        return json.loads(message.content)["action_items"]

    def process_notes(self):
        """Replace ``self.df`` with a frame holding one row per action item.

        The original columns are repeated for every action item of a meeting
        and the item's fields are appended as columns. A meeting without any
        action items keeps a single row with missing item fields. The result
        has a fresh 0..n-1 index, and the frame passed to the constructor is
        left unmodified.

        The method reads from ``self.df``, so it is meant to be called once
        per processor; a second call would re-run the extraction on the
        already expanded rows.
        """
        # Work on a copy so the caller's DataFrame does not gain a helper column.
        notes_df = self.df.copy()
        notes_df["action_items"] = notes_df["meeting_notes"].apply(self.extract_action_items)

        # One row per action item; reset the index so both halves of the
        # concat below share the same 0..n-1 labels.
        exploded_df = notes_df.explode("action_items").reset_index(drop=True)

        # Normalize from a plain list rather than the Series: pandas versions
        # that keep the Series index would otherwise return a frame that cannot
        # be aligned column-wise. Rows without an item (empty lists explode to
        # NaN) become empty records.
        records = [
            item if isinstance(item, dict) else {}
            for item in exploded_df["action_items"]
        ]
        action_items_df = pd.json_normalize(records)

        # Guarantee a stable schema even when no action item was found at all.
        for field in self.ACTION_ITEM_FIELDS:
            if field not in action_items_df.columns:
                action_items_df[field] = None

        self.df = pd.concat(
            [exploded_df.drop(columns=["action_items"]), action_items_df],
            axis=1,
        )


## Build Structured Dataset

In [56]:
# Create an instance of the class and process the notes.
# The processor gets its own copy of the sample frame, so the DataFrame shown
# earlier in the notebook is not modified and this cell can be re-run safely.
processor = MeetingNotesProcessor(df_meeting_notes.copy())
processor.process_notes()

In [57]:
# Display the structured result: the meeting columns repeated once per
# extracted action item, followed by its description, due date and owner.
processor.df

Unnamed: 0,meeting_id,meeting_notes,description,due_date,owner
0,1,\n Discussed project deadlines. John is...,Create the project timeline.,September 15th,John
1,1,\n Discussed project deadlines. John is...,Send the initial client communication report.,September 20th,Sarah
2,1,\n Discussed project deadlines. John is...,Prepare the budget report.,,Michael
3,2,\n The website redesign is in progress....,Create the new website layout,October 1st,Emily
4,2,\n The website redesign is in progress....,Review the SEO strategy,September 30th,Tom
5,2,\n The website redesign is in progress....,Finalize the new logo,,James
