In [1]:
import os
import sys
import time
import yaml
import pandas as pd
import numpy as np

# Load machine-specific settings from the local YAML config. The encoding is
# given explicitly so parsing does not depend on the platform's default locale.
with open('../../config.local.yaml', 'r', encoding='utf-8') as f:
    local_config = yaml.safe_load(f)

# Root directory under which the project's data and source folders are resolved.
LOCAL_PATH = local_config['LOCAL_PATH']

# Make the project's Python sources importable. Guarded so that re-running this
# cell does not keep appending duplicate entries to sys.path.
_src_python_dir = os.path.join(LOCAL_PATH, "src/python")
if _src_python_dir not in sys.path:
    sys.path.append(_src_python_dir)

# Must come after the sys.path change above, since `llm` lives in src/python.
from llm import get_response

# Forwarded to get_response as its `overwrite` argument.
# NOTE(review): presumably False reuses previously stored responses — confirm in the llm module.
LLM_OVERWRITE = False

In [2]:
# Read the meetings manifest and collect its `date` column in ascending order.
manifest_path = os.path.join(LOCAL_PATH, "intermediate_data/cpc/meetings-manifest.csv")
meetings_df = pd.read_csv(manifest_path)
DATES = sorted(meetings_df['date'])

In [3]:
# Prompt template with two positional `{}` placeholders, filled elsewhere in this
# notebook via PROMPT.format(agenda, content): the first receives the concatenated
# agenda-item text, the second the content of one supplemental document.
# The trailing sections tell the model which fields to return and which labels
# are allowed for each field.
PROMPT = """
==== LIST OF AGENDA ITEMS ====

{}

==== DOCUMENT ====

{}

==== PROMPT ====

I just gave you a list of agenda items from a LA City Planning Commission meeting, followed by a document submitted to that meeting. 

Return a response in the following format:


==== YOUR RESPONSE FORMAT ====

TYPE OF DOCUMENT:
<What type of document is it? Your only options are: LETTER OR PETITION, TECHNICAL MODIFICATION OR PROCEDURAL MATTER, SCIENTIFIC OR TECHNICAL REPORT, CV OR BIOGRAPHY, CORRUPTED/ILLEGIBLE/BLANK, TITLE OR SECTION HEADING, OTHER.>

TYPE OF AUTHOR:
<What type of entity wrote the document? Your only options are: INDIVIDUAL, ADVOCACY GROUP, CONSULTANT, LAWYER, DEVELOPER, PUBLIC OFFICIAL, OTHER.>

SUMMARY OF DOCUMENT:
<Summarize the contents of the document.>

REFERENCED AGENDA ITEMS:
<List the agenda items, as a comma delimited list of item numbers, that the submitted document references or is relevant to. If none, say NONE.>

SUPPORT OR OPPOSE:
<Does the submitted document support or oppose the referenced agenda items? Your only options are: DEFINITELY SUPPORT, SOMEWHAT SUPPORT, DEFINITELY OPPOSE, SOMEWHAT OPPOSE, NEUTRAL, NOT RELEVANT.>
"""


In [4]:
# For every meeting date: build the agenda text, send each non-skipped
# supplemental document to the LLM together with that agenda, and save one
# pickle of prompts/responses per meeting next to its inputs.

# Column layout of the saved frame. Passing it explicitly keeps the schema
# stable even for a meeting where every document was skipped.
SUMMARY_COLUMNS = [
    'year', 'date', 'doc_id', 'start_page', 'end_page',
    'prompt', 'response', 'perplexity',
]

t0 = time.time()
for date in DATES:
    year = date[0:4]
    PATH = os.path.join(LOCAL_PATH, f"intermediate_data/cpc/{year}/{date}")
    print(date)

    agenda_file = os.path.join(PATH, 'agenda-items.pkl')
    docs_file = os.path.join(PATH, 'supplemental-docs.pkl')
    output_file = os.path.join(PATH, 'supplemental-docs-summaries.pkl')

    # Both inputs are required; a meeting missing either one is skipped.
    if not (os.path.exists(agenda_file) and os.path.exists(docs_file)):
        continue

    # Concatenate the agenda items flagged `is_casenum` into a single text
    # block; pieces are collected in a list and joined once.
    agenda_df = pd.read_pickle(agenda_file)
    agenda_parts = []
    for _, item in agenda_df.iterrows():
        if item['is_casenum']:
            agenda_parts.append(
                f"ITEM NO. {item['item_no']}\n{item['title']}\n\n"
                + item['content']
                + "\n\n--------\n\n"
            )
    agenda = "".join(agenda_parts)

    docs_df = pd.read_pickle(docs_file)
    # na=True marks rows whose content is missing as skipped. Without it the
    # mask would contain NaN and the `~` negation below would fail.
    docs_df['skipped'] = (
        docs_df['content'].str.startswith('SKIPPED', na=True).astype(bool)
    )

    records = []
    for _, doc in docs_df.loc[~docs_df['skipped']].iterrows():
        prompt = PROMPT.format(agenda, doc['content'])
        response = get_response(prompt, overwrite=LLM_OVERWRITE)
        records.append({
            'year': year,
            'date': date,
            'doc_id': doc['doc_id'],
            'start_page': doc['start_page'],
            'end_page': doc['end_page'],
            'prompt': prompt,
            'response': response['message'],
            'perplexity': response['perplexity'],
        })
        # Lightweight progress indicator: last page of the document just processed.
        print(f"{doc['end_page']}... ", end='')

    # `out_df` is left bound to the most recently processed meeting's results.
    out_df = pd.DataFrame(records, columns=SUMMARY_COLUMNS)
    out_df.to_pickle(output_file)
    print('')
t1 = time.time()
print(f"Elapsed time: {(t1-t0)/60} minutes.")


2018-05-10
1... 2... 8... 9... 10... 174... 176... 178... 179... 188... 189... 193... 207... 208... 209... 210... 211... 212... 213... 214... 215... 216... 218... 219... 
2018-05-23
1... 2... 3... 4... 6... 7... 20... 30... 31... 
2018-06-14
1... 2... 3... 5... 7... 9... 17... 21... 24... 31... 43... 47... 51... 61... 62... 66... 71... 77... 78... 80... 81... 
2018-07-12
1... 2... 3... 4... 9... 12... 13... 16... 21... 23... 25... 26... 34... 38... 43... 50... 59... 64... 66... 67... 68... 69... 71... 74... 75... 
2018-07-26
1... 2... 4... 5... 6... 7... 8... 9... 10... 11... 12... 14... 15... 16... 17... 18... 19... 20... 21... 22... 23... 24... 25... 26... 27... 30... 31... 32... 35... 36... 37... 39... 40... 41... 42... 43... 44... 45... 46... 47... 48... 49... 
2018-08-09
1... 2... 6... 10... 12... 14... 23... 29... 30... 31... 32... 33... 34... 57... 66... 67... 89... 90... 133... 134... 135... 136... 140... 141... 143... 144... 148... 149... 150... 152... 153... 154... 155... 
20

In [5]:
# Spot-check one row of the most recent `out_df`: print the prompt that was
# sent, two blank lines, then the model's response.
i = 3
prompt_text = out_df.loc[i, 'prompt']
response_text = out_df.loc[i, 'response']
print(prompt_text, '', '', response_text, sep='\n')


==== LIST OF AGENDA ITEMS ====

ITEM NO. 6
CPC-2022-7045-CU3-DB-DRB-SPPC-HCA

     6.  CPC-2022-7045-CU3-DB-DRB-SPPC-HCA                Council District: 5 – Yaroslavsky
         CEQA: ENV-2022-7046-CE                                Last Day to Act: 12-19-24
         Plan Area: Westwood                                  Continued from: 10-24-24
                                                                                    
         PUBLIC HEARING – Completed July 17, 2024                                   
                                                                                    
         PROJECT SITE:  1515 South Veteran Avenue (1505, 1507, 1509, 1511, 1513,    
                        1517, 1519, 1521, and 1523 South Veteran Avenue)            
                                                                                    
         PROPOSED PROJECT:                                                          
         Demolition of three existing multi-family dwellin