In [1]:
import os
import sys
import re
import time
from pathlib import Path
import pdfplumber
import pytesseract
from pdf2image import convert_from_path
import warnings
import logging
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# Add ../python to the module search path. This must run before `import api` below.
sys.path.append('../python')
# Apply the 'default' warning action to all warnings.
warnings.filterwarnings('default')
# Only let ERROR-and-above records through from the "pdfminer" logger.
logging.getLogger("pdfminer").setLevel(logging.ERROR)

# NOTE(review): presumably resolved from ../python via the sys.path entry above — confirm.
import api

# NumPy random generator with a fixed seed (12898).
rng = np.random.default_rng(12898)


  from tqdm.autonotebook import tqdm


In [2]:
# Prompt template filled in once per supplemental document.
# It has two positional `{}` slots, filled via PROMPT.format(minutes, content):
# first the concatenated agenda item summaries, then the document text.
PROMPT = """
==== LIST OF AGENDA ITEM SUMMARIES ====

{}

==== DOCUMENT ====

{}

==== PROMPT ====

I just gave you a list of agenda items from a LA City Planning Commission meeting, followed by a document submitted to that meeting. 

Return a response in the following format:


==== YOUR RESPONSE FORMAT ====

TYPE OF DOCUMENT:
<Is the submitted document a letter, a procedural matter, a scientific report, a CV/biography, or other type of document?>

TYPE OF AUTHOR:
<Is the author of the submitted document an individual, an advocacy group, a consultant, a lawyer, a developer, a public official, or other?>

SUMMARY OF DOCUMENT:
<Summarize the contents of the document.>

REFERENCED AGENDA ITEMS:
<List the agenda items, as a comma delimited list of item numbers, that the submitted document references or is relevant to. If none, say none.>

SUPPORT OR OPPOSE:
<Does the submitted document support or oppose the referenced agenda items? Say Definitely Support, Somewhat Support, Definitely Oppose, Somewhat Oppose, Neutral, or Not Relevant.>
"""


In [3]:
# Meeting metadata table. The processing loop that follows iterates over its rows
# and reads the 'date' and 'year' columns of each one.
meta_df = pd.read_csv("../../intermediate_data/cpc/meetings_metadata.csv")

In [4]:
# For every meeting in `meta_df`: build one text block from the agenda item
# summaries, send each non-skipped supplemental document to the model together
# with that block, and pickle the per-document responses next to the inputs.
# Progress is printed as "<date>" followed by "<end_page>... " per document.
t0 = time.time()
for i, irow in meta_df.iterrows():
    date = irow['date']
    year = irow['year']
    print(date)

    # All per-meeting inputs and the output live in the same directory.
    meeting_dir = f"../../intermediate_data/cpc/{year}/{date}"
    agenda_file = f"{meeting_dir}/minutes-summaries.pkl"
    docs_file = f"{meeting_dir}/supplemental-docs.pkl"
    output_file = f"{meeting_dir}/supplemental-docs-summaries.pkl"

    # A meeting is only processed when both inputs exist.
    if (not os.path.exists(agenda_file)) or (not os.path.exists(docs_file)):
        continue

    # NOTE(review): read_pickle can execute arbitrary code on load; this is only
    # safe if these files are produced by trusted steps of this project — confirm.
    # Load from the same path that was just checked for existence.
    agenda_df = pd.read_pickle(agenda_file)

    # One section per agenda item: header, item summary, then a separator.
    # Sections are collected and joined once instead of growing a string with +=.
    agenda_sections = []
    for j, jrow in agenda_df.iterrows():
        item_no = jrow['item_no']
        item_title = jrow['title']
        # Drop the echoed format header from the stored response, if present.
        minutes_text = jrow['response'].replace("---- YOUR RESPONSE FORMAT ----\n\n","")
        agenda_sections.append(
            f"ITEM NO. {item_no}\n{item_title}\n\n{minutes_text}\n\n------------\n\n"
        )
    minutes = "".join(agenda_sections)

    docs_df = pd.read_pickle(docs_file)
    # Documents whose content starts with 'SKIPPED' are not sent to the model.
    # NOTE(review): assumes 'content' is always a string; a missing value would
    # make the `~` mask below fail — confirm against the producer of this file.
    docs_df['skipped'] = docs_df['content'].str.startswith('SKIPPED')

    # Plain list of row dicts; `out_df` is only ever bound to the final DataFrame.
    records = []
    for j, jrow in docs_df.loc[~docs_df['skipped']].iterrows():
        doc_id = jrow['doc_id']
        start_page = jrow['start_page']
        end_page = jrow['end_page']
        content = jrow['content']
        prompt = PROMPT.format(minutes, content)
        response, score = api.get_response(prompt)
        records.append({
            'year': year,
            'date': date,
            'doc_id': doc_id,
            'start_page': start_page,
            'end_page': end_page,
            'prompt': prompt,
            'response': response,
            'score': score
        })
        print(f"{end_page}... ", end='')

    # Written even when no document was processed (empty frame).
    out_df = pd.DataFrame(records)
    out_df.to_pickle(output_file)
    print('')
t1 = time.time()
print(f"Elapsed time: {(t1-t0)/60} minutes.")


2018-05-10
1... 2... 8... 9... 10... 174... 176... 178... 179... 188... 189... 193... 207... 208... 209... 210... 211... 212... 213... 214... 215... 216... 218... 219... 
2018-05-23
1... 2... 3... 4... 6... 7... 20... 30... 31... 
2018-06-14
1... 2... 3... 5... 7... 9... 17... 21... 24... 31... 43... 47... 51... 61... 62... 66... 71... 77... 78... 80... 81... 
2018-07-12
1... 2... 3... 4... 9... 12... 13... 16... 21... 23... 25... 26... 34... 38... 43... 50... 59... 64... 66... 67... 68... 69... 71... 74... 75... 
2018-07-26
1... 2... 4... 5... 6... 7... 8... 9... 10... 11... 12... 14... 15... 16... 17... 18... 19... 20... 21... 22... 23... 24... 25... 26... 27... 30... 31... 32... 35... 36... 37... 39... 40... 41... 42... 43... 44... 45... 46... 47... 48... 49... 
2018-08-09
1... 2... 6... 10... 12... 14... 23... 29... 30... 31... 32... 33... 34... 57... 66... 67... 89... 90... 133... 134... 135... 136... 140... 141... 143... 144... 148... 149... 150... 152... 153... 154... 155... 
20

In [5]:
# Inspect one processed document: the exact prompt that was sent, then the
# model's response, with two blank lines in between. `out_df` holds the rows of
# the most recent meeting the processing loop wrote output for.
i = 5
print(out_df.loc[i, 'prompt'], '', '', out_df.loc[i, 'response'], sep='\n')


==== LIST OF AGENDA ITEM SUMMARIES ====

ITEM NO. 5a
CPC-2023-7708-DB-CDO-HCA

RELATED CASES:
CPC-2023-7708-DB-CDO-HCA, ENV-2023-7709-CE

SUMMARY OF AGENDA ITEM:
The agenda item concerns the construction, use, and maintenance of a new, three-story over one basement-story mixed-use building totaling 18,799 square feet at 4002 – 4006 North Verdugo Road and 3067 North Delevan Drive. The project includes 14 dwelling units, with two units set aside for Very Low Income Households, and a ground-floor commercial space. The project requests a Density Bonus and several Off-Menu Incentives and Waivers of Development Standards, including increased building height, increased Floor Area Ratio (FAR), and reduced yard setbacks. The project is exempt from CEQA under the Infill Development category.

SUMMARY OF CPC DELIBERATIONS:
The CPC deliberations involved reviewing the proposed project, its compliance with the City's Density Bonus program, and the requested Off-Menu Incentives and Waivers of Devel

In [7]:
# Show the model's response for a single row of the current `out_df`.
doc_index = 17
print(out_df.loc[doc_index, 'response'])


==== YOUR RESPONSE FORMAT ====

TYPE OF DOCUMENT:
Letter

TYPE OF AUTHOR:
Advocacy group

SUMMARY OF DOCUMENT:
The document is a letter from the California Housing Defense Fund (CalHDF) addressed to the Los Angeles City Planning Commission. It emphasizes the Commission's obligation to comply with state laws, such as the Housing Accountability Act (HAA), Density Bonus Law (DBL), and California Environmental Quality Act (CEQA) Guidelines, when evaluating the proposed 52-unit housing development project at 5201 and 5211 West Venice Boulevard. The letter argues that the project is legally protected under these laws and should be approved unless specific health and safety hazards are identified. It highlights the project's compliance with local zoning and general plans, its exemption from CEQA under the Class 32 categorical exemption, and the public benefits of providing affordable housing. The letter urges the Commission to approve the project in line with state law obligations.

REFERENC