In [1]:
import os
import sys
import re
import time
from pathlib import Path
import pdfplumber
import pytesseract
from pdf2image import convert_from_path
import warnings
import logging
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# Extend the module search path so that the `api` module imported below can be
# resolved (presumably it lives in ../python relative to this notebook — confirm).
sys.path.append('../python')
# Install a catch-all 'default' warnings filter: each distinct warning is shown
# once per source location.
warnings.filterwarnings('default')
# Only let ERROR-and-above records through from the "pdfminer" logger
# (presumably to quiet the noise produced while parsing PDFs — confirm).
logging.getLogger("pdfminer").setLevel(logging.ERROR)

import api


  from tqdm.autonotebook import tqdm


In [2]:
# Prompt template sent to the language model for one agenda item.
# It contains two positional `{}` placeholders that are filled with
# `str.format`: the first receives the agenda item text, the second the
# minutes text for that same item. The remainder of the template tells the
# model which labelled sections (related cases, summaries, vote total, result)
# its answer must contain.
PROMPT = """
--- AGENDA ITEM ----

{}

--- MINUTES OF DISCUSSION ----

{}

--- PROMPT ----

I just gave you two documents related to a Los Angeles City Planning Commission (CPC) hearing.

The first document is the agenda item to be discussed, with requested actions.

The second document is the minutes of the discussion, the proposed motion by the CPC, the votes on the motion by the CPC members, and whether the motion ultimately passed.

Please return a response in the following format:

---- YOUR RESPONSE FORMAT ----
RELATED CASES:
<A comma separated list of relevant planning department case numbers>

SUMMARY OF AGENDA ITEM:
<A summary of the agenda item to be discussed>

SUMMARY OF CPC DELIBERATIONS:
<A summary of the deliberations of the CPC>

SUMMARY OF CPC MOTION:
<A summary of the motion voted on by the CPC>

ALIGNMENT OF MOTION TO PROPOSAL:
<Did the CPC vote to approve the agenda item, deny it, or continue the discussion to a future meeting?>

VOTE TOTAL:
<The vote total on the CPC motion. Return in the format of X-Y, where X is votes for and Y is votes against>

RESULT:
<Result of the vote on the motion. Either MOTION PASSED or MOTION FAILED>
"""

In [3]:
# Load the per-meeting metadata table. The processing loop that follows reads
# the `date` and `year` columns of each row to locate that meeting's files.
meta_df2 = pd.read_csv("../../intermediate_data/cpc/meetings_metadata.csv")

In [4]:
# Summarise every case-numbered agenda item of every meeting listed in
# `meta_df2`.
#
# For each meeting the loop:
#   1. loads the agenda and minutes item tables from the meeting directory,
#   2. for each agenda row flagged `is_casenum`, fills PROMPT with the agenda
#      text and the minutes text that share the same `item_no`,
#   3. sends the prompt to `api.get_response` and stores the returned
#      (response, score) pair,
#   4. writes the response to `minutes-summaries/<item_no>.txt` and, once the
#      meeting is finished, pickles all records to `minutes-summaries.pkl`.
#
# Progress is printed as the meeting date followed by "<item_no>... " markers.
#
# NOTE(review): pd.read_pickle can execute arbitrary code while loading; only
# run this against pickles produced by this project's own pipeline.

# perf_counter is monotonic, so the elapsed time cannot be skewed by
# wall-clock adjustments during a long run.
t0 = time.perf_counter()
for _, meeting in meta_df2.iterrows():
    date = meeting['date']
    year = meeting['year']
    print(date)

    meeting_dir = Path(f"../../intermediate_data/cpc/{year}/{date}")
    agenda_df = pd.read_pickle(meeting_dir / "agenda-items.pkl")
    minutes_df = pd.read_pickle(meeting_dir / "minutes-items.pkl")
    output_dir = meeting_dir / "minutes-summaries"

    records = []
    for _, item in agenda_df.iterrows():
        if not item['is_casenum']:
            continue

        item_no = item['item_no']
        title = item['title']
        print(f"{item_no}... ", end='')

        # An agenda item may have no counterpart in the minutes; warn and move
        # on instead of letting `.iloc[0]` abort the whole batch with a bare
        # IndexError.
        matching_minutes = minutes_df.loc[minutes_df['item_no'] == item_no, 'content']
        if matching_minutes.empty:
            warnings.warn(
                f"No minutes found for agenda item {item_no} on {date}; skipping."
            )
            continue

        agenda_content = item['content']
        minutes_content = matching_minutes.iloc[0]
        prompt = PROMPT.format(agenda_content, minutes_content)
        response, score = api.get_response(prompt)
        records.append({
            'year': year,
            'date': date,
            'item_no': item_no,
            'title': title,
            'prompt': prompt,
            'response': response,
            'score': score
        })

        # Created lazily so that meetings without any summarised item do not
        # get an empty directory.
        output_dir.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8: model output can contain non-ASCII characters that
        # the platform default encoding may be unable to represent.
        (output_dir / f"{item_no}.txt").write_text(response, encoding='utf-8')
    print('')

    # Build the frame once from the collected records.
    df = pd.DataFrame(records)
    df.to_pickle(meeting_dir / "minutes-summaries.pkl")

t1 = time.perf_counter()
print(f"Elapsed time: {(t1-t0)/60} minutes.")


2018-05-10
5a... 6... 7... 8... 9... 10... 
2018-05-23
5a... 6... 7... 8... 9... 10... 11... 
2018-06-14
5a... 6... 7... 8... 9... 10... 11... 
2018-07-12
7... 8... 9... 10... 11... 
2018-07-26
7... 8... 
2018-08-09
6... 7... 8... 9... 10... 11... 12... 13... 
2018-08-23
6... 7... 
2018-09-13
5b... 6... 7... 8... 9... 10... 11... 12... 13... 
2018-09-27
6... 7... 8... 9... 10... 
2018-10-11
6... 7... 8... 9... 10... 11... 
2018-10-25
7... 8... 9... 10... 
2018-11-08
6... 7... 8... 9... 10... 11... 
2018-11-29
7... 8... 
2018-12-13
7... 8... 
2018-12-20
7... 
2019-01-10
7... 8... 9... 
2019-01-24
5b... 6... 7... 8... 9... 10... 11... 
2019-02-14
7... 8... 9... 10... 
2019-02-28
5b... 6... 7... 8... 
2019-03-14
5b... 6... 7... 8... 9... 10... 11... 12... 13... 
2019-03-28
5b... 5c... 6... 7... 8... 9... 10... 
2019-04-11
7... 8... 9... 
2019-05-09
5b... 6... 7... 8... 9... 10... 11... 12... 
2019-05-23
7... 8... 9... 10... 
2019-06-13
6... 7... 8... 9... 
2019-06-27
7... 8... 9... 
2019-