In [1]:
import os
import sys
import re
from pathlib import Path
import pdfplumber
import pytesseract
from pdf2image import convert_from_path
import warnings
import logging
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

# Add the sibling ../python directory (relative to the notebook's working
# directory) to the module search path.
# NOTE(review): presumably this is what lets `import api` below resolve — confirm.
sys.path.append('../python')
# Use the 'default' warning action: each warning is shown once per location.
warnings.filterwarnings('default')
# Only let ERROR-and-above records through the "pdfminer" logger.
# NOTE(review): presumably silences noise emitted while parsing PDFs — confirm.
logging.getLogger("pdfminer").setLevel(logging.ERROR)

import api


  from tqdm.autonotebook import tqdm


In [2]:
# Load the per-meeting metadata table. Later cells read its 'year', 'date',
# 'has_all', 'supplemental_pages' and 'total_pages' columns.
meta_df2 = pd.read_csv("../../intermediate_data/cpc/meetings_metadata.csv")

In [3]:
# Record, for every meeting, how many supplemental documents were extracted.
# Meetings with no supplemental pages keep the initial count of 0.
meta_df2['total_supplemental_docs'] = 0
for row_label, meeting in meta_df2.iterrows():
    year = meeting['year']
    date = meeting['date']
    # Skip meetings without supplemental pages (a NaN page count also lands here).
    if not meeting['supplemental_pages'] > 0:
        continue
    supplemental_docs_df = pd.read_pickle(f"../../intermediate_data/cpc/{year}/{date}/supplemental-docs.pkl")
    meta_df2.at[row_label, 'total_supplemental_docs'] = len(supplemental_docs_df)

In [4]:
# Record, for every meeting flagged `has_all`, the number of agenda items and
# the number of those items whose `is_casenum` flag is set.
#
# Both counters are initialised to 0 up front. Previously only
# 'total_agenda_items' was; 'total_agenda_cases' was created implicitly by the
# first .loc assignment, which left NaN (and a float dtype) on every meeting
# without `has_all` and kept stale values when the cell was re-run.
meta_df2['total_agenda_items'] = 0
meta_df2['total_agenda_cases'] = 0
for idx, row in meta_df2.iterrows():
    year = row['year']
    date = row['date']
    has_all = row['has_all']
    if has_all:
        agenda_items_df = pd.read_pickle(f"../../intermediate_data/cpc/{year}/{date}/agenda-items.pkl")
        meta_df2.loc[idx, 'total_agenda_items'] = len(agenda_items_df)
        # 'is_casenum' is used as a boolean mask selecting the case items.
        meta_df2.loc[idx, 'total_agenda_cases'] = len(agenda_items_df.loc[agenda_items_df['is_casenum']])

In [5]:
# Summarise the meetings flagged `has_all`: how many there are, the span of
# years they cover, and the totals of the per-meeting counters.
# NOTE: `idx` is a boolean mask here, not the row label it was in the loops above.
idx = meta_df2['has_all']
complete_meetings = meta_df2.loc[idx]  # select once instead of re-filtering per statistic

n_meetings = len(complete_meetings)
n_years = len(complete_meetings['year'].unique())
min_year = complete_meetings['year'].astype('int').min()
max_year = complete_meetings['year'].astype('int').max()
n_supplemental_pages = complete_meetings['supplemental_pages'].sum()
n_supplemental_docs = complete_meetings['total_supplemental_docs'].sum()
n_agenda_items = complete_meetings['total_agenda_items'].sum()
n_agenda_cases = complete_meetings['total_agenda_cases'].sum()
n_pages = complete_meetings['total_pages'].sum()

# Counts are printed with ',.0f' rather than ',g': 'g' keeps only six
# significant digits and falls back to scientific notation from 1,000,000 up,
# which would misreport large totals. ',.0f' accepts int and float sums alike.
print(f"{n_meetings} meetings")
print(f"across {n_years} years from {min_year} to {max_year}")
print(f"totaling {n_agenda_items:,.0f} agenda items")
print(f"and {n_agenda_cases:,.0f} planning department cases on the agenda")
print(f"and {n_supplemental_docs:,.0f} supplemental documents across {n_supplemental_pages:,.0f} pages")
print(f"total pages of documents: {n_pages:,.0f}")


150 meetings
across 7 years from 2018 to 2024
totaling 1,442 agenda items
and 700 planning department cases on the agenda
and 6,423 supplemental documents across 21,189 pages
total pages of documents: 23,430


In [10]:
# Spot check: pick a random meeting and a random case item from its agenda,
# then print that item's agenda text next to the matching text from the minutes.
# The draw is intentionally unseeded, so every run shows a different item.

# Sample only from meetings flagged `has_all`. The agenda pickle is only read
# for those meetings elsewhere in this notebook, so sampling from the full
# table could select a meeting whose files are missing.
# NOTE(review): assumes `has_all` also implies minutes-items.pkl exists — confirm.
complete_meetings = meta_df2.loc[meta_df2['has_all']]
date = complete_meetings.sample(1).iloc[0]['date']
year = date[0:4]  # dates are strings whose first four characters are the year
agenda_df = pd.read_pickle(f"../../intermediate_data/cpc/{year}/{date}/agenda-items.pkl")
minutes_df = pd.read_pickle(f"../../intermediate_data/cpc/{year}/{date}/minutes-items.pkl")

item_no = agenda_df.loc[agenda_df['is_casenum']].sample(1).iloc[0]['item_no']
agenda_content = agenda_df.loc[agenda_df['item_no']==item_no].iloc[0]['content']

# The sampled item may be absent from the minutes; show a placeholder instead
# of failing with a bare IndexError from .iloc[0].
minutes_matches = minutes_df.loc[minutes_df['item_no']==item_no]
if len(minutes_matches) > 0:
    minutes_content = minutes_matches.iloc[0]['content']
else:
    minutes_content = f"(item {item_no} not found in the minutes)"

print(date)
print('-----')
print('AGENDA')
print('')
print(agenda_content)
print('')
print('-----')
print('MINUTES')
print('')
print(minutes_content)

2021-11-04
-----
AGENDA

       7.   VTT-82107-1A                                   Council District: 5 – Koretz
            CEQA: ENV-2019-5735-SCEA                       **Last Day to Act: 11-05-21
            Council File No. 20-1624                                                
            Plan Area: Westwood                                                     
            Related Cases: ZA-2018-3422-ELD-CU-DRB-SPP-SPR-1A;                      
                       DIR-2020-3896-DRB-SPP; DIR-2020-3896-DRB-SPP-P               
                                                                                    
            PUBLIC HEARING REQUIRED                                                 
                                                                                    
            PROJECT SITE:   10822 West Wilshire Boulevard and 10812 West Ashton Avenue
                                                                                    
            PROPOSED PROJECT:    

In [None]:
PROMPT = 