In [2]:
import os
import sys
import time
import yaml
import pandas as pd
import numpy as np
import re

# Read the local configuration file; only its LOCAL_PATH entry is used here.
with open('../../config.local.yaml', 'r') as f:
    local_config = yaml.safe_load(f)

LOCAL_PATH = local_config['LOCAL_PATH']
DATA_PATH = os.path.join(LOCAL_PATH, "intermediate_data/cpc")

# data_tools is imported only after this directory is put on sys.path
# (presumably the module lives there -- confirm), so the order matters.
python_source_dir = os.path.join(LOCAL_PATH, "src/python")
sys.path.append(python_source_dir)

import data_tools as dt

# Seeded generator for any random draws made in this notebook.
rng = np.random.default_rng(12898)

# Values collected here are handed to dt.update_results further down.
RESULTS = dict()

# Meeting manifest; the page-count cell reads its per-date page columns.
manifest_csv_path = os.path.join(DATA_PATH, 'meetings-manifest.csv')
MANIFEST = pd.read_csv(manifest_csv_path)


In [3]:
# Load the two frames through the project's data_tools helpers.
# `df` is the frame the later cells in view read from (dates, titles, results).
# NOTE(review): the "No data found for <date>" lines in this cell's output are
# presumably printed by one of these helpers -- confirm in data_tools.
df = dt.get_analysis_data()
dfa = dt.get_agenda_items()

No data found for 2021-01-14
No data found for 2022-03-17
No data found for 2022-10-13


In [4]:
# Basic info

# Parse the date column once; the first and last meeting both come from it.
meeting_dates = pd.to_datetime(df['date'])

NUMBER_OF_MEETINGS = df['date'].nunique()
FIRST_MEETING_DATE, LAST_MEETING_DATE = meeting_dates.min(), meeting_dates.max()
# Calendar years spanned, counting both the first and the last year.
NUMBER_OF_YEARS = 1 + LAST_MEETING_DATE.year - FIRST_MEETING_DATE.year
# The row count of df is reported as the number of cases.
NUMBER_OF_CASES = len(df)

# Every value is stored as an already-formatted string.
RESULTS.update({
    'NumberOfMeetings': f"{NUMBER_OF_MEETINGS}",
    'FirstMeetingDate': FIRST_MEETING_DATE.strftime('%Y-%m-%d'),
    'LastMeetingDate': LAST_MEETING_DATE.strftime('%Y-%m-%d'),
    'NumberOfYears': f"{NUMBER_OF_YEARS}",
    'NumberOfCases': f"{NUMBER_OF_CASES:,.0f}",
})

In [9]:
# Number of agenda items (total, including non-cases)

# One pickle per meeting at DATA_PATH/<first 4 chars of date>/<date>/agenda-items.pkl;
# the total is the summed row count over every distinct date in df.
AGENDA_ITEMS = sum(
    len(pd.read_pickle(
        os.path.join(DATA_PATH, meeting_date[0:4], meeting_date, 'agenda-items.pkl')
    ))
    for meeting_date in df['date'].unique()
)

RESULTS['NumberOfAgendaItems'] = f"{AGENDA_ITEMS:,.0f}"

In [6]:
# Number of supplemental documents

# Build the per-meeting pickle paths first, then add up the length of each
# loaded object.
supplemental_doc_paths = [
    os.path.join(DATA_PATH, meeting_date[0:4], meeting_date, 'supplemental-docs.pkl')
    for meeting_date in df['date'].unique()
]
SUPPLEMENTAL_DOCS = sum(len(pd.read_pickle(path)) for path in supplemental_doc_paths)

RESULTS['NumberOfSupplementalDocs'] = f"{SUPPLEMENTAL_DOCS:,.0f}"

In [7]:
# Page counts

# Sum the three per-meeting page columns of the manifest over every distinct
# meeting date present in df.
PAGE_COLUMNS = ['agenda_pages', 'minutes_pages', 'supdocs_pages']
meeting_dates = pd.Index(df['date'].unique())

# Keep only the first manifest row per date (the same row the previous
# per-date `.iloc[0]` lookup selected) and index by date, so all meetings are
# fetched in one lookup instead of one full boolean scan per date.
manifest_by_date = MANIFEST.drop_duplicates(subset='date').set_index('date')

# Fail with the offending dates spelled out rather than an opaque
# "single positional indexer is out-of-bounds" IndexError.
missing_dates = meeting_dates.difference(manifest_by_date.index)
if len(missing_dates) > 0:
    raise KeyError(f"Meeting dates missing from the manifest: {list(missing_dates)}")

# skipna=False keeps a missing page count visible as NaN in the total
# instead of silently counting it as zero.
PAGE_COUNT = (
    manifest_by_date.loc[meeting_dates, PAGE_COLUMNS]
    .sum(skipna=False)
    .sum(skipna=False)
)

RESULTS['PageCount'] = f"{PAGE_COUNT:,.0f}"
    

In [8]:
# Hand the collected RESULTS dict to the project's update_results helper.
# As the last expression of the cell, the call's return value is what the
# notebook displays as this cell's output.
dt.update_results(RESULTS)

{'NumberOfMeetings': '153',
 'FirstMeetingDate': '2018-05-10',
 'LastMeetingDate': '2024-12-19',
 'NumberOfCases': '727',
 'PageCount': '23,633',
 'NumberOfYears': '7',
 'NumberOfAgendaItems': '1,497',
 'NumberOfSupplementalDocs': '6,447'}

In [26]:
# Spot-check one randomly chosen case title.
# Drawing from the notebook's seeded generator makes the pick reproducible on
# a fresh Restart & Run All. Each call advances the generator, so re-running
# this cell on its own will show a different title.
df['title'].sample(1, random_state=rng)

399    CPC-2020-6050-CU-ZV-F-WDI
Name: title, dtype: object

In [33]:
# Show the project_result recorded for one agenda item at one meeting.
date = '2024-12-19'
on_meeting_date = df['date'].eq(date)
# item_no is matched against the string '7', not the integer 7.
is_item_seven = df['item_no'].eq('7')
df.loc[on_meeting_date & is_item_seven, 'project_result']

724    APPROVED
Name: project_result, dtype: object

In [35]:
# Rank rows by the product of the n__support and n__oppose columns; the
# product is large only when both columns are large.
# This writes (or overwrites) a 'sortvec' column on df itself.
df['sortvec'] = df['n__support'].mul(df['n__oppose'])
ranked_by_sortvec = df.sort_values('sortvec', ascending=False)
ranked_by_sortvec.iloc[:15]

Unnamed: 0,year,date,item_no,title,related_cases,council_district,agenda_content,agenda_perplexity,agenda_order,num_agenda_items,...,sfx_SPP,sfx_AMDT2,sfx_CUW,sfx_HD,sfx_SN,sfx_M3,sfx_PA2,sfx_ELD,sfx_DB,sortvec
697,2024,2024-09-26,7,CPC-2023-7068-CA,"CPC-2023-7068-CA, ENV-2020-6762-EIR, ENV-2020-...",CITYWIDE,7. CPC-2023-7068-CA ...,1.07321,2,3,...,0,0,0,0,0,0,0,0,0,23901
515,2022,2022-11-17,9,CPC-2022-3413-CA,"CPC-2022-3413-CA, CPC-2022-3712-ZC","4, 5",9. *CPC-2022-3413-CA ...,1.140391,5,6,...,0,0,0,0,0,0,0,0,0,13688
516,2022,2022-11-17,10,CPC-2022-3712-ZC,CPC-2022-3413-CA,"4, 5",10. *CPC-2022-3712-ZC ...,1.080483,6,6,...,0,0,0,0,0,0,0,0,0,11328
257,2020,2020-05-28,9,CPC-2019-6069-CU-DB-CDP-CDO-SPP-MEL-WDI,"CPC-2019-6069-CU-DB-CDP-CDO-SPP-MEL-WDI, ENV-2...",11,9. CPC-2019-6069-CU-DB-CDP-CDO-SPP-ME...,1.062578,4,9,...,1,0,0,0,0,0,0,0,1,3996
300,2020,2020-10-08,6,DIR-2019-6048-TOC-SPR-WDI-1A,"DIR-2019-6048-TOC-SPR-WDI-1A, ENV-2016-273-MND...",1,6. DIR-2019-6048-TOC-SPR-WDI-1A ...,1.119763,3,6,...,0,0,0,0,0,0,0,0,0,2808
337,2021,2021-02-18,6,CPC-2016-1450-CPU,"CPC-2016-1450-CPU, ENV-2016-1451-EIR","4, 5, 13",6. CPC-2016-1450-CPU Coun...,1.108964,1,1,...,0,0,0,0,0,0,0,0,0,2070
517,2022,2022-12-08,6,CPC-2022-3413-CA,"CPC-2022-3413-CA, CPC-2022-3712-ZC","4, 5",6. CPC-2022-3413-CA ...,1.131911,1,6,...,0,0,0,0,0,0,0,0,0,1806
375,2021,2021-06-17,6,CPC-2017-432-CPU,"CPC-2017-432-CPU, CPC-2014-1582-CA",CITYWIDE,6. CPC-2017-432-CPU ...,1.207942,1,1,...,0,0,0,0,0,0,0,0,0,1632
518,2022,2022-12-08,7,CPC-2022-3712-ZC,"CPC-2022-3712-ZC, CPC-2022-3413-CA","4, 5",7. CPC-2022-3712-ZC ...,1.081877,2,6,...,0,0,0,0,0,0,0,0,0,1218
368,2021,2021-05-27,10,CPC-2018-7344-GPAJ-VZCJ-HD-SP-SPP-CDP-MEL-SPR-PHP,CPC-2018-7344-GPAJ-VZCJ-HD-SP-SPP-CDP-MEL-SPR-...,11,10. CPC-2018-7344-GPAJ-VZCJ-HD-SP-SPP-...,1.155764,5,6,...,1,0,0,1,0,0,0,0,0,946


In [32]:
# Print every column name of df on one line as a plain Python list.
column_names = df.columns.tolist()
print(column_names)

['year', 'date', 'item_no', 'title', 'related_cases', 'council_district', 'agenda_content', 'agenda_perplexity', 'agenda_order', 'num_agenda_items', 'consent_calendar', 'minutes_content', 'minutes_perplexity', 'agenda_item_summary', 'deliberations_summary', 'motion_summary', 'appealed', 'relevant_laws', 'num_referenced_laws', 'moved', 'seconded', 'ayes', 'nays', 'abstained', 'recused', 'absent', 'vote_result', 'n_ayes', 'n_nays', 'n_abstained', 'n_recused', 'n_absent', 'appeal_result', 'project_result', 'n__support', 'n__oppose', 'n_individual_support', 'n_individual_oppose', 'n_official_support', 'n_official_oppose', 'n_other_support', 'n_other_oppose', 'sfx_MSP', 'sfx_BL', 'sfx_SUD', 'sfx_CUX', 'sfx_ZV', 'sfx_WDI', 'sfx_CN', 'sfx_PHP', 'sfx_SPR', 'sfx_SPPC', 'sfx_VZCJ', 'sfx_CU', 'sfx_ZC', 'sfx_CUB', 'sfx_BSA', 'sfx_PMEX', 'sfx_PMLA', 'sfx_ZAA', 'sfx_CCMP', 'sfx_CPU', 'sfx_PAB', 'sfx_CDO', 'sfx_SP', 'sfx_ACI', 'sfx_CDP', 'sfx_1A', 'sfx_TDR', 'sfx_GPA', 'sfx_MSC', 'sfx_PSH', 'sfx_CPIO