Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
new script to get total number of pages/words enacted by Congress, plus adding a note to the statistics page about how counting bills isn't interesting
- Loading branch information
Showing
6 changed files
with
152 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#!script

# Tabulate how many bills reached each status, broken down by Congress and
# bill type, and write the table to standard output as CSV.
#
# Output layout: a header row ("congress", "bill type", then one column per
# status key), followed by one row for every Congress number in the observed
# range crossed with every bill type that occurs in the data.

from collections import defaultdict
import csv
import sys

from django.db.models import Count

from bill.models import *

# Collect bill counts per congress/type/status triple (93rd Congress onward).
# Each record carries the number of matching bills in rec["count"].
data = Bill.objects.filter(congress__gte=93).values("congress", "bill_type", "current_status").annotate(count=Count('id'))
data = list(data)  # fetch all

# Replace numeric bill type and status with enum value and get the domain of statuses.
all_statuses = set()
all_bill_types = set()
for rec in data:
    rec["bill_type"] = BillType.by_value(rec["bill_type"])
    rec["current_status"] = BillStatus.by_value(rec["current_status"])
    all_statuses.add(rec["current_status"])
    all_bill_types.add(rec["bill_type"])

# Sort statuses in our canonical order.
all_statuses = sorted(all_statuses, key=lambda status: status.sort_order)

# Form a matrix keyed by (congress, bill type, status). Missing cells read as 0.
matrix = defaultdict(int)
for rec in data:
    # Add the aggregated bill count, not 1: the records are already grouped,
    # so counting records would report 1 for every populated cell.
    matrix[(rec["congress"], rec["bill_type"], rec["current_status"])] += rec["count"]

# Output.
W = csv.writer(sys.stdout)
W.writerow(["congress", "bill type"] + [status.key for status in all_statuses])
if data:
    # min()/max() raise ValueError on an empty sequence, so only emit body
    # rows when the query returned something.
    congresses = [rec["congress"] for rec in data]
    for congress in range(min(congresses), max(congresses) + 1):
        # NOTE(review): all_bill_types is a set, so the order of bill-type
        # rows within a Congress is not guaranteed to be stable between runs.
        for bill_type in all_bill_types:
            W.writerow([congress, bill_type.label] + [matrix[(congress, bill_type, status)] for status in all_statuses])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import glob
import json
import re

# A leading run of two or more capital letters (the candidate acronym), the
# rest of the title, and an optional trailing " Act" / " Act of YYYY".
# Note that the pattern does not itself require a space after the acronym.
_TITLE_RE = re.compile(r"^([A-Z]{2,})(.*?)( Act(?: of \d\d\d\d)?)?$")

# Splits text on capital letters while keeping each capital as its own piece.
_CAPITAL_SPLIT_RE = re.compile("([A-Z])")


def is_acronym_title(title):
    """Return True if *title* opens with an acronym that the rest of it spells out.

    The title must start with two or more capital letters. What follows (up
    to an optional trailing " Act" or " Act of YYYY") is the remainder. The
    acronym's first letter is skipped (it stands for the acronym itself when
    the acronym is recursive); every later letter must be accounted for by
    the remainder:

    * every capital letter in the remainder must appear, in order;
    * any non-space characters between capitals may optionally be used too;
    * a final "A" (as in "Act") may optionally close the acronym.

    Matching is case-insensitive.
    """
    m = _TITLE_RE.match(title)
    if not m:
        return False
    acronym, remainder = m.group(1, 2)
    remainder = remainder.strip()

    # The remainder must be at least as long as the acronym (after the first letter).
    if len(remainder) <= len(acronym) - 1:
        return False

    # Build a regex from the remainder: each capital letter becomes a required
    # literal, and each run of other text becomes an optional character class
    # of its non-space characters.
    pieces = [p for p in _CAPITAL_SPLIT_RE.split(remainder) if p.strip()]
    remainder_re = "".join(
        p if re.match("[A-Z]$", p)
        else "[" + "".join(re.escape(c) for c in p if c != " ") + "]*"
        for p in pieces
    )
    return re.match("^" + remainder_re + "A?$", acronym[1:], re.I) is not None


# Iterate over all bills...
for fn in glob.glob("data/congress/11*/bills/*/*/data.json"):
    # JSON is UTF-8 by specification; don't depend on the locale's default encoding.
    with open(fn, encoding="utf-8") as f:
        bill = json.load(f)
    for title in bill.get("titles", []):
        title = title["title"]
        if is_acronym_title(title):
            print(title)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!script

# Rank sponsors by the number of bills they introduced within a fixed date
# window and write the ranking to standard output as CSV rows of
# (sponsor, bill count), highest count first.

from django.db.models import Count
from bill.models import Bill
import csv
import sys

# Date window under study. To look at everything introduced since the start
# of 2019 instead, filter on introduced_date__gte="2019-01-01" alone.
bills = Bill.objects.filter(
    introduced_date__gte="2017-01-01",
    introduced_date__lte="2017-08-06",
).only("sponsor")

# Tally bills per sponsor.
# NOTE(review): bill.sponsor is read once per bill; presumably each read
# fetches the related sponsor record separately -- confirm if this is slow.
tally = {}
for bill in bills:
    sponsor = bill.sponsor
    if sponsor in tally:
        tally[sponsor] += 1
    else:
        tally[sponsor] = 1

# Highest counts first. The sort is stable, so ties stay in the order in which
# the sponsors were first encountered.
ranked = sorted(tally.items(), key=lambda item: item[1], reverse=True)

writer = csv.writer(sys.stdout)
writer.writerows([sponsor, total] for sponsor, total in ranked)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters