In [None]:
#  Analysis of the most effective legislators
# In this section, we identify the most effective legislators. The output shows each legislator's name, the number of bills proposed, the number of bills passed, the average number of days per bill, and the proportion of proposed bills that passed.

In [None]:
import pandas as pd
import re

# Load dataset
# Read the legislation CSV into a DataFrame. The path is relative, so it is
# resolved against the current working directory of the kernel.
# Code further down reads the "history", "sponsor_name" and "bill_number" columns.
file_path = "legislation_data.csv"
df = pd.read_csv(file_path)

# Function to extract the first date from the history column
def extract_date(history):
    """Return the first date-like token found in a bill's history text.

    Parameters
    ----------
    history : str or missing
        Free-text history of a bill. Any value that is not a string
        (NaN, None, numbers, ...) is treated as "no history".

    Returns
    -------
    pandas.Timestamp or None
        The first ``N/N/NNNN`` token parsed with ``pd.to_datetime``, or None
        when there is no history, no such token, or the token is not a real
        calendar date (e.g. ``13/45/2020``).
    """
    # Non-string values cannot be searched with a str regex.
    if not isinstance(history, str):
        return None
    match = re.search(r"(\d{1,2}/\d{1,2}/\d{4})", history)
    if match is None:
        return None
    # The pattern only checks digit counts, so it can capture impossible
    # dates; coerce those to NaT instead of raising and aborting .apply().
    parsed = pd.to_datetime(match.group(1), errors="coerce")
    return None if pd.isna(parsed) else parsed

# Function to determine if a bill progressed beyond committee
def bill_progressed(history):
    """Report whether a bill's history lacks a committee-referral entry.

    Returns False when ``history`` is missing (NaN/None). Otherwise returns
    True only if the phrase "Referred to the committee" (matched
    case-insensitively) does not occur anywhere in the text.

    NOTE(review): a bill that was referred to committee and later advanced
    still contains the phrase and is therefore reported as not progressed --
    confirm this is the intended definition.
    """
    if pd.isna(history):
        # Missing history is treated as "did not progress".
        return False
    referral = re.compile(r"Referred to the committee", re.IGNORECASE)
    return referral.search(history) is None

# Function to extract the last date recorded in the history column
# (used downstream as the bill's passage date)
def extract_passage_date(history):
    """Return the last date-like token found in a bill's history text.

    Parameters
    ----------
    history : str or missing
        Free-text history of a bill. Any value that is not a string
        (NaN, None, numbers, ...) is treated as "no history".

    Returns
    -------
    pandas.Timestamp or None
        The last ``N/N/NNNN`` token parsed with ``pd.to_datetime``, or None
        when there is no history, no such token, or that last token is not a
        real calendar date (e.g. ``99/99/2021``). The function does not check
        that the final entry actually records a passage.
    """
    # Non-string values cannot be searched with a str regex.
    if not isinstance(history, str):
        return None
    dates = re.findall(r"(\d{1,2}/\d{1,2}/\d{4})", history)
    if not dates:
        return None
    # The pattern only checks digit counts, so it can capture impossible
    # dates; coerce those to NaT instead of raising and aborting .apply().
    parsed = pd.to_datetime(dates[-1], errors="coerce")
    return None if pd.isna(parsed) else parsed

# Derive per-bill fields from the free-text history column
df["assigned_date"] = df["history"].apply(extract_date)          # first dated entry
df["progressed"] = df["history"].apply(bill_progressed)          # boolean flag per bill
df["passage_date"] = df["history"].apply(extract_passage_date)   # last dated entry

# Whole days elapsed between the first and last dated history entries
df["days_to_pass"] = df["passage_date"].sub(df["assigned_date"]).dt.days

# Keep only bills for which both a start date and a duration are known
df = df.dropna(
    how="any",
    subset=["assigned_date", "days_to_pass"],
)

# Per-sponsor summary: counts, mean duration, pass ratio, ranked by passed bills
effectiveness = (
    df.groupby("sponsor_name")
    .agg(
        bills_proposed=pd.NamedAgg(column="bill_number", aggfunc="count"),
        bills_passed=pd.NamedAgg(column="progressed", aggfunc="sum"),
        avg_days_per_bill=pd.NamedAgg(column="days_to_pass", aggfunc="mean"),
    )
    .reset_index()
    .assign(
        proportion_bills_passed=lambda table: table["bills_passed"] / table["bills_proposed"]
    )
    .sort_values(by="bills_passed", ascending=False)
    # NOTE(review): this drops the single top-ranked row by position; the
    # reason is not stated here -- presumably a placeholder or non-legislator
    # sponsor. Confirm, and prefer filtering by name if so.
    .iloc[1:]
)

# Show the ten remaining sponsors with the most passed bills
print(effectiveness.head(10))

             sponsor_name  bills_proposed  bills_passed  avg_days_per_bill  \
384        Tarr, Bruce E.            1257            86         428.980907   
120    Eldridge, James B.             538            70         462.505576   
274     Moore, Richard T.             276            62         492.326087   
388     Tolman, Steven A.             101            61         564.504950   
35     Brewer, Stephen M.             145            60         447.241379   
385     Timilty, James E.             391            54         512.636829   
197     Jones, Bradley H.             514            48         511.060311   
71   Creem, Cynthia Stone             750            44         463.886667   
146    Galvin, William C.             261            39         442.674330   
309      Pacheco, Marc R.             546            35         424.448718   

     proportion_bills_passed  
384                 0.068417  
120                 0.130112  
274                 0.224638  
388              