In [5]:
from contrans import contrans
import numpy as np
import pandas as pd
import dotenv
import os
import json
import requests 
import psycopg
import sqlite3
import plotly.express as px
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sqlalchemy import create_engine    
# Pull variables from a local .env file into the process environment, then
# read the two secrets this notebook needs. Either value is None when the
# corresponding variable is not set.
dotenv.load_dotenv()
env = os.environ
congresskey = env.get('congresskey')
postgrespassword = env.get('POSTGRES_PASSWORD')

In [6]:
# Project helper object; later cells use it for the Mongo and Postgres
# connections, for querying Mongo, and for the ideology plot.
ct = contrans()

In [7]:
# Connect to the MongoDB instance on localhost through the project helper.
# Presumably returns a pymongo collection of bills (count_documents is called
# on it in the next cell) -- confirm against contrans.connect_to_mongo.
bills = ct.connect_to_mongo(host='localhost')

In [8]:
# Sanity check: count the stored documents (an empty filter matches all of them).
bills.count_documents({})

5285

In [9]:
# Query every document (empty filter) but project only the sponsor list and
# the bill text, leaving out Mongo's _id field.
match_all = {}
wanted_fields = {'bill.sponsors': 1, 'bill_text': 1, '_id': 0}
billdf = ct.query_mongo(bills, match_all, wanted_fields)

In [10]:
# Inspect the query result: one row per bill, with the nested 'bill' record
# (holding the sponsors list) and the raw 'bill_text'.
billdf

Unnamed: 0,bill,bill_text
0,"{'sponsors': [{'bioguideId': 'N000188', 'distr...",\n[Congressional Bills 118th Congress]\n[From ...
1,"{'sponsors': [{'bioguideId': 'N000188', 'distr...",\n[Congressional Bills 118th Congress]\n[From ...
2,"{'sponsors': [{'bioguideId': 'N000188', 'distr...",\n[Congressional Bills 118th Congress]\n[From ...
3,"{'sponsors': [{'bioguideId': 'N000188', 'distr...",\n[Congressional Bills 118th Congress]\n[From ...
4,"{'sponsors': [{'bioguideId': 'N000188', 'distr...",\n[Congressional Bills 118th Congress]\n[From ...
...,...,...
5280,"{'sponsors': [{'bioguideId': 'J000032', 'distr...",\n[Congressional Bills 118th Congress]\n[From ...
5281,"{'sponsors': [{'bioguideId': 'J000032', 'distr...",\n[Congressional Bills 118th Congress]\n[From ...
5282,"{'sponsors': [{'bioguideId': 'J000032', 'distr...",\n[Congressional Bills 118th Congress]\n[From ...
5283,"{'sponsors': [{'bioguideId': 'J000032', 'distr...",\n[Congressional Bills 118th Congress]\n[From ...


In [11]:
def _first_sponsor_id(bill):
    """Return the bioguideId of the first sponsor in a bill record.

    Returns None when the record is not a dict or has no sponsors, instead of
    raising, so a single malformed document cannot abort the whole cell.
    """
    sponsors = bill.get('sponsors') if isinstance(bill, dict) else None
    if not sponsors:
        return None
    return sponsors[0].get('bioguideId')


# Replace the nested 'bill' record with a flat 'sponsor' column.
# The guard keeps the cell re-runnable: once 'bill' has been dropped, a second
# execution is a no-op rather than a KeyError. Building a new frame (rather
# than dropping in place) also avoids mutating the frame displayed earlier.
# Rows that end up with a None sponsor are excluded by the default
# groupby(dropna=True) used in the next step.
if 'bill' in billdf.columns:
    billdf = (
        billdf
        .assign(sponsor=[_first_sponsor_id(x) for x in billdf['bill']])
        .drop(columns='bill')
    )
billdf

Unnamed: 0,bill_text,sponsor
0,\n[Congressional Bills 118th Congress]\n[From ...,N000188
1,\n[Congressional Bills 118th Congress]\n[From ...,N000188
2,\n[Congressional Bills 118th Congress]\n[From ...,N000188
3,\n[Congressional Bills 118th Congress]\n[From ...,N000188
4,\n[Congressional Bills 118th Congress]\n[From ...,N000188
...,...,...
5280,\n[Congressional Bills 118th Congress]\n[From ...,J000032
5281,\n[Congressional Bills 118th Congress]\n[From ...,J000032
5282,\n[Congressional Bills 118th Congress]\n[From ...,J000032
5283,\n[Congressional Bills 118th Congress]\n[From ...,J000032


In [13]:
# Collapse to one row per sponsor: every bill text a sponsor introduced is
# joined into a single space-separated string.
texts_by_sponsor = billdf.groupby(['sponsor'])['bill_text']
billdf = texts_by_sponsor.apply(' '.join).reset_index()

In [15]:
# Use the sponsor id as the row index so that a positional row number can be
# mapped back to a sponsor via billdf.index[...] later on.
# NOTE(review): not idempotent -- re-running this cell after 'sponsor' has
# already become the index raises a KeyError.
billdf = billdf.set_index('sponsor')

billdf

Unnamed: 0_level_0,bill_text
sponsor,Unnamed: 1_level_1
B001230,\n[Congressional Bills 118th Congress]\n[From ...
B001236,\n[Congressional Bills 118th Congress]\n[From ...
B001261,\n[Congressional Bills 118th Congress]\n[From ...
B001267,\n[Congressional Bills 118th Congress]\n[From ...
B001288,\n[Congressional Bills 118th Congress]\n[From ...
...,...
W000800,\n[Congressional Bills 118th Congress]\n[From ...
W000802,\n[Congressional Bills 118th Congress]\n[From ...
W000805,\n[Congressional Bills 118th Congress]\n[From ...
W000817,\n[Congressional Bills 118th Congress]\n[From ...


In [16]:
from sklearn.feature_extraction.text import TfidfTransformer, TfidfVectorizer

# Each "document" is one sponsor's concatenated bill text.
#   stop_words='english' : drop scikit-learn's built-in English stop words
#   max_df=.8            : ignore terms found in more than 80% of the documents
#   ngram_range=(1, 3)   : score unigrams, bigrams and trigrams
vectorizer_options = dict(stop_words='english', max_df=.8, ngram_range=(1, 3))
tfIdfVectorizer = TfidfVectorizer(**vectorizer_options)

# Sparse matrix with one row per sponsor (same order as billdf) and one
# column per retained term.
tfIdf = tfIdfVectorizer.fit_transform(billdf['bill_text'])

In [18]:
# Build a long table of the highest-weighted TF-IDF terms for every sponsor,
# with columns: word, tf_idf, sponsor_id.
#
# Compared with densifying each row over the whole vocabulary and growing the
# result with pd.concat inside the loop, this version:
#   * fetches the feature names once instead of on every iteration,
#   * ranks only the non-zero weights stored in the sparse row,
#   * collects the per-sponsor frames in a list and concatenates once.
# Because only non-zero weights are ranked, a sponsor with fewer than `top_n`
# scored terms yields fewer rows rather than being padded with zero-weight terms.
top_n = 10
feature_names = tfIdfVectorizer.get_feature_names_out()
n_sponsors = tfIdf.shape[0]

top_terms = []
for t in range(n_sponsors):
    print(f'Now working on {t} of {n_sponsors}', end="\r")
    row = tfIdf[t].tocoo()  # exposes the stored column ids (.col) and weights (.data)
    # Stable sort on the negated weights: descending by weight, ties kept in
    # storage order.
    best = np.argsort(-row.data, kind='stable')[:top_n]
    top_terms.append(pd.DataFrame({
        'word': feature_names[row.col[best]],
        'tf_idf': row.data[best],
        'sponsor_id': billdf.index[t],  # row t of tfIdf corresponds to row t of billdf
    }))

# pd.concat raises on an empty list, so fall back to an empty, correctly
# labelled frame when there are no sponsors at all.
if top_terms:
    charwords = pd.concat(top_terms, ignore_index=True)
else:
    charwords = pd.DataFrame(columns=['word', 'tf_idf', 'sponsor_id'])

Now working on 94 of 95

In [19]:
# Inspect the result: the top-weighted terms per sponsor with their TF-IDF scores.
charwords

Unnamed: 0,word,tf_idf,sponsor_id
0,apprenticeship,0.314454,B001230
1,phs act,0.251474,B001230
2,phs,0.215691,B001230
3,000 shall,0.112240,B001230
4,state apprenticeship,0.106539,B001230
...,...,...,...
945,share agreement,0.152781,Y000064
946,educational isa,0.133392,Y000064
947,additional district,0.094663,Y000064
948,additional district judge,0.075636,Y000064


In [3]:
# Plot ideology for the member with bioguide id N000188 using the project helper.
# NOTE(review): this cell's execution count is lower than the cells above it,
# so it was run in a different order -- confirm it still works from a fresh
# kernel with Restart & Run All.
ct.plot_ideology('N000188')

In [4]:
# Connect to Postgres through the project helper.
# NOTE(review): the password comes from the helper's own POSTGRES_PASSWORD
# attribute; the module-level `postgrespassword` read in the first cell is
# not used here. `server` is not used in this cell either.
server, engine = ct.connect_to_postgres(ct.POSTGRES_PASSWORD)
myquery = '''
SELECT bioguideid, district, name, partyname, state, nominate_dim1
FROM members
'''
# Load the members table into a DataFrame. nominate_dim1 is blank for some
# rows in the displayed output (presumably a first-dimension NOMINATE score
# that is not populated for every member -- TODO confirm), so filter or
# handle missing values before using it numerically.
ideo = pd.read_sql_query(myquery, con=engine)
ideo

Unnamed: 0,bioguideid,district,name,partyname,state,nominate_dim1
0,N000188,01,"Norcross, Donald",Democratic,NJ,-0.419
1,Y000064,S,"Young, Todd",Republican,IN,
2,W000802,S,"Whitehouse, Sheldon",Democratic,RI,
3,W000800,S,"Welch, Peter",Democratic,VT,
4,W000779,S,"Wyden, Ron",Democratic,OR,
...,...,...,...,...,...,...
534,C001087,01,"Crawford, Eric A. ""Rick""",Republican,AR,0.460
535,S001185,07,"Sewell, Terri A.",Democratic,AL,-0.402
536,A000055,04,"Aderholt, Robert B.",Republican,AL,0.405
537,R000575,03,"Rogers, Mike D.",Republican,AL,0.379
