In [16]:
import json
import pandas as pd
from sklearn.linear_model import LinearRegression

In [15]:
# Load the three JSON inputs used by the analysis.
#   sentiment_data - per-window classification results
#   bill_windows   - for each bill, the windows matched to it
#   bill_data      - per-bill metadata
# The encoding is passed explicitly: JSON text is UTF-8, whereas open()
# without `encoding` falls back to the platform's locale encoding, which can
# mis-decode (or fail on) non-ASCII characters on non-UTF-8 systems.
with open('../data/all_windows_classified.json', 'r', encoding='utf-8') as f:
    sentiment_data = json.load(f)

with open('../data/bill_window_matching.json', 'r', encoding='utf-8') as f:
    bill_windows = json.load(f)

with open('../data/bill_data.json', 'r', encoding='utf-8') as f:
    bill_data = json.load(f)

In [17]:
# Build one record per bill: the mean of each sentiment score over the bill's
# matched windows, together with the bill's last-action ordinal.
sentiment_labels = ['positive', 'negative', 'neutral']
bill_sentiment_data = {}
for bill_id, window_ids in bill_windows.items():
    last_action = bill_data[bill_id]['last_action_ordinal']

    # Sentiment score dicts of every window matched to this bill, in order.
    window_scores = [sentiment_data[window_id]['sentiment'] for window_id in window_ids]

    record = {}
    for label in sentiment_labels:
        values = [scores[label] for scores in window_scores]
        # A bill without any matched window gets 0.0 for every sentiment type.
        record[label] = sum(values) / len(values) if values else 0.0
    record['ordinality'] = last_action

    bill_sentiment_data[bill_id] = record

# One row per bill; the sentiment means are the features and the ordinal is
# the regression target.
df = pd.DataFrame.from_dict(bill_sentiment_data, orient='index')
X = df[sentiment_labels]
y = df['ordinality']

# Ordinary least squares fit of ordinality on the three mean sentiment scores.
reg = LinearRegression()
reg.fit(X, y)

# Report the fitted parameters; coefficients follow the column order of X.
report = [
    ('Intercept:', reg.intercept_),
    ('Positive coefficient:', reg.coef_[0]),
    ('Negative coefficient:', reg.coef_[1]),
    ('Neutral coefficient:', reg.coef_[2]),
]
for heading, value in report:
    print(heading, value)

Intercept: 2.859513910778239
Positive coefficient: 16.354801308680088
Negative coefficient: 19.353096637613696
Neutral coefficient: 8.472316412017243
