In [48]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm
import statsmodels.api as sma
from statsmodels.iolib.summary2 import summary_col

In [12]:
# bring in the speaker specificities
def mergeSpeakerSpecs(df):
    specs = pd.read_csv('./data/hearings_with_specifs.csv', header=0, usecols=['speakerSpec', 'congress', 'speaker', 'filename'])
    specs['speaker'] = [x.lower() for x in specs['speaker'].astype(str)]
    specs['filename'] = specs['filename'].map(lambda x: x.rstrip('.txt'))
    df  = pd.merge(df, specs, on=['filename', 'speaker'], how='inner')
    # get rid of one congress col and fix the remaining congress col
    del df['congress_y']
    df.rename(columns={'congress_x' : 'congress'}, inplace=True)
    return df

In [24]:
# Generate a score by comparing hearings only within a single congress
def withinCongressZscore(df):
    df_z = []
    for i in df['congress'].unique():
        group = df[(df['congress']==i)]
        group['wc_zscore'] = (group['speakerSpec'] - group['speakerSpec'].mean())/group['speakerSpec'].std(ddof=0)
        df_z = df_z + list(group['wc_zscore'])
    df['wc_specZ'] = df_z
    return df

In [25]:
# Generate a variable measuring the strength of the majority party
def genMajStrength(hd):
    for i in range(len(hd)):
        if hd.loc[i, 'chamber'] == 0:
            hd.loc[i, 'majStrength'] = np.absolute(hd.loc[i, 'hdems'] - hd.loc[i, 'hreps'])
        elif hd.loc[i, 'chamber'] == 1:
            hd.loc[i, 'majStrength'] = np.absolute(hd.loc[i, 'sdems'] - hd.loc[i, 'hreps'])
        else:
            hd.loc[i, 'majStrength'] == 0
        print i
    return hd

In [26]:
# bring in the working datafame (hearing data --> "hd")
hd = pd.read_csv('./data/working_df.csv', header=0)
# create the measure of ideological distance between the chair and the speaker
# Note: the "ag_ideal" is the agency ideal point as a measure of the speaker's ideal point
hd['ideoDiff'] = np.absolute(hd['ag_ideal'] - hd['dw1'])
# zscore across the entire sample (between congress -- "bc")
hd['bc_specZ'] = stats.zscore(hd['speakerSpec'])
# create a divided gov variable (chair of different party than president)
# expectation: specificity should go up under div gov because of the ally principle
hd['divgov'] = np.absolute(hd['party_x']/100 - hd['pres_party'])
hd.rename(columns={'committee seniority': 'seniority'}, inplace=True)
hd['wc_specZ'] = withinCongressZscore(hd)

# final trim of the dataset to get rid of remaining columns that do not contribute
hd = hd.loc[:,hd.count() > 0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [49]:
hd['wc_specZ'] = withinCongressZscore(hd)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [51]:
plt.figure();
hd['congress'].plot(kind='hist')
plt.show()

In [53]:
type(hd['congress'][1])

numpy.int64