In [1]:
from ucimlrepo import fetch_ucirepo 
import pandas as pd

# Fetch dataset 105 from the UCI repository; everything below is derived from it.
congressional_voting_records = fetch_ucirepo(id=105)

# Full original frame plus the feature / target views exposed by the loader.
df = congressional_voting_records.data.original
X = congressional_voting_records.data.features
y = congressional_voting_records.data.targets

# Variable names from the dataset metadata; creatBayesTables skips the first
# entry (variables[1:]) and builds one count table per remaining name.
variables = congressional_voting_records.variables['name']

# Zero-filled count template: one column per party, one row per possible
# answer ('y', 'n', or missing, stored under the label 'nan').
default_table = {party: [0, 0, 0] for party in ('republican', 'democrat')}
default_df = pd.DataFrame(default_table, index=['y', 'n', 'nan'])

In [2]:
def simpleSample(data, frac=0.2, random_state=None):
    """Split ``data`` into a learning part and a class-stratified hold-out part.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'Class' column; the hold-out rows are drawn separately
        from each class so both parts keep the class proportions.
    frac : float, default 0.2
        Fraction of every class that goes into the hold-out part.
    random_state : optional
        Forwarded to ``GroupBy.sample``; pass a value to make the split
        reproducible. ``None`` keeps the original non-seeded behaviour.

    Returns
    -------
    (learning, testing) : tuple of pandas.DataFrame
        ``testing`` is the sampled hold-out part, ``learning`` is ``data``
        with the hold-out index labels dropped.
    """
    testing = data.groupby('Class', group_keys=False).sample(
        frac=frac, random_state=random_state
    )
    # Rows are removed by index label, so the index is expected to be unique.
    learning = data.drop(testing.index)
    return learning, testing

In [None]:
def fold10Sample(data, k=10, random_state=None):
    """Partition ``data`` into ``k`` class-stratified, disjoint folds.

    Fold ``i`` takes ``1/(k-i)`` of whatever is still unassigned in each
    class, so the folds come out (nearly) equal in size and the last fold
    takes everything that is left.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'Class' column. The caller's frame is not modified.
    k : int, default 10
        Number of folds.
    random_state : optional
        Forwarded to ``GroupBy.sample`` for every fold; pass a value to make
        the partition reproducible. ``None`` keeps the non-seeded behaviour.

    Returns
    -------
    list of pandas.DataFrame
        The ``k`` folds, in the order they were drawn.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    folds = []
    remaining = data
    for i in range(k):
        fold = remaining.groupby('Class', group_keys=False).sample(
            frac=1 / (k - i), random_state=random_state
        )
        folds.append(fold)
        # Drop by index label so later folds cannot reuse these rows.
        remaining = remaining.drop(fold.index)
    return folds

In [4]:
def transform(data):
    """Placeholder preprocessing hook: hands ``data`` back unchanged."""
    return data

In [5]:
def creatBayesTables(data):
    """Count, per feature and party, how often each answer occurs in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'Class' column plus one column for every name in the
        module-level ``variables[1:]``.

    Returns
    -------
    (bayestablesClass, bayestables) : tuple
        ``bayestablesClass`` maps each party to its number of rows in
        ``data``. ``bayestables`` maps each feature name to a copy of the
        module-level ``default_df`` template (rows 'y' / 'n' / 'nan', one
        column per party) filled with the matching counts; missing answers
        are counted under the 'nan' row.
    """
    parties = ('republican', 'democrat')
    bayestables = {k: default_df.copy() for k in variables[1:]}

    # Filter the rows of each party once instead of once per feature and answer.
    rows_by_party = {party: data.loc[data['Class'] == party] for party in parties}

    for category, table in bayestables.items():
        for party, rows in rows_by_party.items():
            votes = rows[category]
            # Write through a single .loc call: chained assignment such as
            # table[party]['y'] = ... does not update the table when pandas
            # Copy-on-Write is active, leaving all counts at zero.
            table.loc['y', party] = int((votes == 'y').sum())
            table.loc['n', party] = int((votes == 'n').sum())
            table.loc['nan', party] = int(votes.isna().sum())

    bayestablesClass = {party: len(rows_by_party[party]) for party in parties}

    return bayestablesClass, bayestables

In [6]:
# Position of every feature inside a row tuple from DataFrame.itertuples():
# slot 0 is the index and slot 1 is what the evaluation loops read as the
# class label, so features start at slot 2. Derived from X.columns so the
# mapping is not silently cut off at a fixed number of features.
categorytuple = {name: position for position, name in enumerate(X.columns, start=2)}

def naiveBayesClassifier(learning, testing, tables=None):
    """Predict the party of one row with a naive Bayes vote over all features.

    Parameters
    ----------
    learning : pandas.DataFrame
        Training rows, passed to ``creatBayesTables`` when ``tables`` is not
        supplied.
    testing : tuple
        One row as produced by ``DataFrame.itertuples()``; feature values are
        read at the positions given by the module-level ``categorytuple``.
        Missing values are looked up under the 'nan' row of the count tables.
    tables : tuple, optional
        A ``(class_counts, feature_tables)`` pair previously returned by
        ``creatBayesTables``. Supplying it avoids rebuilding the counts on
        every call when many rows are classified against the same training
        data.

    Returns
    -------
    str
        'republican' or 'democrat', whichever has the larger score;
        'republican' wins ties.

    Notes
    -----
    No smoothing is applied, so a feature value never seen for a party sets
    that party's score to zero. A training set with no rows at all still
    raises ZeroDivisionError when the class priors are computed.
    """
    if tables is None:
        tables = creatBayesTables(learning)
    total, categorical = tables

    leaning = {'republican': 1, 'democrat': 1}
    for c in categorical:
        # The observed answer is the same for both parties: look it up once.
        el = testing[categorytuple[c]]
        if pd.isnull(el):
            el = 'nan'
        for party in leaning:
            if total[party] == 0:
                # A class absent from the training data cannot be predicted;
                # scoring it 0 avoids a 0/0 division that would yield NaN.
                leaning[party] = 0
                continue
            leaning[party] *= categorical[c].loc[el, party] / total[party]

    # Weight the likelihoods by the class priors.
    everyone = total['democrat'] + total['republican']
    leaning['democrat'] *= total['democrat'] / everyone
    leaning['republican'] *= total['republican'] / everyone
    return max(leaning, key=leaning.get)


In [9]:
import numpy as np

# The splits below call pandas' sample() without a random_state, which draws
# from NumPy's global generator; seeding it makes a full re-run reproducible.
SEED = 42
np.random.seed(SEED)


def countCorrect(learning, rows):
    """Classify every row of ``rows`` against ``learning``.

    Returns ``(hits, n_rows)`` where ``hits`` is the number of rows whose
    prediction equals the value in slot 1 of the row tuple (the first column
    of the frame, read here as the class label).
    """
    hits = 0
    for row in rows.itertuples():
        if naiveBayesClassifier(learning, row) == row[1]:
            hits += 1
    return hits, len(rows.index)


# Mode switch: 1 sends the data through transform() first, 0 uses it as loaded.
m = 0
if m == 1:
    df = transform(df)

# Hold out a test set, then draw a subsample of the training data to score
# the model on rows it was trained on.
data, testing = simpleSample(df)
_ , validate = simpleSample(data)

success, divisor = countCorrect(data, validate)
print("1.Train Set Accuracy:\nAccuracy:", "{:.2%}".format(success/divisor))

print("\n2.10-Fold Cross-Validation Results:")
folds = fold10Sample(data)
successes = [0] * len(folds)
for i, fold in enumerate(folds):
    # Train only on the other folds; fitting on all of `data` would let the
    # model see the rows it is about to be scored on.
    training = data.drop(fold.index)
    hits, divisor = countCorrect(training, fold)
    successes[i] = hits/divisor
    print("Accuracy Fold", i+1, ":", "{:.2%}".format(successes[i]), " size", divisor)

print("\nAverage Accuracy:",  "{:.2%}".format(np.average(successes)))
print("Standard Deviation:",  "{:.2%}".format(np.std(successes)))

success, divisor = countCorrect(data, testing)
print("\n3.Test Set Accuracy:\nAccuracy:", "{:.2%}".format(success/divisor))


1.Train Set Accuracy:
Accuracy: 88.57%

2.10-Fold Cross-Validation Results:
          Class handicapped-infants water-project-cost-sharing  \
0    republican                   n                          y   
1    republican                   n                          y   
2      democrat                 NaN                          y   
3      democrat                   n                          y   
4      democrat                   y                          y   
..          ...                 ...                        ...   
429    democrat                   y                          n   
431    democrat                   n                          n   
432  republican                   n                        NaN   
433  republican                   n                          n   
434  republican                   n                          y   

    adoption-of-the-budget-resolution physician-fee-freeze el-salvador-aid  \
0                                   n                