Permalink
Find file
5a2e91f Jun 30, 2016
44 lines (32 sloc) 1.58 KB
import os
import settings
import pandas as pd
from sklearn import cross_validation
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
def read():
train = pd.read_csv(os.path.join(settings.PROCESSED_DIR, "train.csv"))
return train
def cross_validate(train):
clf = LogisticRegression(random_state=1, class_weight="balanced")
predictors = train.columns.tolist()
predictors = [p for p in predictors if p not in settings.NON_PREDICTORS]
predictions = cross_validation.cross_val_predict(clf, train[predictors], train[settings.TARGET], cv=settings.CV_FOLDS)
return predictions
def compute_error(target, predictions):
return metrics.accuracy_score(target, predictions)
def compute_false_negatives(target, predictions):
df = pd.DataFrame({"target": target, "predictions": predictions})
return df[(df["target"] == 1) & (df["predictions"] == 0)].shape[0] / (df[(df["target"] == 1)].shape[0] + 1)
def compute_false_positives(target, predictions):
df = pd.DataFrame({"target": target, "predictions": predictions})
return df[(df["target"] == 0) & (df["predictions"] == 1)].shape[0] / (df[(df["target"] == 0)].shape[0] + 1)
if __name__ == "__main__":
train = read()
predictions = cross_validate(train)
error = compute_error(train[settings.TARGET], predictions)
fn = compute_false_negatives(train[settings.TARGET], predictions)
fp = compute_false_positives(train[settings.TARGET], predictions)
print("Accuracy Score: {}".format(error))
print("False Negatives: {}".format(fn))
print("False Positives: {}".format(fp))