#        Testing the Influence of Penalty Term Modification on Support Vector Classification

    Step 1: imports and magic commands
   

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, hstack
from sklearn import svm
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from my_measures import BinaryClassificationPerformance
%matplotlib inline

        Step 2: Binary Classifier

In [4]:
# Print the documentation of BinaryClassificationPerformance (imported from my_measures):
# its constructor arguments and the methods used by the trial cells below.
help(BinaryClassificationPerformance)

Help on class BinaryClassificationPerformance in module my_measures:

class BinaryClassificationPerformance(builtins.object)
 |  BinaryClassificationPerformance(predictions, labels, desc, probabilities=None)
 |  
 |  Performance measures to evaluate the fit of a binary classification model, v1.02
 |  
 |  Methods defined here:
 |  
 |  __init__(self, predictions, labels, desc, probabilities=None)
 |      Initialize attributes: predictions-vector of predicted values for Y, labels-vector of labels for Y
 |  
 |  compute_measures(self)
 |      Compute performance measures defined by Flach p. 57
 |  
 |  img_indices(self)
 |      Get the indices of true and false positives to be able to locate the corresponding images in a list of image names
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the obj

        Step 3: Parametric Specification for Feature Extraction

In [5]:
def process_raw_data(fn, my_random_seed, test=False):
    """Read a tab-separated movie-review file and build the scaled feature matrix.

    Pipeline: HashingVectorizer bag of words -> TF-IDF -> append the
    `word_count` and `punc_count` columns -> StandardScaler(with_mean=False).

    In training mode (`test=False`) every transformer is fitted and appended to
    the module-level list `fitted_transformations` in the order vectorizer,
    TF-IDF transformer, scaler. In test mode (`test=True`) the transformers are
    read back from indices 0, 1 and 2 of that list, so a training-mode call
    must have populated it first.
    NOTE(review): because the indices are fixed, this assumes the list was
    empty before the training-mode call -- confirm callers always reset it.

    Parameters
    ----------
    fn : path handed to `pd.read_csv`; the file needs a `review` column, and a
        `sentiment` column when `test` is False.
    my_random_seed : `random_state` for `train_test_split`; unused when
        `test` is True.
    test : False to fit the transformers and split into train/test sets,
        True to only apply the already-fitted transformers.

    Returns
    -------
    test=False: (X_train, X_test, y_train, y_test, X_raw_train, X_raw_test)
    test=True:  (movie_data, X_submission_test)
    In both cases the returned raw frame(s) carry the added `word_count` and
    `punc_count` columns.

    Raises
    ------
    IndexError
        In test mode, when `fitted_transformations` holds fewer than three items.
    """
    training = not test

    # read and summarize data
    movie_data = pd.read_csv(fn, sep='\t')
    print("movie_data is:", type(movie_data))
    print("movie_data has", movie_data.shape[0], "rows and", movie_data.shape[1], "columns", "\n")
    print("the data types for each of the columns in movie_data:")
    print(movie_data.dtypes, "\n")
    # the message now matches the number of rows actually displayed
    print("the first 5 rows in movie_data:")
    print(movie_data.head(5))
    if training:
        print("The rate of 'good' movie reviews in the dataset: ")
        print(movie_data['sentiment'].mean())

    # vectorize Bag of Words from review text; as sparse matrix
    if training:  # fit_transform()
        hv = HashingVectorizer(n_features=2 ** 17, alternate_sign=False)
        X_hv = hv.fit_transform(movie_data.review)
        fitted_transformations.append(hv)
    else:  # transform() with the vectorizer stored by the training run
        X_hv = fitted_transformations[0].transform(movie_data.review)
    print("Shape of HashingVectorizer X:")
    print(X_hv.shape)

    # http://scikit-learn.org/stable/modules/generated/sklearn.feature_extraction.text.TfidfTransformer.html
    if training:
        transformer = TfidfTransformer()
        X_tfidf = transformer.fit_transform(X_hv)
        fitted_transformations.append(transformer)
    else:
        X_tfidf = fitted_transformations[1].transform(X_hv)

    # create additional quantitative features from the review text
    movie_data['word_count'] = movie_data['review'].str.split(' ').str.len()
    # raw string: the pattern is a regex for a literal period; the non-raw
    # "\." form is an invalid escape sequence that newer Pythons warn about
    movie_data['punc_count'] = movie_data['review'].str.count(r"\.")

    X_quant_features = movie_data[["word_count", "punc_count"]]
    print("Look at a few rows of the new quantitative features: ")
    print(X_quant_features.head(10))

    # Combine all quantitative features into a single sparse matrix
    X_quant_features_csr = csr_matrix(X_quant_features)
    X_combined = hstack([X_tfidf, X_quant_features_csr])
    X_matrix = csr_matrix(X_combined)  # convert to sparse matrix
    print("Size of combined bag of words and new quantitative variables matrix:")
    print(X_matrix.shape)

    # Create `X`, scaled matrix of features
    # with_mean=False: no centering is applied to the sparse matrix
    if training:
        sc = StandardScaler(with_mean=False)
        X = sc.fit_transform(X_matrix)
        fitted_transformations.append(sc)
        print(X.shape)
        y = movie_data['sentiment']
    else:
        X = fitted_transformations[2].transform(X_matrix)
        print(X.shape)

    # Submission/test mode: nothing to split, return the features as they are
    if test:
        X_submission_test = X
        print("Shape of X_test for submission:")
        print(X_submission_test.shape)
        print('SUCCESS!')
        return movie_data, X_submission_test

    # Training mode: split features, labels and the raw frame with one call so
    # the rows stay aligned across the three
    X_train, X_test, y_train, y_test, X_raw_train, X_raw_test = train_test_split(
        X, y, movie_data, test_size=0.2, random_state=my_random_seed
    )
    print("Shape of X_train and X_test:")
    print(X_train.shape)
    print(X_test.shape)
    print("Shape of y_train and y_test:")
    print(y_train.shape)
    print(y_test.shape)
    print("Shape of X_raw_train and X_raw_test:")
    print(X_raw_train.shape)
    print(X_raw_test.shape)
    print('SUCCESS!')
    return X_train, X_test, y_train, y_test, X_raw_train, X_raw_test

        Step 4: Training and Test Datasets for Movie Reviews (we set the value of our random seed to 20)


In [6]:
# Global list that process_raw_data() appends its fitted transformers to in
# training mode and reads back by position in test mode. It is reset here so
# that re-running this cell starts from an empty list.
fitted_transformations = []

# CHANGE these two values for your machine / experiment.
TRAIN_DATA_PATH = '/Users/jamesboyd/Desktop/moviereviews_train.tsv'
MY_RANDOM_SEED = 20  # any integer other than 74 will do

X_train, X_test, y_train, y_test, X_raw_train, X_raw_test = process_raw_data(
    fn=TRAIN_DATA_PATH,
    my_random_seed=MY_RANDOM_SEED,
)

print("Number of fits stored in `fitted_transformations` list: ")
print(len(fitted_transformations))

movie_data is: <class 'pandas.core.frame.DataFrame'>
movie_data has 25000 rows and 3 columns 

the data types for each of the columns in movie_data:
id           object
sentiment     int64
review       object
dtype: object 

the first 10 rows in movie_data:
       id  sentiment                                             review
0  5814_8          1  With all this stuff going down at the moment w...
1  2381_9          1  \The Classic War of the Worlds\" by Timothy Hi...
2  7759_3          0  The film starts with a manager (Nicholas Bell)...
3  3630_4          0  It must be assumed that those who praised this...
4  9495_8          1  Superbly trashy and wondrously unpretentious 8...
The rate of 'good' movie reviews in the dataset: 
0.5
Shape of HashingVectorizer X:
(25000, 131072)
Look at a few rows of the new quantitative features: 
   word_count  punc_count
0         433          20
1         158          16
2         378          20
3         379           8
4         367           9


    Next, we perform 29 trials with different values of the penalty term "C", ranging from 0.00000000001 to 100. We sequence our trials by penalty magnitude, beginning with 0.00000000001. Each trial assesses the performance of our Support Vector Classifier on both the train and test data. 

In [25]:
# Trial: LinearSVC with C = 1e-11; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=1e-11)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 8685, 'TN': 9765, 'FP': 214, 'FN': 1336, 'Accuracy': 0.9225, 'Precision': 0.9759523541971008, 'Recall': 0.8666799720586768, 'desc': 'svc_train'}


In [24]:
# Test-split measures for the current `svc` (the C=1e-11 fit when cells are run in order).
# NOTE(review): this cell's saved execution count (24) precedes the fit cell
# above (25), so the saved output may come from a different model -- re-run
# the notebook top to bottom before relying on it.
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 1889, 'TN': 2332, 'FP': 189, 'FN': 590, 'Accuracy': 0.8442, 'Precision': 0.9090471607314725, 'Recall': 0.7620008067769262, 'desc': 'svc_test'}


In [26]:
# Trial: LinearSVC with C = 1e-10; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=1e-10)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 8685, 'TN': 9764, 'FP': 215, 'FN': 1336, 'Accuracy': 0.92245, 'Precision': 0.9758426966292135, 'Recall': 0.8666799720586768, 'desc': 'svc_train'}


In [27]:
# Test-split measures for the current `svc` (the C=1e-10 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 1889, 'TN': 2332, 'FP': 189, 'FN': 590, 'Accuracy': 0.8442, 'Precision': 0.9090471607314725, 'Recall': 0.7620008067769262, 'desc': 'svc_test'}


In [34]:
# Trial: LinearSVC with C = 1e-9; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=1e-9)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 8686, 'TN': 9764, 'FP': 215, 'FN': 1335, 'Accuracy': 0.9225, 'Precision': 0.9758454106280193, 'Recall': 0.8667797624987527, 'desc': 'svc_train'}


In [36]:
# Test-split measures for the current `svc` (the C=1e-9 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 1892, 'TN': 2332, 'FP': 189, 'FN': 587, 'Accuracy': 0.8448, 'Precision': 0.9091782796732341, 'Recall': 0.763210972166196, 'desc': 'svc_test'}


In [37]:
# Trial: LinearSVC with C = 1e-8; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=1e-8)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 8718, 'TN': 9755, 'FP': 224, 'FN': 1303, 'Accuracy': 0.92365, 'Precision': 0.9749496756877656, 'Recall': 0.8699730565811795, 'desc': 'svc_train'}


In [38]:
# Test-split measures for the current `svc` (the C=1e-8 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 1908, 'TN': 2324, 'FP': 197, 'FN': 571, 'Accuracy': 0.8464, 'Precision': 0.9064133016627078, 'Recall': 0.7696651875756353, 'desc': 'svc_test'}


In [55]:
# Trial: LinearSVC with C = 3e-8; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=3e-8)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 8793, 'TN': 9738, 'FP': 241, 'FN': 1228, 'Accuracy': 0.92655, 'Precision': 0.9733230019924729, 'Recall': 0.8774573395868676, 'desc': 'svc_train'}


In [56]:
# Test-split measures for the current `svc` (the C=3e-8 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 1925, 'TN': 2312, 'FP': 209, 'FN': 554, 'Accuracy': 0.8474, 'Precision': 0.9020618556701031, 'Recall': 0.7765227914481646, 'desc': 'svc_test'}


In [61]:
# Trial: LinearSVC with C = 5e-8; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=5e-8)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 8856, 'TN': 9731, 'FP': 248, 'FN': 1165, 'Accuracy': 0.92935, 'Precision': 0.9727592267135325, 'Recall': 0.8837441373116456, 'desc': 'svc_train'}


In [62]:
# Test-split measures for the current `svc` (the C=5e-8 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 1945, 'TN': 2298, 'FP': 223, 'FN': 534, 'Accuracy': 0.8486, 'Precision': 0.897140221402214, 'Recall': 0.7845905607099637, 'desc': 'svc_test'}


In [67]:
# Trial: LinearSVC with C = 6e-8; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=6e-8)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 8878, 'TN': 9722, 'FP': 257, 'FN': 1143, 'Accuracy': 0.93, 'Precision': 0.9718664477285167, 'Recall': 0.885939526993314, 'desc': 'svc_train'}


In [68]:
# Test-split measures for the current `svc` (the C=6e-8 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 1952, 'TN': 2295, 'FP': 226, 'FN': 527, 'Accuracy': 0.8494, 'Precision': 0.8962350780532599, 'Recall': 0.7874142799515934, 'desc': 'svc_test'}


In [59]:
# Trial: LinearSVC with C = 7e-8; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=7e-8)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 8904, 'TN': 9719, 'FP': 260, 'FN': 1117, 'Accuracy': 0.93115, 'Precision': 0.9716281099956351, 'Recall': 0.8885340784352859, 'desc': 'svc_train'}


In [60]:
# Test-split measures for the current `svc` (the C=7e-8 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 1966, 'TN': 2291, 'FP': 230, 'FN': 513, 'Accuracy': 0.8514, 'Precision': 0.895264116575592, 'Recall': 0.7930617184348527, 'desc': 'svc_test'}


In [39]:
# Trial: LinearSVC with C = 1e-7; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=1e-7)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 8986, 'TN': 9715, 'FP': 264, 'FN': 1035, 'Accuracy': 0.93505, 'Precision': 0.9714594594594594, 'Recall': 0.8967168945215048, 'desc': 'svc_train'}


In [40]:
# Test-split measures for the current `svc` (the C=1e-7 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 1977, 'TN': 2276, 'FP': 245, 'FN': 502, 'Accuracy': 0.8506, 'Precision': 0.8897389738973898, 'Recall': 0.7974989915288423, 'desc': 'svc_test'}


In [65]:
# Trial: LinearSVC with C = 3e-7; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=3e-7)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9225, 'TN': 9699, 'FP': 280, 'FN': 796, 'Accuracy': 0.9462, 'Precision': 0.970541820094687, 'Recall': 0.9205668096996308, 'desc': 'svc_train'}


In [66]:
# Test-split measures for the current `svc` (the C=3e-7 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2058, 'TN': 2235, 'FP': 286, 'FN': 421, 'Accuracy': 0.8586, 'Precision': 0.8779863481228669, 'Recall': 0.8301734570391287, 'desc': 'svc_test'}


In [63]:
# Trial: LinearSVC with C = 5e-7; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=5e-7)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9359, 'TN': 9711, 'FP': 268, 'FN': 662, 'Accuracy': 0.9535, 'Precision': 0.972161628752467, 'Recall': 0.9339387286697934, 'desc': 'svc_train'}


In [64]:
# Test-split measures for the current `svc` (the C=5e-7 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2095, 'TN': 2216, 'FP': 305, 'FN': 384, 'Accuracy': 0.8622, 'Precision': 0.8729166666666667, 'Recall': 0.845098830173457, 'desc': 'svc_test'}


In [71]:
# Trial: LinearSVC with C = 6e-7; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=6e-7)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9408, 'TN': 9724, 'FP': 255, 'FN': 613, 'Accuracy': 0.9566, 'Precision': 0.9736106799130705, 'Recall': 0.9388284602335096, 'desc': 'svc_train'}


In [72]:
# Test-split measures for the current `svc` (the C=6e-7 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2109, 'TN': 2212, 'FP': 309, 'FN': 370, 'Accuracy': 0.8642, 'Precision': 0.8722084367245657, 'Recall': 0.8507462686567164, 'desc': 'svc_test'}


In [69]:
# Trial: LinearSVC with C = 7e-7; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=7e-7)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9447, 'TN': 9730, 'FP': 249, 'FN': 574, 'Accuracy': 0.95885, 'Precision': 0.974319306930693, 'Recall': 0.9427202873964674, 'desc': 'svc_train'}


In [70]:
# Test-split measures for the current `svc` (the C=7e-7 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2112, 'TN': 2210, 'FP': 311, 'FN': 367, 'Accuracy': 0.8644, 'Precision': 0.8716467189434586, 'Recall': 0.8519564340459863, 'desc': 'svc_test'}


In [73]:
# Trial: LinearSVC with C = 8e-7; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=8e-7)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9473, 'TN': 9737, 'FP': 242, 'FN': 548, 'Accuracy': 0.9605, 'Precision': 0.9750900669068451, 'Recall': 0.9453148388384393, 'desc': 'svc_train'}


In [74]:
# Test-split measures for the current `svc` (the C=8e-7 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2125, 'TN': 2208, 'FP': 313, 'FN': 354, 'Accuracy': 0.8666, 'Precision': 0.8716160787530763, 'Recall': 0.8572004840661557, 'desc': 'svc_test'}


In [41]:
# Trial: LinearSVC with C = 1e-6; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=1e-6)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9542, 'TN': 9752, 'FP': 227, 'FN': 479, 'Accuracy': 0.9647, 'Precision': 0.9767632306274951, 'Recall': 0.9522003792036723, 'desc': 'svc_train'}


In [42]:
# Test-split measures for the current `svc` (the C=1e-6 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2139, 'TN': 2208, 'FP': 313, 'FN': 340, 'Accuracy': 0.8694, 'Precision': 0.8723491027732463, 'Recall': 0.8628479225494151, 'desc': 'svc_test'}


In [75]:
# Trial: LinearSVC with C = 2e-6; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=2e-6)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9702, 'TN': 9822, 'FP': 157, 'FN': 319, 'Accuracy': 0.9762, 'Precision': 0.9840754640430064, 'Recall': 0.9681668496158068, 'desc': 'svc_train'}


In [76]:
# Test-split measures for the current `svc` (the C=2e-6 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2186, 'TN': 2202, 'FP': 319, 'FN': 293, 'Accuracy': 0.8776, 'Precision': 0.8726546906187624, 'Recall': 0.881807180314643, 'desc': 'svc_test'}


In [53]:
# Trial: LinearSVC with C = 3e-6; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=3e-6)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9787, 'TN': 9858, 'FP': 121, 'FN': 234, 'Accuracy': 0.98225, 'Precision': 0.9877876463463867, 'Recall': 0.9766490370222533, 'desc': 'svc_train'}


In [54]:
# Test-split measures for the current `svc` (the C=3e-6 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2199, 'TN': 2212, 'FP': 309, 'FN': 280, 'Accuracy': 0.8822, 'Precision': 0.8767942583732058, 'Recall': 0.8870512303348125, 'desc': 'svc_test'}


In [79]:
# Trial: LinearSVC with C = 5e-6; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=5e-6)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9891, 'TN': 9898, 'FP': 81, 'FN': 130, 'Accuracy': 0.98945, 'Precision': 0.9918772563176895, 'Recall': 0.9870272427901408, 'desc': 'svc_train'}


In [80]:
# Test-split measures for the current `svc` (the C=5e-6 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2219, 'TN': 2207, 'FP': 314, 'FN': 260, 'Accuracy': 0.8852, 'Precision': 0.8760363205684959, 'Recall': 0.8951189995966116, 'desc': 'svc_test'}


In [77]:
# Trial: LinearSVC with C = 7e-6; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=7e-6)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)


{'Pos': 10021, 'Neg': 9979, 'TP': 9927, 'TN': 9917, 'FP': 62, 'FN': 94, 'Accuracy': 0.9922, 'Precision': 0.9937931724897388, 'Recall': 0.990619698632871, 'desc': 'svc_train'}


In [78]:
# Test-split measures for the current `svc` (the C=7e-6 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2225, 'TN': 2214, 'FP': 307, 'FN': 254, 'Accuracy': 0.8878, 'Precision': 0.8787519747235387, 'Recall': 0.8975393303751513, 'desc': 'svc_test'}


In [43]:
# Trial: LinearSVC with C = 1e-5; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=1e-5)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 9957, 'TN': 9942, 'FP': 37, 'FN': 64, 'Accuracy': 0.99495, 'Precision': 0.9962977786672004, 'Recall': 0.9936134118351462, 'desc': 'svc_train'}


In [44]:
# Test-split measures for the current `svc` (the C=1e-5 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2218, 'TN': 2216, 'FP': 305, 'FN': 261, 'Accuracy': 0.8868, 'Precision': 0.8791121680539041, 'Recall': 0.8947156111335216, 'desc': 'svc_test'}


In [45]:
# Trial: LinearSVC with C = 1e-4; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=1e-4)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 10021, 'TN': 9979, 'FP': 0, 'FN': 0, 'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'desc': 'svc_train'}


In [46]:
# Test-split measures for the current `svc` (the C=1e-4 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2184, 'TN': 2164, 'FP': 357, 'FN': 295, 'Accuracy': 0.8696, 'Precision': 0.859504132231405, 'Recall': 0.8810004033884631, 'desc': 'svc_test'}


In [7]:
# Trial: LinearSVC with C = 5e-4; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=5e-4)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 10021, 'TN': 9979, 'FP': 0, 'FN': 0, 'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'desc': 'svc_train'}


In [8]:
# Test-split measures for the current `svc` (the C=5e-4 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2167, 'TN': 2140, 'FP': 381, 'FN': 312, 'Accuracy': 0.8614, 'Precision': 0.8504709576138147, 'Recall': 0.8741427995159339, 'desc': 'svc_test'}


In [81]:
# Trial: LinearSVC with C = 0.001; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=0.001)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 10021, 'TN': 9979, 'FP': 0, 'FN': 0, 'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'desc': 'svc_train'}


In [82]:
# Test-split measures for the current `svc` (the C=0.001 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2164, 'TN': 2136, 'FP': 385, 'FN': 315, 'Accuracy': 0.86, 'Precision': 0.8489603766182817, 'Recall': 0.872932634126664, 'desc': 'svc_test'}


In [83]:
# Trial: LinearSVC with C = 0.01; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=0.01)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 10021, 'TN': 9979, 'FP': 0, 'FN': 0, 'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'desc': 'svc_train'}


In [84]:
# Test-split measures for the current `svc` (the C=0.01 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2157, 'TN': 2132, 'FP': 389, 'FN': 322, 'Accuracy': 0.8578, 'Precision': 0.8472113118617439, 'Recall': 0.8701089148850343, 'desc': 'svc_test'}


In [85]:
# Trial: LinearSVC with C = 0.1; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=0.1)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 10021, 'TN': 9979, 'FP': 0, 'FN': 0, 'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'desc': 'svc_train'}


In [86]:
# Test-split measures for the current `svc` (the C=0.1 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2156, 'TN': 2134, 'FP': 387, 'FN': 323, 'Accuracy': 0.858, 'Precision': 0.8478175383405426, 'Recall': 0.8697055264219443, 'desc': 'svc_test'}


In [87]:
# Trial: LinearSVC with C = 0.5; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=0.5)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 10021, 'TN': 9979, 'FP': 0, 'FN': 0, 'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'desc': 'svc_train'}


In [88]:
# Test-split measures for the current `svc` (the C=0.5 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2154, 'TN': 2134, 'FP': 387, 'FN': 325, 'Accuracy': 0.8576, 'Precision': 0.8476977567886659, 'Recall': 0.8688987494957644, 'desc': 'svc_test'}


In [89]:
# Trial: LinearSVC with C = 1; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=1)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 10021, 'TN': 9979, 'FP': 0, 'FN': 0, 'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'desc': 'svc_train'}


In [90]:
# Test-split measures for the current `svc` (the C=1 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2154, 'TN': 2134, 'FP': 387, 'FN': 325, 'Accuracy': 0.8576, 'Precision': 0.8476977567886659, 'Recall': 0.8688987494957644, 'desc': 'svc_test'}


In [91]:
# Trial: LinearSVC with C = 10; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=10)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 10021, 'TN': 9979, 'FP': 0, 'FN': 0, 'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'desc': 'svc_train'}


In [92]:
# Test-split measures for the current `svc` (the C=10 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2154, 'TN': 2134, 'FP': 387, 'FN': 325, 'Accuracy': 0.8576, 'Precision': 0.8476977567886659, 'Recall': 0.8688987494957644, 'desc': 'svc_test'}


In [93]:
# Trial: LinearSVC with C = 100; report measures on the training split.
# `svm` is imported once in the notebook's import cell.
svc = svm.LinearSVC(C=100)
svc.fit(X_train, y_train)

svc_performance_train = BinaryClassificationPerformance(svc.predict(X_train), y_train, 'svc_train')
svc_performance_train.compute_measures()
print(svc_performance_train.performance_measures)

{'Pos': 10021, 'Neg': 9979, 'TP': 10021, 'TN': 9979, 'FP': 0, 'FN': 0, 'Accuracy': 1.0, 'Precision': 1.0, 'Recall': 1.0, 'desc': 'svc_train'}


In [94]:
# Test-split measures for the current `svc` (the C=100 fit when cells are run in order).
svc_test = BinaryClassificationPerformance(
    predictions=svc.predict(X_test),
    labels=y_test,
    desc='svc_test',
)
svc_test.compute_measures()
print(svc_test.performance_measures)

{'Pos': 2479, 'Neg': 2521, 'TP': 2154, 'TN': 2134, 'FP': 387, 'FN': 325, 'Accuracy': 0.8576, 'Precision': 0.8476977567886659, 'Recall': 0.8688987494957644, 'desc': 'svc_test'}
