In [43]:
# comment out these lines if you have already installed these modules
#!pip install nltk
#!pip install tensorflow
#!pip install keras

import sklearn as sk
import numpy as np
import scipy
import matplotlib.pyplot as plt
import pandas as pd
from nltk.corpus import stopwords
import string
import nltk
# nltk.download()
import tensorflow
from sklearn.model_selection import train_test_split
from collections import Counter
from pandas import DataFrame
from matplotlib import pyplot
import re
from nltk.stem import WordNetLemmatizer 
from keras.preprocessing.text import Tokenizer
from sklearn.metrics import accuracy_score

# random.seed(10)

In [44]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/cassie/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

Preprocess Code

In [45]:
def process_news(news):
    _news = news.replace('b\"', "")
    _news = _news.replace('b\'', "")
    _news = _news.lower()
    _news = re.sub("[^a-zA-Z]", " ",_news)
    _news = re.sub('[\s]+', ' ', _news)
    
    tokens = _news.split(" ")
    if "" in tokens:
        tokens.remove("")
    
    lemmatizer = WordNetLemmatizer() 
    tokens = [lemmatizer.lemmatize(w) for w in tokens]
    #remove punctuation from each token
    table = str.maketrans('', '', string.punctuation)
    tokens = [w.translate(table) for w in tokens]
    
    # remove remaining tokens that are not alphabetic
    tokens = [word for word in tokens if word.isalpha()]
    # filter out stop words
    stop_words = set(stopwords.words('english'))
    tokens = [w for w in tokens if not w in stop_words]
    # filter out short tokens
    tokens = [word for word in tokens if len(word) > 1]
    
    _news = ' '.join(tokens)    
     
    return _news

  _news = re.sub('[\s]+', ' ', _news)


Load data

In [46]:
def read_data():

    data = pd.read_csv("../Datasets/djia/Combined_News_DJIA.csv")
    
    dfs = []
    data["News"] = ""
    for i in range(1,25):
        col = "Top"+str(i)
        data["News"] = data["News"] +" "+ data[col]
    data = data.dropna()
    data['PreProcessedNews'] = data['News'].map(process_news)
    
    data = data[['Date', 'News', 'PreProcessedNews', 'Label']]
    
    stock_prices = "../Datasets/djia/upload_DJIA_table.csv"
    stock_data = pd.read_csv(stock_prices)
    
    print(data.head(2))
    print(stock_data.head(2))
    
    
    #merged_dataframe = data.merge(stock_data, how='inner', on='Date')
    merged_dataframe = pd.merge(data, stock_data, how='inner', on = 'Date')

    Xy_train = merged_dataframe[:int(len(data)*0.8)]
    Xy_test = merged_dataframe[int(len(data)*0.8):]
    
    return merged_dataframe, Xy_train, Xy_test

Load data function call

In [47]:
news, Xy_train, Xy_test = read_data()

         Date                                               News  \
0  2008-08-08   b"Georgia 'downs two Russian warplanes' as co...   
1  2008-08-11   b'Why wont America and Nato help us? If they ...   

                                    PreProcessedNews  Label  
0  georgia two russian warplane country move brin...      0  
1  wont america nato help wont help help iraq bus...      1  
         Date          Open          High           Low         Close  \
0  2016-07-01  17924.240234  18002.380859  17916.910156  17949.369141   
1  2016-06-30  17712.759766  17930.609375  17711.800781  17929.990234   

      Volume     Adj Close  
0   82160000  17949.369141  
1  133030000  17929.990234  


In [48]:
X_train = Xy_train['PreProcessedNews']
X_test = Xy_test['PreProcessedNews']
y_train = Xy_train['Label'].to_numpy()
y_test = Xy_test['Label'].to_numpy()

Prepare data for vectorization

In [49]:
    
def prepare_data(train_docs, test_docs, mode):
    # create the tokenizer
    tokenizer = Tokenizer()
    # fit the tokenizer on the documents
    tokenizer.fit_on_texts(train_docs)
    # encode training data set
    Xtrain = tokenizer.texts_to_matrix(train_docs, mode=mode)
    # encode testing data set
    Xtest = tokenizer.texts_to_matrix(test_docs, mode=mode)
    
    return Xtrain, Xtest


Create vectors for each sentence
* binary
* count
* tfidf
* freq

In [50]:
#modes = ['binary', 'count', 'tfidf', 'freq']

# CHANGE MODE
mode = 'binary'

X_train, X_test = prepare_data(X_train, X_test, mode)

In [51]:
X_test.shape

(398, 25671)

In [52]:
X_train.shape

(1588, 25671)

In [53]:
X_train

array([[0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 1., 1., ..., 1., 1., 1.]])

Reduce the dimentionality since this is a sparse vector

In [54]:
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import PCA

# Make sure to comment one or the other.

# CHANGE TO SVD
# CHANGE: the n_components - Reduce dimensionality
svd = TruncatedSVD(n_components=50, n_iter=10)
X_train = svd.fit_transform(X_train)
X_test = svd.transform(X_test)

# # CHANGE TO PCA
# # CHANGE: the the n_components - Reduce dimensionality
# pca = PCA(n_components=2)
# X_train = pca.fit_transform(X_train)
# X_test = pca.transform(X_test)


In [55]:
X_train.shape

(1588, 50)

In [56]:
X_test.shape

(398, 50)

Apply machine learning example

In [57]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)


In [65]:
# https://scikit-learn.org/stable/auto_examples/model_selection/plot_grid_search_digits.html
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html

from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.svm import SVC
import warnings
warnings.filterwarnings('always')  # "error", "ignore", "always", "default", "module" or "once"

# Please refer to:
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html
# for more information of which parameters to change for each kernel

# CHANGE: Set the parameters by cross-validation
tuned_parameters = [
    
{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],'C': [50, 100, 150, 200]},

{'kernel': ['sigmoid'], 'gamma': [1e-0,1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9],'C': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 50]},

{'kernel': ['poly'], 'gamma': [1e-1, 1e-2, 1e-3, 1e-4, 1e-5], 'C': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20], 
'degree': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
 
{'kernel': ['linear'], 'C': [50, 100, 150, 200]}

]

scores = ['accuracy', 'precision']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()
    
    if score == "accuracy":
    
        clf = GridSearchCV(
            SVC(), tuned_parameters, scoring='%s' % score
        )
    
    if score == "precision":
        clf = GridSearchCV(
            SVC(), tuned_parameters, scoring='%s_macro' % score
        )
    
    
    clf.fit(X_train, y_train)


    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, clf.predict(X_test)
    print(classification_report(y_true, y_pred))
    print()

# Note the problem is too easy: the hyperparameter plateau is too flat and the
# output model is the same for precision and recall with ties in quality.

# Tuning hyper-parameters for accuracy

Best parameters set found on development set:

{'C': 5, 'gamma': 0.001, 'kernel': 'sigmoid'}

Grid scores on development set:

0.509 (+/-0.040) for {'C': 1, 'gamma': 1.0, 'kernel': 'sigmoid'}
0.494 (+/-0.052) for {'C': 1, 'gamma': 0.1, 'kernel': 'sigmoid'}
0.523 (+/-0.014) for {'C': 1, 'gamma': 0.01, 'kernel': 'sigmoid'}
0.542 (+/-0.002) for {'C': 1, 'gamma': 0.001, 'kernel': 'sigmoid'}
0.542 (+/-0.002) for {'C': 1, 'gamma': 0.0001, 'kernel': 'sigmoid'}
0.542 (+/-0.002) for {'C': 1, 'gamma': 1e-05, 'kernel': 'sigmoid'}
0.542 (+/-0.002) for {'C': 1, 'gamma': 1e-06, 'kernel': 'sigmoid'}
0.542 (+/-0.002) for {'C': 1, 'gamma': 1e-07, 'kernel': 'sigmoid'}
0.542 (+/-0.002) for {'C': 1, 'gamma': 1e-08, 'kernel': 'sigmoid'}
0.542 (+/-0.002) for {'C': 1, 'gamma': 1e-09, 'kernel': 'sigmoid'}
0.514 (+/-0.026) for {'C': 2, 'gamma': 1.0, 'kernel': 'sigmoid'}
0.497 (+/-0.058) for {'C': 2, 'gamma': 0.1, 'kernel': 'sigmoid'}
0.503 (+/-0.057) for {'C': 2, 'gamma'

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

Best parameters set found on development set:

{'C': 3, 'gamma': 1.0, 'kernel': 'sigmoid'}

Grid scores on development set:

0.505 (+/-0.031) for {'C': 1, 'gamma': 1.0, 'kernel': 'sigmoid'}
0.489 (+/-0.051) for {'C': 1, 'gamma': 0.1, 'kernel': 'sigmoid'}
0.489 (+/-0.008) for {'C': 1, 'gamma': 0.01, 'kernel': 'sigmoid'}
0.271 (+/-0.001) for {'C': 1, 'gamma': 0.001, 'kernel': 'sigmoid'}
0.271 (+/-0.001) for {'C': 1, 'gamma': 0.0001, 'kernel': 'sigmoid'}
0.271 (+/-0.001) for {'C': 1, 'gamma': 1e-05, 'kernel': 'sigmoid'}
0.271 (+/-0.001) for {'C': 1, 'gamma': 1e-06, 'kernel': 'sigmoid'}
0.271 (+/-0.001) for {'C': 1, 'gamma': 1e-07, 'kernel': 'sigmoid'}
0.271 (+/-0.001) for {'C': 1, 'gamma': 1e-08, 'kernel': 'sigmoid'}
0.271 (+/-0.001) for {'C': 1, 'gamma': 1e-09, 'kernel': 'sigmoid'}
0.509 (+/-0.026) for {'C': 2, 'gamma': 1.0, 'kernel': 'sigmoid'}
0.490 (+/-0.055) for {'C': 2, 'gamma': 0.1, 'kernel': 'sigmoid'}
0.482 (+/-0.051) for {'C': 2, 'gamma': 0.01, 'kernel': 'sigmoid'}
0.271 (+/-0.0

  _warn_prf(average, modifier, msg_start, len(result))
