In [3]:
import os
import sys
from sklearn import tree
from sklearn.linear_model import SGDClassifier
from sklearn import svm

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


# get_data_block_start
from get_data import GetData

# Per-symbol classifier comparison: for every symbol provided by GetData,
# train a decision tree, an SGD classifier and an SVM on one half of the
# samples and report each model's accuracy on the other half.
getData = GetData()

# Maps each symbol to a list of formatted accuracy strings, in the order
# decision tree, SGD, SVM.
accuracy = {}

symbols = getData.getAllSymbols()

for symbol in symbols:
    accuracy[symbol] = []
    features = getData.getSymbolFeaturesWithoutDate(symbol)
    # NOTE(review): the meaning of the second argument (4) is defined in
    # GetData.getSymbolCLFLabels, which is not visible here -- confirm.
    labels = getData.getSymbolCLFLabels(symbol, 4)

    ###################################
    # model training and evaluation   #
    ###################################
    # Create the train and test data sets. train_test_split returns the
    # partitions in the order (X_train, X_test, y_train, y_test); unpacking
    # them in any other order binds each name to the opposite partition.
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=.5)

    dtree_classifier = tree.DecisionTreeClassifier()
    # NOTE(review): scikit-learn 1.1 renamed this loss to "log_loss" and
    # 1.3 removed the "log" alias; update the value when upgrading.
    sgd_classifier = SGDClassifier(loss="log", penalty="elasticnet")
    svm_classifier = svm.SVC()

    # train the classifiers
    dtree_classifier.fit(X_train, y_train)
    sgd_classifier.fit(X_train, y_train)
    svm_classifier.fit(X_train, y_train)

    # predict on the held-out half
    dtree_predictions = dtree_classifier.predict(X_test)
    sgd_predictions = sgd_classifier.predict(X_test)
    svm_predictions = svm_classifier.predict(X_test)

    # Record each accuracy as a percentage string rounded to two decimals.
    for predictions in (dtree_predictions, sgd_predictions, svm_predictions):
        score = accuracy_score(y_test, predictions)
        accuracy[symbol].append(str(round(score * 100, 2)) + '%')

    # print the result (to stderr, one line per symbol)
    print("[INFO] %s: %s" % (symbol, ', '.join(accuracy[symbol])), file=sys.stderr)


DEBUG:get_data:Trying to load data
INFO:get_data:DONE
[INFO] A: 51.0%, 47.2%, 52.0%
[INFO] AAL: 53.6%, 49.6%, 52.2%
[INFO] AAP: 50.0%, 49.8%, 51.8%
[INFO] AAPL: 48.2%, 48.0%, 50.0%
[INFO] ABBV: 47.0%, 55.4%, 53.2%
[INFO] ABC: 50.4%, 54.6%, 51.2%
[INFO] ABT: 49.6%, 52.2%, 51.4%
[INFO] ACN: 54.0%, 56.8%, 53.8%
[INFO] ADBE: 48.2%, 54.4%, 50.6%
[INFO] ADI: 49.0%, 51.4%, 50.4%
[INFO] ADM: 47.0%, 43.4%, 47.6%
[INFO] ADP: 53.0%, 46.2%, 50.6%
[INFO] ADS: 51.2%, 52.6%, 45.4%
[INFO] ADSK: 48.4%, 47.0%, 52.0%
[INFO] AEE: 51.8%, 43.2%, 51.4%
[INFO] AEP: 46.6%, 44.2%, 50.6%
[INFO] AES: 51.8%, 46.6%, 50.0%
[INFO] AET: 49.0%, 44.4%, 51.4%
[INFO] AFL: 49.2%, 55.8%, 51.6%
[INFO] AGN: 50.8%, 50.8%, 47.2%
[INFO] AIG: 52.8%, 49.8%, 51.8%
[INFO] AIV: 51.8%, 46.6%, 54.4%
[INFO] AIZ: 51.4%, 50.0%, 51.6%
[INFO] AJG: 47.8%, 47.8%, 49.2%
[INFO] AKAM: 48.8%, 47.6%, 52.0%
[INFO] ALB: 47.6%, 44.0%, 51.6%
[INFO] ALGN: 53.2%, 46.2%, 54.6%
[INFO] ALK: 51.0%, 47.0%, 51.0%
[INFO] ALL: 49.0%, 46.8%, 51.6%
[INFO] ALLE: 5

[INFO] INTU: 50.6%, 55.8%, 54.6%
[INFO] IP: 47.4%, 51.6%, 49.8%
[INFO] IPG: 56.0%, 43.2%, 54.0%
[INFO] IPGP: 52.4%, 46.0%, 53.8%
[INFO] IQV: 50.8%, 45.8%, 50.2%
[INFO] IR: 49.2%, 52.2%, 51.6%
[INFO] IRM: 44.6%, 52.6%, 53.4%
[INFO] ISRG: 52.0%, 50.6%, 50.6%
[INFO] IT: 52.6%, 53.2%, 50.8%
[INFO] ITW: 49.6%, 56.6%, 48.8%
[INFO] IVZ: 50.8%, 51.8%, 47.6%
[INFO] JBHT: 50.8%, 50.0%, 50.4%
[INFO] JCI: 50.0%, 46.2%, 50.4%
[INFO] JEC: 49.6%, 48.6%, 49.4%
[INFO] JNJ: 50.0%, 49.0%, 47.2%
[INFO] JNPR: 49.8%, 45.2%, 56.6%
[INFO] JPM: 52.0%, 52.0%, 50.2%
[INFO] JWN: 50.4%, 50.4%, 48.2%
[INFO] K: 47.8%, 50.8%, 48.8%
[INFO] KEY: 52.6%, 47.2%, 53.0%
[INFO] KIM: 50.0%, 51.4%, 48.8%
[INFO] KLAC: 47.8%, 54.2%, 50.4%
[INFO] KMB: 50.4%, 49.2%, 49.2%
[INFO] KMI: 47.6%, 44.8%, 45.6%
[INFO] KMX: 51.0%, 55.0%, 46.8%
[INFO] KO: 48.4%, 52.4%, 50.0%
[INFO] KORS: 47.4%, 55.4%, 50.6%
[INFO] KR: 48.4%, 46.2%, 54.0%
[INFO] KSS: 50.4%, 50.4%, 53.8%
[INFO] KSU: 50.0%, 51.6%, 50.8%
[INFO] L: 52.2%, 50.6%, 51.8%
[INFO] LB:

[INFO] ^IXIC: 52.0%, 42.2%, 57.8%
[INFO] ^N225: 48.4%, 45.4%, 54.6%
