In [9]:
import numpy as np
def _read_split(path):
    """Parse one comma-separated data file into (feature_rows, labels)."""
    features = []
    labels = []
    # The context manager closes the handle even if parsing raises.
    with open(path, 'r') as handle:
        for line in handle:
            line = line.strip()
            # A blank line (e.g. a trailing newline at end of file) carries no
            # sample; keeping it would produce a ragged feature list and make
            # the float conversion in get_data fail.
            if not line:
                continue
            vals = line.split(',')
            features.append(vals[:-1])  # every column but the last
            labels.append(vals[-1])     # last column is the target
    return features, labels


def get_data(f_train, f_test):
    """Load a train/test pair of comma-separated data files.

    Each non-blank line is one sample: all columns except the last are the
    features and the last column is the label. Every value must be parseable
    as a float.

    Parameters
    ----------
    f_train : str
        Path of the training file.
    f_test : str
        Path of the test file.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train_features, test_features, train_labels, test_labels)``, all
        with dtype float32. Note the order: both feature arrays come first,
        then both label arrays.

    Raises
    ------
    OSError
        If either file cannot be opened.
    ValueError
        If a value is not numeric or rows have differing column counts.
    """
    train_features, train_labels = _read_split(f_train)
    test_features, test_labels = _read_split(f_test)

    return (
        np.array(train_features, dtype='float32'),
        np.array(test_features, dtype='float32'),
        np.array(train_labels, dtype='float32'),
        np.array(test_labels, dtype='float32'),
    )

# Classification data. get_data returns both feature arrays first, then both
# label arrays, so the unpacking order below is x_train, x_test, y_train, y_test.
(class_x_train,
 class_x_test,
 class_y_train,
 class_y_test) = get_data('class_train.data', 'class_test.data')

# Regression data, unpacked in the same order.
(reg_x_train,
 reg_x_test,
 reg_y_train,
 reg_y_test) = get_data('reg_train.data', 'reg_test.data')

In [10]:
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score

In [12]:
# Fixed seed for the stochastic estimators (decision trees, MLP) so that the
# scores, the selected model and the predictions are the same on every
# Restart & Run All.
RANDOM_STATE = 42

# ---------------------------------------------------------------------------
# Regression: linear regression vs. decision tree
# ---------------------------------------------------------------------------
# Both regressors are fitted on the full training set so whichever one wins the
# comparison below can predict on the test set straight away.
lr = LinearRegression()
lr.fit(reg_x_train, reg_y_train)
# 10-fold cross validation. cross_val_score works on clones of the estimator,
# so the fit above is left intact. With no `scoring` argument a regressor is
# scored with R^2, which is what the "Accuracy" label below actually reports.
lr_scores = cross_val_score(lr, reg_x_train, y=reg_y_train, cv=10)
print("Linear Regression Mean Accuracy: %0.2f" % lr_scores.mean())

dtr = DecisionTreeRegressor(random_state=RANDOM_STATE)
dtr.fit(reg_x_train, reg_y_train)
dtr_scores = cross_val_score(dtr, reg_x_train, y=reg_y_train, cv=10)
print("Decision Tree Regression Mean Accuracy: %0.2f" % dtr_scores.mean())

# Predict once, with the regressor that has the higher mean CV score
# (the decision tree wins ties).
reg_model = lr if lr_scores.mean() > dtr_scores.mean() else dtr
pred_labels = reg_model.predict(reg_x_test)
print(pred_labels)

# ---------------------------------------------------------------------------
# Classification: neural network vs. decision tree
# ---------------------------------------------------------------------------
nn_classifier = MLPClassifier(random_state=RANDOM_STATE)
nn_classifier.fit(class_x_train, class_y_train)

dt_classifier = DecisionTreeClassifier(random_state=RANDOM_STATE)
dt_classifier.fit(class_x_train, class_y_train)

# 10-fold cross validation; the reported spread is two standard deviations.
nn_scores = cross_val_score(nn_classifier, class_x_train, y=class_y_train, cv=10)
print("Neural Network Cross Val Score: %0.2f (+/- %0.2f)" % (nn_scores.mean(), nn_scores.std() * 2))

dt_scores = cross_val_score(dt_classifier, class_x_train, y=class_y_train, cv=10)
print("Decision Tree Accuracy: %0.2f (+/- %0.2f)" % (dt_scores.mean(), dt_scores.std() * 2))

# Keep the classifier with the higher mean CV score (the decision tree wins ties).
if dt_scores.mean() >= nn_scores.mean():
    model = dt_classifier
else:
    model = nn_classifier

predicted_labels = model.predict(class_x_test)
print(predicted_labels)

Linear Regression Mean Accuracy: 0.72
Decision Tree Regression Mean Accuracy: 0.96
[79. 92. 94. ... 92. 84. 76.]
Neural Network Cross Val Score: 0.79 (+/- 0.10)
Decision Tree Accuracy: 0.81 (+/- 0.04)
[-1. -1.  1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1.
 -1.  1. -1. -1.  1. -1. -1.  1. -1. -1. -1.  1. -1. -1. -1. -1. -1. -1.
  1. -1. -1.  1. -1. -1. -1.  1.  1. -1. -1.  1. -1. -1. -1.  1. -1.  1.
 -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1. -1. -1.
 -1. -1.  1.  1.  1. -1. -1.  1.  1. -1. -1. -1.  1. -1.  1. -1.  1. -1.
  1.  1. -1. -1.  1.  1.  1. -1. -1. -1. -1. -1.  1. -1. -1.  1. -1.  1.
 -1.  1. -1.  1. -1. -1. -1. -1.  1. -1. -1.  1. -1. -1. -1.  1. -1. -1.
 -1. -1. -1. -1.  1.  1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1. -1. -1.
 -1.  1.  1. -1.  1.  1.  1. -1. -1.  1. -1.  1. -1. -1. -1. -1.  1. -1.
 -1.  1. -1.  1.  1.  1. -1.  1. -1. -1. -1.  1. -1. -1. -1.  1.  1. -1.
 -1.  1. -1. -1. -1. -1. -1. -1. -1. -1.  1. -1.  1. -1. -1. -1. -1. 