In [None]:
# Data
#
# Synthetic datasets used by the rest of the notebook. Every generator is
# seeded with random_state=0 so a fresh run reproduces the same arrays.

from sklearn.datasets import make_blobs
from sklearn.datasets import make_classification
from sklearn.datasets import make_regression

# Clustering: 10,000 two-dimensional points drawn around 4 centres.
cluster_x, cluster_y = make_blobs(
    n_samples=10000,
    n_features=2,
    centers=4,
    cluster_std=2.0,
    random_state=0,
)

# Regression: two informative features plus noise. coef=True makes the
# generator also return the coefficients of the underlying linear model.
reg_x, reg_y, reg_coeff = make_regression(
    n_samples=10000,
    n_features=2,
    n_informative=2,
    noise=10,
    coef=True,
    random_state=0,
)

# Classification: binary problem on two features, none redundant or repeated.
class_x, class_y = make_classification(
    n_samples=1000,
    n_features=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    random_state=0,
)

# Disabled experiment, kept as comments instead of a bare string literal:
# a string left as the last expression of a cell is echoed as cell output.
# NOTE(review): with n_informative=2 and n_classes=4 this call looks like it
# would be rejected by make_classification (default n_clusters_per_class=2
# needs 2**n_informative >= 8) - confirm before re-enabling.
#
# classification_data = make_classification(n_samples=1000,
#                                           n_features = 8,
#                                           n_informative = 2,
#                                           n_redundant = 2,
#                                           n_repeated = 2,
#                                           n_classes= 4,
#                                           weights = None,
#                                           random_state=0)

In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.tree import DecisionTreeRegressor

# Define model. Specify a number for random_state to ensure same results each run
reg_model = DecisionTreeRegressor(random_state=1)

# Fit model
reg_model.fit(reg_x, reg_y)

# Predict with the estimator fitted above (the original referenced an
# undefined `melbourne_model`). The predictions are made on the same rows the
# tree was trained on, so this is the in-sample error, not a validation score.
predicted_home_prices = reg_model.predict(reg_x)
mean_absolute_error(reg_y, predicted_home_prices)

In [None]:
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# split data into training and validation data, for both features and target
# The split is based on a random number generator. Supplying a numeric value to
# the random_state argument guarantees we get the same split every time we
# run this script.
train_X, val_X, train_y, val_y = train_test_split(reg_x, reg_y, random_state=0)

# Define model. Seeded for the same reason as the split: without it the tree,
# and therefore the printed error, can differ between runs.
reg_model = DecisionTreeRegressor(random_state=1)
# Fit model on the training portion only
reg_model.fit(train_X, train_y)

# get predictions on the held-out validation data
val_predictions = reg_model.predict(val_X)

print(mean_absolute_error(val_y, val_predictions))

In [None]:
from sklearn.feature_selection import SelectKBest, f_regression

# Wider regression problem: 8 features, of which only 2 are informative.
x, y, coeff = make_regression(
    n_samples=10000,
    n_features=8,
    n_informative=2,
    noise=10,
    coef=True,
    random_state=0,
)

# feature extraction: score every feature with f_regression and keep the 4 best
test = SelectKBest(score_func=f_regression, k=4)
fit = test.fit(x, y)

# one score per input feature
print(fit.scores_)

In [None]:
# Reduce x with the fitted transformer held in `fit`.
# NOTE(review): assumes `fit` is still the SelectKBest selector; a later cell
# rebinds `fit`, so this cell is sensitive to execution order.
features = fit.transform(x)

# summarize selected features: first five rows, reduced vs. original
print(features[:5])
print(x[:5])

In [None]:
from sklearn.decomposition import PCA

# Fit a 3-component PCA on the 8-feature matrix.
pca = PCA(n_components=3)
fit = pca.fit(x)

# summarize components: variance share per component, then the component vectors
variance_ratio = fit.explained_variance_ratio_
print("Explained Variance: %s" % variance_ratio)
print(fit.components_)

In [None]:
from sklearn.ensemble import ExtraTreesRegressor

# Extra-trees draws random split thresholds, so the model is seeded to make
# the printed importances identical on every run.
model = ExtraTreesRegressor(random_state=0)
model.fit(x, y)

# one importance value per input feature
print(model.feature_importances_)

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Specify a number for random_state to ensure same results each run.
model = DecisionTreeRegressor(random_state=1)
model.fit(x, y)

# one importance value per input feature
print(model.feature_importances_)


In [None]:
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# random_state only has an effect when shuffle=True; current scikit-learn
# raises ValueError when a seed is passed with the default shuffle=False.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
scoring = 'neg_mean_squared_error'

# Score each linear model on the same folds of the regression data (x, y).
# The original passed X, Y to Ridge and Lasso; those names are not defined at
# this point in the notebook and are later bound to a classification dataset.
for model in (LinearRegression(), Ridge(), Lasso()):
    results = cross_val_score(model, x, y, cv=kfold, scoring=scoring)
    # mean of the per-fold negated MSE (closer to 0 is better)
    print(results.mean())

In [None]:
# Compare Algorithms
from pandas import read_csv
from matplotlib import pyplot
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# load dataset: the file is read without a header row, so column names are
# supplied here; the first 8 columns are features, the 9th is the target.
filename = 'pima-indians-diabetes.data.csv'
column_names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=column_names)
array = dataframe.values
X = array[:, 0:8]
Y = array[:, 8]

# prepare models as (short label, estimator) pairs
models = [
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# One seeded, shuffled splitter shared by every model so all are scored on the
# same folds. shuffle=True is required: current scikit-learn raises ValueError
# when random_state is given with the default shuffle=False.
kfold = KFold(n_splits=10, shuffle=True, random_state=7)
scoring = 'accuracy'

# evaluate each model in turn
results = []
names = []
for name, model in models:
    cv_results = cross_val_score(model, X, Y, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

# boxplot algorithm comparison: one box of per-fold scores per model
fig, ax = pyplot.subplots()
fig.suptitle('Algorithm Comparison')
ax.boxplot(results)
ax.set_xticklabels(names)
ax.set_ylabel(scoring)
pyplot.show()

In [30]:
# Grid Search for Algorithm Tuning
import numpy
from pandas import read_csv
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

# Candidate regularisation strengths, from 1 down to 0 (no penalty).
alphas = numpy.array([1, 0.1, 0.01, 0.001, 0.0001, 0])
param_grid = {'alpha': alphas}

# Try every alpha on the regression data (x, y).
model = Ridge()
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid.fit(x, y)

# Best score found, then the alpha that produced it.
print(grid.best_score_)
print(grid.best_estimator_.alpha)

0.9913256319414119
0.0


In [None]:
# Randomized for Algorithm Tuning
from pandas import read_csv
from scipy.stats import uniform
from sklearn.linear_model import Ridge
from sklearn.model_selection import RandomizedSearchCV

# Instead of a fixed grid, draw alpha from scipy's default uniform
# distribution (the interval [0, 1]).
param_grid = {'alpha': uniform()}
model = Ridge()
rsearch = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=100,
    random_state=7,  # seeds the sampling of candidate alphas
)

# Tune on the regression data (x, y), the same data the grid search cell uses.
# The original fitted on X, Y, which the notebook binds to a classification
# dataset loaded from CSV - not a meaningful target for a Ridge regressor.
rsearch.fit(x, y)

# Best score found, then the alpha that produced it.
print(rsearch.best_score_)
print(rsearch.best_estimator_.alpha)
