In [17]:
import numpy as np
from sklearn import datasets
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# useful utility function (see https://scikit-learn.org/stable/modules/compose.html for more details)
from sklearn.pipeline import Pipeline, make_pipeline
# useful utility to optimize hyperparameters (see https://scikit-learn.org/stable/modules/grid_search.html#grid-search for more details)
from sklearn.model_selection import GridSearchCV
# dataset splitting utilities (see https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators for more details)
from sklearn.model_selection import ShuffleSplit, KFold
# helper function to plot a ROC curve (see https://scikit-learn.org/stable/visualizations.html#visualizations for more details)
from sklearn.metrics import plot_roc_curve
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pandas as pd

# Predict housing prices

In [None]:
# Visual overview of the housing data: pairwise scatter plots of every feature
# and the target, with a histogram of each column on the diagonal.
housing = datasets.fetch_california_housing(return_X_y=False, as_frame=True)
c = pd.plotting.scatter_matrix(
    pd.concat([housing.data, housing.target], axis=1),  # features + target side by side
    alpha=0.2,
    figsize=(17, 17),
    diagonal='hist',
)
c;

In [19]:
# Feature names, listed in the column order of the feature matrix.
xcol = [
    'MedInc',
    'HouseAge',
    'AveRooms',
    'AveBedrms',
    'Population',
    'AveOccup',
    'Latitude',
    'Longitude',
]
# Fetch the dataset as a (feature matrix, target vector) pair.
housing_X, housing_y = datasets.fetch_california_housing(return_X_y=True)


## Linear regression

## MLP Regression

Using `Pipeline` and `GridSearchCV` is recommended.

In [22]:
# Set of hyperparameters to search over with a grid search strategy and a
# cross-validated score, written in the GridSearchCV `param_grid` format.
# The "mlpregressor__" prefix targets a pipeline step named "mlpregressor"
# (presumably the step make_pipeline creates for an MLPRegressor; the pipeline
# itself is not defined in this cell).
param_grid = [
    {
        # One tuple per candidate architecture, one entry per hidden layer.
        # Single-layer candidates need the trailing comma: (5) is the int 5,
        # whereas (5,) is a one-element tuple like the multi-layer entries.
        "mlpregressor__hidden_layer_sizes": [(5,), (30,), (10, 10, 10), (30, 30)],
        "mlpregressor__activation": ['identity', 'tanh'],
        # candidate regularization strengths
        "mlpregressor__alpha": [1e-4, 1e-2, 1],
    }
]

# Hold out 10% of the samples as a separate test set.
# Keep random_state fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    housing_X,
    housing_y,
    test_size=0.1,
    random_state=43738,
)

# Predict Tumor state

In [15]:
# Visual overview of the breast cancer data: pairwise scatter plots of every
# feature and the target, with a histogram of each column on the diagonal.
cancer = datasets.load_breast_cancer(as_frame=True)
c = pd.plotting.scatter_matrix(
    pd.concat([cancer.data, cancer.target], axis=1),  # features + target side by side
    alpha=0.2,
    figsize=(17, 17),
    diagonal='hist',
)
c;

In [39]:
# Feature names, listed in the column order of the feature matrix.
xcol = [
    'mean radius',
    'mean texture',
    'mean perimeter',
    'mean area',
    'mean smoothness',
    'mean compactness',
    'mean concavity',
    'mean concave points',
    'mean symmetry',
    'mean fractal dimension',
    'radius error',
    'texture error',
    'perimeter error',
    'area error',
    'smoothness error',
    'compactness error',
    'concavity error',
    'concave points error',
    'symmetry error',
    'fractal dimension error',
    'worst radius',
    'worst texture',
    'worst perimeter',
    'worst area',
    'worst smoothness',
    'worst compactness',
    'worst concavity',
    'worst concave points',
    'worst symmetry',
    'worst fractal dimension',
]
# Fetch the dataset as a (feature matrix, target vector) pair.
cancer_X, cancer_y = datasets.load_breast_cancer(return_X_y=True)

# Hold out 25% of the samples as a separate test set.
# Keep random_state fixed so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    cancer_X,
    cancer_y,
    test_size=0.25,
    random_state=43738,
)

# Fixed MLP settings (presumably unpacked into an MLP estimator further down):
# a single hidden layer of 5 units, tanh activation, the adam solver and
# max_iter=6000.
# warm_start has to be the boolean False: the string 'false' is non-empty and
# therefore truthy, so it would not disable warm starts, and recent
# scikit-learn releases reject non-boolean values for this parameter.
hyperparameters = dict(hidden_layer_sizes=(5,), activation='tanh',
                    max_iter=6000, solver='adam', warm_start=False)