In [2]:
# import packages
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso, LassoCV
from sklearn.datasets import make_regression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split


In [None]:
######################################

In [3]:
# toy data
X, Y = make_regression(n_samples=1000, n_features=25, noise=10, random_state=0)

In [4]:
#######################################

In [5]:
# RANDOM FOREST

In [6]:
# instantiate and fit
r_forest = RandomForestRegressor(max_depth=100, random_state=0)
r_forest.fit(X,Y)

In [7]:
# predict
r_forest.predict(X[:,])

array([-5.80986841e+02, -2.50922502e+02,  1.55564412e+02, -5.68192955e+02,
        3.52103728e+01, -1.89196058e+02, -7.83583959e+01,  3.72196633e+01,
        2.11662139e+02,  1.23268289e+01, -1.52461964e+02,  2.60568040e+02,
       -1.91670861e+02, -1.91916881e+02,  2.33067916e+02,  4.10139766e+02,
       -2.57395639e+01, -6.00767095e+01, -1.18320265e+02,  6.70578108e+01,
       -1.21296264e+02,  8.29813051e+01,  6.40053707e+01, -1.39503097e+02,
        1.37038139e+02, -1.74308950e+02,  3.30784141e+02, -3.79542624e+02,
        2.18734258e+02,  3.00257247e+02,  1.30533056e+02, -1.89169610e+02,
        3.75937272e+01, -1.33711157e+02, -4.89504150e+01,  1.52501781e+02,
       -4.29761178e+01,  8.08107063e+01,  2.07291797e+02,  3.87505584e+01,
        2.17066174e+02, -1.00551696e+02, -1.73121248e+02, -7.75210660e+00,
        9.23868977e+01, -2.46532317e+02,  1.49217417e+02, -9.79263461e+01,
       -2.59329920e+01, -2.85576969e+02,  1.01838016e+02,  1.59943555e+02,
        1.79311943e+01,  

In [8]:
####################################

In [9]:
# LASSO

In [10]:
# instantiate lasso with 10-fold cross-validation
lasso = LassoCV(cv=10,random_state=0) # random_state for reproducible results
lasso_poly = LassoCV(cv=10, random_state=0)

In [11]:
# fit
lasso.fit(X,Y)

In [12]:
# compute polynomial features for lasso
poly = PolynomialFeatures(degree=2, interaction_only=False, include_bias=False)
X_poly = poly.fit_transform(X)

In [13]:
# predict values
lasso.predict(X[:,]) # TODO: INPUT TEST SET HERE? OR APPROPRIATE DATA

array([-6.65174001e+02, -2.54809599e+02,  1.50734005e+02, -6.62219140e+02,
        3.32561199e+01, -1.88279852e+02, -8.89277364e+01,  4.76642742e+01,
        2.48712396e+02,  7.65499464e+00, -1.57603138e+02,  2.93919113e+02,
       -2.08902777e+02, -1.77142896e+02,  2.45923594e+02,  5.26951200e+02,
       -1.65266676e+01, -8.83149350e+01, -1.61503579e+02,  1.01365201e+02,
       -1.16889316e+02,  3.53673756e+01,  5.39532198e+01, -1.25722261e+02,
        1.21225213e+02, -2.30072191e+02,  4.00994157e+02, -3.65675922e+02,
        2.49200135e+02,  3.67072940e+02,  2.04662976e+02, -2.15757081e+02,
        2.88682301e+01, -1.87417843e+02, -3.63048801e+01,  1.90459441e+02,
       -2.16909030e+01,  7.69948242e+01,  2.56746971e+02,  4.11656862e+01,
        3.04006066e+02, -9.55333255e+01, -2.12246982e+02,  2.64016223e+01,
        7.41012000e+01, -2.65496040e+02,  1.66110273e+02, -6.87096969e+01,
       -5.60718249e+01, -3.58525411e+02,  1.10071498e+02,  1.92607283e+02,
       -1.76708529e+01,  

In [14]:
lasso_poly.fit(X_poly,Y) # here it takes 1m 25s for 25 original features with degree 3 poly transformation

In [15]:
lasso_poly.predict(X_poly[:,]) # prediction is fast

array([-6.61843942e+02, -2.53838688e+02,  1.51234474e+02, -6.60761687e+02,
        3.32002921e+01, -1.88065871e+02, -8.83498184e+01,  4.73100579e+01,
        2.46973087e+02,  7.60191561e+00, -1.56362379e+02,  2.94245348e+02,
       -2.07103086e+02, -1.75454574e+02,  2.44709008e+02,  5.23594404e+02,
       -1.72773865e+01, -8.77270368e+01, -1.60015186e+02,  1.00781189e+02,
       -1.16964386e+02,  3.75261738e+01,  5.32869416e+01, -1.27145437e+02,
        1.19721069e+02, -2.29050152e+02,  3.96721615e+02, -3.66057650e+02,
        2.48905351e+02,  3.67269826e+02,  2.03406696e+02, -2.13955550e+02,
        2.90811867e+01, -1.85567699e+02, -3.49789741e+01,  1.89488671e+02,
       -2.22089262e+01,  7.66726819e+01,  2.51908260e+02,  4.14946374e+01,
        3.02605410e+02, -9.41750176e+01, -2.10576148e+02,  2.66324922e+01,
        7.52572663e+01, -2.63244300e+02,  1.65325700e+02, -6.72357950e+01,
       -5.56967803e+01, -3.56340222e+02,  1.08341498e+02,  1.92508529e+02,
       -1.65404430e+01,  

In [16]:
# Define the LASSO base-learner # TODO: CHANGE THIS!!!, PROBABLY MAKE A CHILDCLASS OF LASSOCV!!!
# DONT KNOW IF NEEDED LIKE THAT!
class Base_Lasso(LassoCV):
    pass
    """""   X: features.
            Y: labels.
            poly_degree: number of degrees for polynomial transformation.
            poly_reg: whether to fit the model to the polynomial transformed X or not. """""
    """def __init__(self, x, y, poly_degree=1, poly_reg=False):
        super().__init__(cv=10,random_state=0)
        self.x = x
        self.y = y
        self.poly_degree = poly_degree
        self.poly_reg = poly_reg
        self.x_poly = PolynomialFeatures(degree=self.poly_degree, interaction_only=False, include_bias=False).fit_transform(self.x) # TODO: OR SEPARATE FUNCTION? """

    #def X_poly(self): # TODO: MAYBE DELETE?
        #poly = PolynomialFeatures(degree=self.poly_degree, interaction_only=False, include_bias=False)
        #return poly.fit_transform(self.X)

    "Comment on this function"

    """def fit(self,cv=10,random_stat=0):
        self.lasso = LassoCV(cv=cv, random_state=random_stat)
        if self.poly_reg:
            self.lasso.fit(self.x_poly, self.y)
        else:
            self.lasso.fit(self.x, self.y)"""


    "Comment on this function"
    #def predict(self,x):
        #return lasso_reg.predict(x)

In [17]:
lol = Base_Lasso(cv=10, random_state=0)

In [18]:
lol.fit(X,Y)

In [19]:
lol.predict(X[:,]) # oder so

array([-6.65174001e+02, -2.54809599e+02,  1.50734005e+02, -6.62219140e+02,
        3.32561199e+01, -1.88279852e+02, -8.89277364e+01,  4.76642742e+01,
        2.48712396e+02,  7.65499464e+00, -1.57603138e+02,  2.93919113e+02,
       -2.08902777e+02, -1.77142896e+02,  2.45923594e+02,  5.26951200e+02,
       -1.65266676e+01, -8.83149350e+01, -1.61503579e+02,  1.01365201e+02,
       -1.16889316e+02,  3.53673756e+01,  5.39532198e+01, -1.25722261e+02,
        1.21225213e+02, -2.30072191e+02,  4.00994157e+02, -3.65675922e+02,
        2.49200135e+02,  3.67072940e+02,  2.04662976e+02, -2.15757081e+02,
        2.88682301e+01, -1.87417843e+02, -3.63048801e+01,  1.90459441e+02,
       -2.16909030e+01,  7.69948242e+01,  2.56746971e+02,  4.11656862e+01,
        3.04006066e+02, -9.55333255e+01, -2.12246982e+02,  2.64016223e+01,
        7.41012000e+01, -2.65496040e+02,  1.66110273e+02, -6.87096969e+01,
       -5.60718249e+01, -3.58525411e+02,  1.10071498e+02,  1.92607283e+02,
       -1.76708529e+01,  

In [None]:
#####################################

# Neural Network

In [20]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


In [57]:
# set some parameters
d = 25 #dimensions
N = 10000 #Number of observations

In [None]:
x, y = make_regression(n_samples=N, n_features=d, noise=1, random_state=0)
# train test split (just as an example)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

Set up Model

In [58]:
# make model
# 3 layers with 200 units (elu activation), 2 layers with 100 units (elu activations), 1 output layer (linear activation)
model = keras.Sequential([
    keras.Input(shape=(d,)),
    layers.Dense(units=200, activation="elu", name="layer1"),
    layers.Dense(units=200, activation="elu", name="layer2"),
    layers.Dense(units=200, activation="elu", name="layer3"),
    layers.Dense(units=100, activation="elu", name="layer4"),
    layers.Dense(units=100, activation="elu", name="layer5"),
    layers.Dense(units=1, activation="linear", name="layer6"),

], name="Dense_Neural_Network")
model.summary()

Model: "Dense_Neural_Network"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer1 (Dense)              (None, 200)               5200      
                                                                 
 layer2 (Dense)              (None, 200)               40200     
                                                                 
 layer3 (Dense)              (None, 200)               40200     
                                                                 
 layer4 (Dense)              (None, 100)               20100     
                                                                 
 layer5 (Dense)              (None, 100)               10100     
                                                                 
 layer6 (Dense)              (None, 1)                 101       
                                                                 
Total params: 115901 (452.74 KB)
Trainable par

In [59]:
x, y = make_regression(n_samples=N, n_features=d, noise=1, random_state=0)

In [60]:
# train test split (just as an example)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

Compile Model

In [61]:
model.compile(
    optimizer=keras.optimizers.Adam(),  # Optimizer
    # Loss function to minimize
    loss=keras.losses.MeanSquaredError(),
    # List of metrics to monitor
    metrics=[keras.metrics.MeanSquaredError()],
)

Training

In [None]:
# Add early stopping
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, start_from_epoch=0)

In [69]:
print("Training Model")
training = model.fit(
    X_train,
    y_train,
    batch_size=100,
    epochs=100,
    validation_data=(X_test, y_test),
    callbacks=[callback] # include early stopping
)

Training Model
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100


In [70]:
training.history # to see training history

{'loss': [3.7852022647857666,
  7.553530693054199,
  2.543985366821289,
  6.313776969909668],
 'mean_squared_error': [3.7852022647857666,
  7.553530693054199,
  2.543985366821289,
  6.313776969909668],
 'val_loss': [5.924784183502197,
  5.627142429351807,
  3.047239065170288,
  4.516695499420166],
 'val_mean_squared_error': [5.924784183502197,
  5.627142429351807,
  3.047239065170288,
  4.516695499420166]}

Evaluate

In [71]:
print("Evaluate on test data")
results = model.evaluate(X_test, y_test, batch_size=100)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [4.516695499420166, 4.516695499420166]


Predictions

In [72]:
print("Generate predictions for test samples")
predictions = model.predict(X_test)
print("predictions shape:", predictions.shape)
predictions

Generate predictions for test samples
predictions shape: (2000, 1)


array([[  54.92495],
       [ 174.6089 ],
       [-218.62843],
       ...,
       [-206.69032],
       [-181.65187],
       [ 161.73146]], dtype=float32)