###### Here is explanation of how to use PnormRegressor model

In [5]:
# Import Needed packages 
import numpy as np
import sklearn
from Estimator import PnormRegressor
from sklearn.datasets import load_boston, fetch_openml
from sklearn.model_selection import GridSearchCV

In [11]:
# Check your Version of Sklearn
print("This code was tested on sklearn version of '0.21.2' & your sk.version is '{}'".format(sklearn.__version__))

This code was tested on sklearn version of '0.21.2' & your sk.version is '0.21.2'


In [12]:
# function to generate random data
def generate_dataset_simple(n, m, std):
  # Generate x as an array of `n` samples which can take a value between 0 and 100
  x = np.random.rand(n, m) * 100
  # Generate the random error of n samples, with a random value from a normal distribution, with a standard
  # deviation provided in the function argument
  y_intercept = np.random.randn(n) * std
  beta = np.random.rand(m)
  # Calculate `y` according to the equation discussed
  y =  np.dot(beta, x.T) + y_intercept
  return x, y

# Function to evaluate the model (R2_score)
def r2_score(y_true, y_pred):
    mean_y = np.mean(y_true)
    ss_tot = sum((y_true - mean_y) ** 2)
    ss_res = sum((y_true - y_pred) ** 2)
    r2 = 1 - (ss_res / ss_tot)
    return r2

##### Test The model on Random Generated data

In [13]:
# generate 500 sampes of X, y randomly 
X, y = generate_dataset_simple(500, 4, 0.25)
# Normalizing the data
X = (X - X.mean(axis=0))/X.std(axis=0)

#num_iterations, learning_rate , p_norm = 100, 1e-5, 2

# split data into train and validation
train_split_perc = 0.8
trainset = round(train_split_perc * len(X))
X_train, X_test, y_train, y_test = X[:trainset], X[trainset:], y[:trainset], y[trainset:]

model = PnormRegressor()

model.fit(X_train, y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print("R2_score for randomly generated train data: {}".format(r2_score(y_train, y_pred_train)))
print("R2_score for randomly generated test data: {}".format(r2_score(y_test, y_pred_test)))

R2_score for randomly generated train data: 0.9999398207489156
R2_score for randomly generated test data: 0.9999491730587513


In [14]:
#####################################################################################
print('---Using sklearn-openML interface to evaluate the model on BOSTON dataset.---')

X,y = fetch_openml(name='boston', return_X_y=True)

# Normalizing the data
X = (X - X.mean(axis=0))/X.std(axis=0)

X_train, X_test, y_train, y_test = X[:trainset], X[trainset:], y[:trainset], y[trainset:]

model = PnormRegressor()

model.fit(X_train, y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print("R2_score for BOSTON train data: {}".format(r2_score(y_train, y_pred_train)))
print("R2_score for BOSTON test data: {}".format(r2_score(y_test, y_pred_test)))

---Using sklearn-openML interface to evaluate the model on BOSTON dataset.---
R2_score for BOSTON train data: 0.709629068699329
R2_score for BOSTON test data: 0.11613806600873622


In [15]:

#######################################################################################
print('----- MODEL EVALUATION on  Employee Selection (ESL) dataset fetched from openML DB')

# Fetch Employee Selection dataset from openML
X,y = fetch_openml(name='ESL', return_X_y=True)

# Normalizing the data
X = (X - X.mean(axis=0))/X.std(axis=0)

# split data into train and validation
X_train, X_test, y_train, y_test = X[:trainset], X[trainset:], y[:trainset], y[trainset:]

model = PnormRegressor()

model.fit(X_train, y_train)

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)

print("R2_score for ESL train data: {}".format(r2_score(y_train, y_pred_train)))
print("R2_score for ESL test data: {}".format(r2_score(y_test, y_pred_test)))



----- MODEL EVALUATION on  Employee Selection (ESL) dataset fetched from openML DB
R2_score for ESL train data: 0.8709435948092916
R2_score for ESL test data: 0.8129008718137286


##### We can also use GridSearch to choose the best parameters. 

In [16]:

##################################################################################
print('----using sklearn packages like GridSearch to search for best parameters---')

tuned_params = {"num_iterations": [100,1000], "p_norm" : [1,2]}

# Fetch Employee Selection dataset from openML
X,y = fetch_openml(name='ESL', return_X_y=True)

# Normalizing the data
X = (X - X.mean(axis=0))/X.std(axis=0)

# split data into train and validation
X_train, X_test, y_train, y_test = X[:trainset], X[trainset:], y[:trainset], y[trainset:]

pipe = GridSearchCV(PnormRegressor(), tuned_params)
pipe.fit(X_train, y_train)

y_pred_train = pipe.predict(X_train)
y_pred_test = pipe.predict(X_test)

print("R2_score for ESL train data: {}".format(r2_score(y_train, y_pred_train)))
print("R2_score for ESL test data: {}".format(r2_score(y_test, y_pred_test)))


print("Best parameters chose by GridSearch: ", pipe.best_params_)


----using sklearn packages like GridSearch to search for best parameters---
R2_score for ESL train data: 0.8709435948092916
R2_score for ESL test data: 0.8129008718137286
Best parameters chose by GridSearch:  {'num_iterations': 1000, 'p_norm': 2}
