In [9]:
import numpy as np
import pandas as pd

# Raw data file and the companion file that describes its attributes.
path1 = 'Datasets/communities.data'
path2 = 'Datasets/communities.names'

# Column names are taken from the '@attribute <name> ...' lines of the
# description file, in the order in which they appear.
with open(path2, 'r') as file:
    headers = [line.split()[1] for line in file if line.startswith('@attribute')]
print(headers)

# Columns excluded from the modelling frame.
dropped_columns = ['state', 'county', 'community', 'communityname', 'fold']

# Read the header-less CSV with the parsed names and remove the excluded
# columns in a single, non-mutating call (re-running the cell is safe).
data = pd.read_csv(path1, sep=',', names=headers).drop(columns=dropped_columns)

# '?' marks a missing value in the file. `np.nan` is the canonical spelling;
# legacy upper-case aliases such as `np.NaN` were removed in NumPy 2.0.
data = data.replace('?', np.nan)

# Fraction of missing values per column, and every column that has at least
# one missing value (the threshold is 0, despite the "high" in the name).
na_percentage = data.isna().mean()
high_na_columns = na_percentage[na_percentage > 0].index.tolist()

['state', 'county', 'community', 'communityname', 'fold', 'population', 'householdsize', 'racepctblack', 'racePctWhite', 'racePctAsian', 'racePctHisp', 'agePct12t21', 'agePct12t29', 'agePct16t24', 'agePct65up', 'numbUrban', 'pctUrban', 'medIncome', 'pctWWage', 'pctWFarmSelf', 'pctWInvInc', 'pctWSocSec', 'pctWPubAsst', 'pctWRetire', 'medFamInc', 'perCapInc', 'whitePerCap', 'blackPerCap', 'indianPerCap', 'AsianPerCap', 'OtherPerCap', 'HispPerCap', 'NumUnderPov', 'PctPopUnderPov', 'PctLess9thGrade', 'PctNotHSGrad', 'PctBSorMore', 'PctUnemployed', 'PctEmploy', 'PctEmplManu', 'PctEmplProfServ', 'PctOccupManu', 'PctOccupMgmtProf', 'MalePctDivorce', 'MalePctNevMarr', 'FemalePctDiv', 'TotalPctDiv', 'PersPerFam', 'PctFam2Par', 'PctKids2Par', 'PctYoungKids2Par', 'PctTeen2Par', 'PctWorkMomYoungKids', 'PctWorkMom', 'NumIlleg', 'PctIlleg', 'NumImmig', 'PctImmigRecent', 'PctImmigRec5', 'PctImmigRec8', 'PctImmigRec10', 'PctRecentImmig', 'PctRecImmig5', 'PctRecImmig8', 'PctRecImmig10', 'PctSpeakEnglOn

In [None]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from copy import copy
import time

# Work on an independent copy so the cleaned `data` frame is left untouched.
data_mice = data.copy()

# Iterative (MICE-style) imputer; imputed values are bounded to [0, 1] via
# min_value / max_value.
imputer = IterativeImputer(
    max_iter=300,
    tol=0.001,
    min_value=0,
    max_value=1,
    random_state=42,
)

# Time the fit so the cost of this cell is visible in its output.
time_start = time.time()
imputed_values = imputer.fit_transform(data_mice)
time_stop = time.time()
print(f"Exec time: {time_stop-time_start}")

imputed_values

In [None]:
# Re-wrap the imputed array as a DataFrame, labelling the columns with the
# parsed attribute names minus the columns that were dropped from `data`.
data_mice = pd.DataFrame(
    imputed_values,
    columns=[
        col
        for col in headers
        if col not in ('state', 'county', 'community', 'communityname', 'fold')
    ],
)
data_mice

In [None]:
# Features are every column except the last; the last column is the target.
mice_X, mice_y = data_mice.iloc[:, :-1], data_mice.iloc[:, -1]
mice_X

In [None]:
# from sklearn.decomposition import PCA
# 
# pca = PCA(n_components=60)
# pca_mice_X = pca.fit_transform(mice_X)
# pca_mice_X

In [None]:
# from sklearn.feature_selection import RFE
# from sklearn.linear_model import LinearRegression
# 
# estimator = LinearRegression()
# selector = RFE(estimator, n_features_to_select=60, step=0.1)
# selector = selector.fit(mice_X, mice_y)
# selected_features_mask = selector.support_
# selected_feature_names = [column_name for (column_name, selected) in zip(mice_X.columns, selected_features_mask) if selected]
# rfe_mice_X = mice_X[selected_feature_names]
# rfe_mice_X = pd.DataFrame(rfe_mice_X, columns=selected_feature_names)
# 
# rfe_mice_X

In [None]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Hold out 25% of the imputed data for validation (fixed seed).
X_train, X_val, y_train, y_val = train_test_split(
    mice_X,
    mice_y,
    test_size=0.25,
    random_state=42,
)

# L1-regularised linear baseline.
lasso = Lasso(alpha=0.0001)
lasso.fit(X_train, y_train)

# Score the hold-out predictions.
y_pred = lasso.predict(X_val)
mse = mean_squared_error(y_val, y_pred)
print(f"Mean Squared Error: {mse}")
r2 = r2_score(y_val, y_pred)
print(f"R^2: {r2}")


In [None]:
from sklearn.linear_model import Ridge

# Same 75/25 split (same seed) as the Lasso cell.
X_train, X_val, y_train, y_val = train_test_split(
    mice_X,
    mice_y,
    test_size=0.25,
    random_state=42,
)

# L2-regularised linear baseline solved with the 'cholesky' solver.
ridge = Ridge(solver='cholesky', alpha=0.0001)
ridge.fit(X_train, y_train)

# Score the hold-out predictions.
y_pred = ridge.predict(X_val)
mse = mean_squared_error(y_val, y_pred)
print(f"Mean Squared Error: {mse}")
r2 = r2_score(y_val, y_pred)
print(f"R^2: {r2}")

In [None]:
import lightgbm as lgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score


# 75/25 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    mice_X,
    mice_y,
    test_size=0.25,
    random_state=42,
)

train_data = lgb.Dataset(X_train, label=y_train)

# Booster configuration: DART boosting, RMSE metric.
params = dict(
    objective='regression',
    metric='rmse',
    boosting_type='dart',
    num_leaves=18,
    learning_rate=0.1,
    feature_fraction=0.9,
    bagging_fraction=0.8,
    bagging_freq=6,
    verbose=1,
)

# Number of boosting rounds.
num_round = 10000
bst = lgb.train(params, train_data, num_round)

# NOTE(review): no validation set or early stopping is configured here, so
# `best_iteration` is presumably unset and every tree is used — confirm.
y_pred = bst.predict(X_test, num_iteration=bst.best_iteration)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

In [None]:
from sklearn.model_selection import KFold, cross_val_score
from sklearn.svm import SVR
from hyperopt import fmin, tpe, hp, space_eval
import warnings

# cross_validate scores several metrics from ONE set of fold fits; the
# original ran the whole cross-validation twice per trial (MSE, then R2).
from sklearn.model_selection import cross_validate

# Silences every warning for the rest of the session (solver chatter, etc.).
warnings.filterwarnings("ignore")

X = mice_X
y = mice_y

# Search space per estimator class: C is log-uniform over [e^-3, e^3].
estimators_params = {
    SVR: {
        'C': hp.loguniform('C', -3, 3),
        'kernel': hp.choice('kernel', ['linear', 'rbf', 'poly', 'sigmoid']),
    }
}

# Fixed, shuffled 4-fold split so every trial is scored on the same folds.
kf = KFold(n_splits=4, shuffle=True, random_state=42)

best_estimators = {}
for estimator, space in estimators_params.items():
    print(f"Optimizing {estimator.__name__}")

    def objective(params):
        """Return the mean cross-validated MSE for one sampled configuration.

        Both MSE and R2 are printed for every trial; only the MSE is returned
        and therefore minimised by hyperopt.
        """
        model = estimator(**params)
        cv_results = cross_validate(
            model, X, y, cv=kf,
            scoring=('neg_mean_squared_error', 'r2'),
        )
        # The scorer returns negated MSE, so flip the sign back.
        mse = -cv_results['test_neg_mean_squared_error'].mean()
        print(f"MSE: {mse}")
        r2 = cv_results['test_r2'].mean()
        print(f"R2: {r2}")
        print("----------------------------------")
        return mse

    best = fmin(fn=objective,
                space=space,
                algo=tpe.suggest,
                max_evals=200,
                verbose=True)

    # `best` holds hp.choice indices; space_eval maps them back to values.
    best_params = space_eval(space, best)
    print(f"Best parameters for {estimator.__name__}: {best_params}")

    # Refit the winning configuration on the full data set.
    model = estimator(**best_params)
    model.fit(X, y)
    best_estimators[estimator] = model

# NEURAL NETWORK WITH L-BFGS SOLVER


In [None]:
from sklearn.model_selection import train_test_split

# float32 copies of the features/target, split 75/25 with the same seed as
# the earlier model cells.
X_train_mice, X_val_mice, y_train_mice, y_val_mice = train_test_split(
    mice_X.astype(np.float32),
    mice_y.astype(np.float32),
    test_size=0.25,
    random_state=42,
)

In [None]:
def _as_column(values):
    """Reshape a 1-D array to a single column; return other ranks unchanged."""
    return values.reshape(-1, 1) if values.ndim == 1 else values


# Plain numpy views of the float32 split; targets become (n, 1) columns.
y_train = _as_column(y_train_mice.to_numpy())
y_val = _as_column(y_val_mice.to_numpy())

X_train = X_train_mice.to_numpy()
X_val = X_val_mice.to_numpy()

In [7]:
!pip install tensorflow_probability

Collecting tensorflow_probability
  Downloading tensorflow_probability-0.21.0-py2.py3-none-any.whl.metadata (13 kB)
Collecting dm-tree (from tensorflow_probability)
  Downloading dm_tree-0.1.8-cp38-cp38-win_amd64.whl (101 kB)
     ---------------------------------------- 0.0/101.4 kB ? eta -:--:--
     ----------- ------------------------- 30.7/101.4 kB 660.6 kB/s eta 0:00:01
     -------------------------------------- 101.4/101.4 kB 1.2 MB/s eta 0:00:00
Collecting typing-extensions<4.6.0 (from tensorflow_probability)
  Downloading typing_extensions-4.5.0-py3-none-any.whl (27 kB)
Downloading tensorflow_probability-0.21.0-py2.py3-none-any.whl (6.9 MB)
   ---------------------------------------- 0.0/6.9 MB ? eta -:--:--
   -- ------------------------------------- 0.5/6.9 MB 10.0 MB/s eta 0:00:01
   --------- ------------------------------ 1.6/6.9 MB 16.8 MB/s eta 0:00:01
   ------------------ --------------------- 3.2/6.9 MB 25.7 MB/s eta 0:00:01
   --------------------------------------

In [8]:
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

def wrapper_func(model, loss, train_x, train_y, val_x, val_y):
    """Wrap a Keras model as a value-and-gradients function of one flat vector.

    The returned callable takes a 1-D tensor holding all trainable weights
    and returns ``(value, gradients)``, the form handed to
    ``tfp.optimizer.lbfgs_minimize`` elsewhere in this notebook.

    Args:
        model: model whose ``trainable_variables`` are optimised.
        loss: callable invoked as ``loss(y_true, y_pred)``.
        train_x, train_y: data used for the differentiated (training) loss.
        val_x, val_y: data used for the loss value that is returned.

    Returns:
        A ``tf.function`` ``f(params_1d) -> (val_loss, grads)`` carrying these
        extra attributes:
          * ``f.iter``    – ``tf.Variable`` counting evaluations,
          * ``f.history`` – list collecting the training loss per evaluation,
          * ``f.idx``     – stitch indices used to flatten the variables,
          * ``f.assign_new_model_parameters`` – writes a flat vector back
            into the model.

    NOTE(review): the returned value is the *validation* loss while the
    gradients belong to the *training* loss, so value and gradient describe
    different objectives. This is kept because ``train_model`` reads the
    first return value as a validation loss — confirm it is intended.
    NOTE(review): only ``loss(...)`` is differentiated; any regularisation
    penalties attached to the layers do not appear to be included — confirm.
    """
    shapes = [var.shape for var in model.trainable_variables]
    n_tensors = len(shapes)

    count = 0
    idx = []  # stitch indices
    part = []  # partition indices

    for i, shape in enumerate(shapes):
        # np.prod replaces np.product (removed in NumPy 2.0). int() keeps `n`
        # a plain integer: np.prod of an empty shape is the float 1.0, which
        # would break the list repetition below for scalar variables.
        n = int(np.prod(shape))
        idx.append(tf.reshape(tf.range(count, count + n, dtype=tf.int32), shape))
        part.extend([i] * n)
        count += n

    part = tf.constant(part)

    def assign_new_model_parameters(params_1d):
        """Split the flat vector by variable and assign each reshaped piece."""
        params = tf.dynamic_partition(params_1d, part, n_tensors)
        for i, (shape, param) in enumerate(zip(shapes, params)):
            model.trainable_variables[i].assign(tf.reshape(param, shape))

    @tf.function
    def f(params_1d):
        with tf.GradientTape() as tape:
            assign_new_model_parameters(params_1d)
            # Keras losses take (y_true, y_pred); the order is irrelevant for
            # a symmetric loss such as MSE but matters for asymmetric ones.
            loss_value = loss(train_y, model(train_x, training=True))

        # Flatten the per-variable gradients in the same order as the weights.
        grads = tape.gradient(loss_value, model.trainable_variables)
        grads = tf.dynamic_stitch(idx, grads)

        f.iter.assign_add(1)
        tf.print("Iter:", f.iter, "loss:", loss_value)

        # Record the training loss on the Python side.
        tf.py_function(f.history.append, inp=[loss_value], Tout=[])

        model_output = model(val_x, training=False)
        val_loss = loss(val_y, model_output)

        return val_loss, grads

    f.iter = tf.Variable(0)
    f.history = []

    f.idx = idx
    f.assign_new_model_parameters = assign_new_model_parameters

    return f


  np.object,


AttributeError: module 'numpy' has no attribute 'object'.
`np.object` was a deprecated alias for the builtin `object`. To avoid this error in existing code, use `object` by itself. Doing this will not modify any behavior and is safe. 
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations

In [None]:
from tensorflow.keras.regularizers import l2

def create_model(input_dim=None, alpha_value=0.2126152046916911, hidden_units=(60, 60, 30, 15)):
    """Build the tanh MLP regressor used for the L-BFGS experiment.

    Args:
        input_dim: number of input features. When ``None`` (the default) it
            is read from the notebook-level ``mice_X`` frame, which matches
            the original behaviour.
        alpha_value: L2 penalty factor applied to the kernel of every hidden
            layer.
        hidden_units: width of each hidden layer, in order.

    Returns:
        A ``tf.keras.Sequential`` model: an input layer, one tanh ``Dense``
        layer per entry of ``hidden_units`` and a single linear output unit
        (no regulariser on the output layer).
    """
    if input_dim is None:
        # Fall back to the notebook's feature frame, as the original did.
        input_dim = mice_X.shape[1]

    layers = [tf.keras.Input(shape=[input_dim])]
    for units in hidden_units:
        layers.append(
            tf.keras.layers.Dense(units, activation="tanh", kernel_regularizer=l2(alpha_value))
        )
    layers.append(tf.keras.layers.Dense(1, activation=None))

    model = tf.keras.Sequential(layers)
    return model


In [None]:
def train_model(model, func, init_params, max_iterations=500, tolerance=9.011410429103892e-055, patience=10):
    """Run repeated L-BFGS rounds with patience-based early stopping.

    Every round starts ``tfp.optimizer.lbfgs_minimize`` from the position the
    previous round ended at, writes the new position into the model through
    ``func`` and evaluates ``func`` there; the first value ``func`` returns is
    used as the validation loss.

    Args:
        model: not used inside this function; kept for the call signature.
        func: value-and-gradients callable that also exposes
            ``assign_new_model_parameters``.
        init_params: starting position for the first round.
        max_iterations: maximum number of optimiser rounds.
        tolerance: forwarded to ``lbfgs_minimize``.
        patience: number of consecutive non-improving rounds after which the
            loop stops.

    Returns:
        The lowest validation loss seen (``float('inf')`` if no round ran).
        On exit the parameters of the best round, if any, have been assigned
        through ``func``.
    """
    lowest_val_loss = float('inf')
    lowest_loss_position = None
    stale_rounds = 0
    position = init_params

    for round_idx in range(max_iterations):
        outcome = tfp.optimizer.lbfgs_minimize(
            value_and_gradients_function=func,
            initial_position=position,
            tolerance=tolerance
        )
        # The end point of this round doubles as the next round's start.
        position = outcome.position

        # Push the optimised weights into the model, then score them.
        func.assign_new_model_parameters(position)
        current_val_loss, _ = func(position)

        # Track the best round; anything else counts against the patience.
        if not (current_val_loss < lowest_val_loss):
            stale_rounds += 1
        else:
            lowest_val_loss = current_val_loss
            lowest_loss_position = position
            stale_rounds = 0

        if stale_rounds >= patience:
            print(f"Stopping early at iteration {round_idx}")
            break

    # Leave the model at the best position seen rather than the last one.
    if lowest_loss_position is not None:
        func.assign_new_model_parameters(lowest_loss_position)

    return lowest_val_loss
#nya


In [None]:
from sklearn.metrics import r2_score

# Fresh network plus the flat-parameter wrapper used by the L-BFGS optimiser.
pred_model = create_model()
loss_fun = tf.keras.losses.MeanSquaredError()
func = wrapper_func(pred_model, loss_fun, X_train, y_train, X_val, y_val)

# Flatten the initial weights into the 1-D start position.
init_params = tf.dynamic_stitch(func.idx, pred_model.trainable_variables)

best_val_loss = train_model(
    pred_model,
    func,
    init_params,
    max_iterations=800,
    tolerance=1e-5,
    patience=10,
)

# Score the trained network on the validation split.
pred_outs = pred_model.predict(X_val)
r2_val_score = r2_score(y_val, pred_outs)

print(f"Best validation loss: {best_val_loss}")
print(f"R^2: {r2_val_score}")

In [None]:
from joblib import load, dump
# Re-save the temporary result under a second file name.
# NOTE(review): joblib.load unpickles the file — only load trusted artefacts.
_best_tensor_combo = load("best_tensor_combo_temp.joblib")
dump(_best_tensor_combo, "best_tensor_combo_temp_014082.joblib")

# MLP REGRESSOR

In [None]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from joblib import dump, load


from hyperopt import fmin, tpe, hp, space_eval

import warnings

# Silences every warning for the rest of the session.
warnings.filterwarnings("ignore")

estimators_params = {}


# Search space for the MLP. `layer_number` and `nn1..nn4` are search-only
# keys; they are folded into `hidden_layer_sizes` before the estimator is
# constructed (see `_to_estimator_kwargs`).
estimators_params[MLPRegressor] ={
    #[58, 45, 36, 61]
    'layer_number': hp.choice('layer_number', range(4,5)),
    'nn1' : hp.choice("nn1", range(53,63,1)),
    'nn2' : hp.choice("nn2", range(40,50,1)),
    'nn3' : hp.choice("nn3", range(31,41,1)),
    'nn4' : hp.choice("nn4", range(56,67,1)),
    # 'nn5' : hp.choice("nn5", range(20,40,1)),
    'activation': hp.choice('activation', ['relu', 'tanh']),  # Keeping the best choice
    'solver': 'lbfgs',  # Keeping the best choice
    'alpha': hp.uniform('alpha', 0.17, 0.22),  # Narrowed range around 0.1959
    'max_iter': hp.choice('max_iter', range(1200, 1300, 10)),  # Narrowed range
    'tol': hp.uniform('tol', 1e-6, 1e-3),
    # NOTE(review): per the scikit-learn docs, `shuffle`, `early_stopping` and
    # `validation_fraction` only take effect with the 'sgd'/'adam' solvers,
    # so they are presumably inert with 'lbfgs' — confirm before tuning them.
    'shuffle': hp.choice('shuffle', [True, False]),  # Keeping the best choice
    'early_stopping': True,
    'validation_fraction': 0.25,  # Keeping the best choice
}


def _to_estimator_kwargs(params):
    """Translate one sampled point of the search space into constructor kwargs.

    The first `layer_number` entries of `nn1, nn2, ...` become
    `hidden_layer_sizes`; `layer_number` and every `nn<k>` key are removed.
    The input dict is not modified.
    """
    kwargs = {
        key: value
        for key, value in params.items()
        if key != "layer_number" and not (key.startswith("nn") and key[2:].isdigit())
    }
    if "layer_number" in params:
        kwargs["hidden_layer_sizes"] = [
            params["nn" + str(i)] for i in range(1, params["layer_number"] + 1)
        ]
    return kwargs


# Keep the best result of earlier runs of this cell if one exists in the
# kernel; otherwise start from scratch.
try:
    best_combo
except NameError:
    best_combo = None

best_estimators = {}
for estimator, space in estimators_params.items():
    print(estimator)

    def objective(params: dict):
        """Fit one sampled configuration and return its validation MSE.

        Also keeps the notebook-level `best_combo` up to date and writes it
        to 'best_combo_temp2.joblib' whenever a previous best is beaten.
        """
        global best_combo

        model = estimator(**_to_estimator_kwargs(params))
        model.fit(X_train_mice, y_train_mice)
        y_pred_fillna = model.predict(X_val_mice)
        mse = mean_squared_error(y_val_mice, y_pred_fillna)
        print(f"MSE: {mse}")
        mae = mean_absolute_error(y_val_mice, y_pred_fillna)
        print(f"MAE:  {mae}")
        r2 = r2_score(y_val_mice, y_pred_fillna)
        print(f"R2:  {r2}")

        had_previous_best = best_combo is not None
        if not had_previous_best or best_combo["mse"] > mse:
            best_combo = {
                "model": model,
                "mse" : mse,
                "mae" : mae,
                "r2" : r2,
                "y_pred": y_pred_fillna,
            }
            # As before, the very first result is stored silently; only a
            # result that beats an earlier one is announced and checkpointed.
            if had_previous_best:
                print("NEW_BEST_COMBO!")
                print(f"MSE: {mse}")
                print(f"MAE: {mae}")
                print(f"R2: {r2}")
                dump(best_combo, 'best_combo_temp2.joblib')
        return mse

    best = fmin(fn=objective,
                space=space,
                algo=tpe.suggest,
                max_evals=10000,
                verbose=True)

    print("BEST:")
    # `best` holds hp.choice indices; space_eval maps them back to values.
    best_params = space_eval(space, best)
    # The raw point still contains the search-only keys ('layer_number',
    # 'nn1', ...); passing it straight to the estimator raises TypeError, so
    # it goes through the same translation the objective uses.
    best_estimator = estimator(**_to_estimator_kwargs(best_params))
    best_estimator.fit(X_train_mice, y_train_mice)
    y_pred_est = best_estimator.predict(X_val_mice)
    mae_est = mean_absolute_error(y_val_mice, y_pred_est)
    mse_est = mean_squared_error(y_val_mice, y_pred_est)
    r2_est = r2_score(y_val_mice, y_pred_est)
    print(f"MAE: {mae_est}")
    print(f"MSE: {mse_est}")
    print(f"R2: {r2_est}")
    print("BEST:",best_params)
    best_estimators[mse_est] = (best_estimator, best_params)

# Pick the refitted estimator with the lowest validation MSE and persist both
# the best trial seen during the search and that refitted estimator.
best_mse = min(best_estimators.keys())
best_regressor= (best_estimators[best_mse],  best_mse)
print(best_combo)
dump(best_combo, 'best_combo_new3.joblib')
print(best_regressor)
dump(best_regressor, 'best_regressor_new3.joblib')

In [None]:
# Show the hyper-parameters of the best model found by the search.
best_model_params = best_combo["model"].get_params()
print(best_model_params)

In [None]:
def measure(model, X, y):
    """Score ``model`` on ``(X, y)``.

    Predicts once with ``model.predict(X)`` and returns a dict with the keys
    "MAE", "MSE" and "R2", each computed from ``y`` and those predictions.
    """
    predictions = model.predict(X)
    scorers = (
        ("MAE", mean_absolute_error),
        ("MSE", mean_squared_error),
        ("R2", r2_score),
    )
    return {label: scorer(y, predictions) for label, scorer in scorers}

In [None]:
from joblib import load
# Inspect a previously saved search result.
# NOTE(review): joblib.load unpickles the file — only load trusted artefacts.
saved_combo = load("best_combo_temp_005400.joblib")
print(saved_combo)

In [None]:
from joblib import load
# Re-score the saved model on the validation split.
# NOTE(review): joblib.load unpickles the file — only load trusted artefacts.
saved_model = load("best_combo_temp_005400.joblib")["model"]
measure(saved_model, X_val_mice, y_val_mice)