In [10]:
import os
import sys
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, ElasticNetCV, ElasticNet, RidgeCV, LassoCV, HuberRegressor
from sklearn.metrics import r2_score, root_mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.compose import TransformedTargetRegressor


# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply the 'ggplot' style sheet to the figures drawn by this notebook.
plt.style.use('ggplot')

In [11]:
def str_to_ndarray(text):
    """Parse a space-separated string of numbers into a 1-D float ndarray.

    Passed to ``pd.read_csv`` as the converter for the 'eigvals' column, so it
    is called once per row with that cell's raw text.
    """
    return np.fromstring(text, sep=' ')


# Index of the first eigenvalue kept as a feature, and how many are kept.
# NOTE(review): the first 6 entries of every 'eigvals' array are skipped,
# presumably because they are rigid-body (zero) modes -- confirm against the
# code that generated KG_combin.csv.
EIGVAL_OFFSET = 6
N_OMEGA2 = 14

path = os.path.join('..', '..', 'data', 'KG_combin.csv')
kg_data = pd.read_csv(path, converters={'eigvals': str_to_ndarray})

# One scalar column per retained eigenvalue, each divided by the row's 'rho'.
# Every array must hold at least EIGVAL_OFFSET + N_OMEGA2 entries; a shorter
# one raises IndexError here instead of silently producing missing values.
for q in range(N_OMEGA2):
    picked = kg_data['eigvals'].apply(lambda arr, k=EIGVAL_OFFSET + q: arr[k])
    kg_data[f'omega2_{q}'] = picked / kg_data['rho']

# The raw arrays are not needed once the scalar columns exist.
kg_data = kg_data.drop(columns=['eigvals'])

In [12]:
# Training set: every row loaded from KG_combin.csv, with K as the target.
# 'K', 'G' and 'shape' are excluded from the feature matrix.
y_train = kg_data['K']
X_train = kg_data.drop(columns=['K', 'G', 'shape'])

sqrt_columns = ['rho', 'dx', 'dy', 'dz']
# Only omega2_0 .. omega2_9 are used as features.
omega_columns = [f'omega2_{i}' for i in range(10)]

# Column-wise feature transforms; any column not listed in either group is
# discarded by remainder='drop'.
feature_transformer = ColumnTransformer(
    [
        ('sqrt', FunctionTransformer(np.sqrt), sqrt_columns),
        # log1p instead of log so that zero-valued entries do not raise/overflow
        ('log', FunctionTransformer(np.log1p), omega_columns),
    ],
    remainder='drop',
)

# Full estimator: transform features, rescale them to [0, 1], then fit a
# Huber regression.
pipeline = Pipeline([
    ('feature_transformation', feature_transformer),
    ('scaling', MinMaxScaler()),
    (
        'regression',
        HuberRegressor(epsilon=1, alpha=0.0001, max_iter=10000, fit_intercept=True),
    ),
])

# The regressor is trained on sqrt(K); predictions are squared on the way out,
# so model.predict() returns values on the original scale of K.
model = TransformedTargetRegressor(
    regressor=pipeline,
    func=np.sqrt,
    inverse_func=np.square,
)

model.fit(X_train, y_train)

Los valores de test son los experimentales

In [13]:
# Experimental measurements, used only as the test set.
kg_exp = pd.read_csv('KG_Experimental_Iso.csv')

# NOTE(review): a few omega2_* entries in the displayed rows are exactly 0.0
# in the middle of otherwise increasing values; they look like placeholders
# for values that were not measured -- confirm before trusting predictions
# for those rows.
y_test = kg_exp['K']
X_test = kg_exp.drop(columns=['K', 'G'])
X_test

Unnamed: 0,rho,dx,dy,dz,omega2_0,omega2_1,omega2_2,omega2_3,omega2_4,omega2_5,omega2_6,omega2_7,omega2_8,omega2_9
0,8.052,0.7042,0.631,0.58393,1.485197,2.243456,2.722901,2.909486,3.16277,3.44288,3.77183,3.805168,3.946075,4.272144
1,5.403,0.33511,0.26023,0.15492,5.171781,8.546431,13.095081,17.098761,19.098981,19.59366,19.775137,23.387424,26.975627,29.017625
2,9.401,0.2348,0.1914,0.1484,8.507717,14.629007,17.771782,22.233325,25.365264,26.061988,27.416796,28.09385,29.148011,30.945435
3,9.401,0.23668,0.19364,0.14553,8.507717,14.629007,17.771782,22.233325,25.365264,26.061988,27.416796,28.09385,29.148011,30.945435
4,8.405,0.46319,0.2193,0.38864,1.320706,0.0,3.498974,3.897721,4.139785,4.43806,4.934177,5.329428,5.784489,6.02288
5,8.46,0.70809,0.49945,0.60214,1.383268,2.442278,2.798756,2.952491,3.24326,3.897151,3.934252,4.269236,4.35375,4.400785
6,8.535,0.7022,0.49905,0.6037,1.371514,2.452166,2.825646,2.929712,3.187154,3.884212,0.0,4.154756,4.274404,4.313281


In [14]:
# model.predict() already applies the inverse target transform, so y_pred is
# on the same scale as y_test.
y_pred = model.predict(X_test)

# Metrics computed on the original (untransformed) scale of K.
# mean_absolute_percentage_error returns a fraction, not a percentage
# (1.0 means 100 %).
r2, rmse, mae, mape = (
    metric(y_test, y_pred)
    for metric in (
        r2_score,
        root_mean_squared_error,
        mean_absolute_error,
        mean_absolute_percentage_error,
    )
)

report = [
    f'R2: {r2:.3f}',
    f'RMSE: {rmse:.3f}',
    f'MAE: {mae:.3f}',
    f'MAPE: {mape:.3f}',
]
print(*report, sep='\n')

R2: -88.769
RMSE: 2.474
MAE: 2.167
MAPE: 1.559


In [15]:
# Build the predictions on kg_exp's own index. Both DataFrame.insert and
# column assignment align a Series on index labels, so a Series with a default
# 0..n-1 index would produce NaN / misplaced values whenever kg_exp carries a
# different index (for example after filtering rows). A length mismatch
# between y_pred and kg_exp now raises instead of being padded with NaN.
predictions = pd.Series(y_pred, index=kg_exp.index, name='K_pred')

# First run: place K_pred as the second column (position 1).
# Re-runs: the column already exists, so overwrite it in place; inserting
# again would raise because the column name is already present.
if 'K_pred' in kg_exp.columns:
    kg_exp['K_pred'] = predictions
else:
    kg_exp.insert(1, 'K_pred', predictions)

kg_exp

Unnamed: 0,K,K_pred,G,rho,dx,dy,dz,omega2_0,omega2_1,omega2_2,omega2_3,omega2_4,omega2_5,omega2_6,omega2_7,omega2_8,omega2_9
0,1.663,2.901942,0.7362,8.052,0.7042,0.631,0.58393,1.485197,2.243456,2.722901,2.909486,3.16277,3.44288,3.77183,3.805168,3.946075,4.272144
1,1.315,2.96675,0.5178,5.403,0.33511,0.26023,0.15492,5.171781,8.546431,13.095081,17.098761,19.098981,19.59366,19.775137,23.387424,26.975627,29.017625
2,1.0,2.857261,0.6087,9.401,0.2348,0.1914,0.1484,8.507717,14.629007,17.771782,22.233325,25.365264,26.061988,27.416796,28.09385,29.148011,30.945435
3,1.146,2.857928,0.6151,9.401,0.23668,0.19364,0.14553,8.507717,14.629007,17.771782,22.233325,25.365264,26.061988,27.416796,28.09385,29.148011,30.945435
4,1.317,3.755111,0.3807,8.405,0.46319,0.2193,0.38864,1.320706,0.0,3.498974,3.897721,4.139785,4.43806,4.934177,5.329428,5.784489,6.02288
5,1.691,3.010643,0.7559,8.46,0.70809,0.49945,0.60214,1.383268,2.442278,2.798756,2.952491,3.24326,3.897151,3.934252,4.269236,4.35375,4.400785
6,1.691,6.64059,0.7522,8.535,0.7022,0.49905,0.6037,1.371514,2.452166,2.825646,2.929712,3.187154,3.884212,0.0,4.154756,4.274404,4.313281
