In [33]:
import os
import sys
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats as stats
import matplotlib.pyplot as plt
import statsmodels.formula.api as sm

from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression, ElasticNetCV, ElasticNet, RidgeCV, LassoCV, HuberRegressor
from sklearn.metrics import r2_score, root_mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from sklearn.compose import TransformedTargetRegressor


%matplotlib inline
plt.style.use('ggplot')

In [34]:
def str_to_ndarray(x):
    """Parse a string of space-separated numbers into a 1-D NumPy array."""
    return np.fromstring(x, sep=' ')


# Training table; the 'eigvals' column is converted to one ndarray per row while reading.
path = os.path.join('..', '..', 'data', 'KG_combin.csv')
kg_data = pd.read_csv(path, converters={'eigvals': str_to_ndarray})

# Spread eigvals[6] ... eigvals[19] over the columns omega2_0 ... omega2_13,
# each one divided by the row's 'rho'.
# NOTE(review): presumably the first six eigenvalues are skipped because they are
# rigid-body modes, and 14 is the shortest usable spectrum length — confirm.
for q in range(14):
    position = 6 + q
    picked = kg_data['eigvals'].apply(lambda arr, position=position: arr[position])
    kg_data[f'omega2_{q}'] = picked / kg_data['rho']

# The raw arrays are no longer needed once they have been expanded into columns.
kg_data = kg_data.drop(columns=['eigvals'])

In [35]:
# Training set: the rows of KG_combin.csv. 'K' is the target; 'G' and 'shape' are
# removed from the feature frame.
y_train = kg_data['K']
X_train = kg_data.drop(columns=['K', 'G', 'shape'])

sqrt_columns = ['rho', 'dx', 'dy', 'dz']
omega_columns = ['omega2_' + str(i) for i in range(10)]

# Per-column feature transforms. Columns that are not listed in either group
# are discarded (remainder='drop').
feature_transformer = ColumnTransformer(
    transformers=[
        ('sqrt', FunctionTransformer(np.sqrt), sqrt_columns),
        # log1p instead of log so that zero-valued entries do not raise/produce -inf
        ('log', FunctionTransformer(np.log1p), omega_columns),
    ],
    remainder='drop',
)

# Full regressor: feature transforms -> min-max scaling -> Huber regression.
pipeline = Pipeline(
    steps=[
        ('feature_transformation', feature_transformer),
        ('scaling', MinMaxScaler()),
        (
            'regression',
            HuberRegressor(epsilon=1, alpha=0.0001, max_iter=10000, fit_intercept=True),
        ),
    ]
)

# The pipeline is trained on sqrt(K); predictions are squared back to the scale of K.
model = TransformedTargetRegressor(
    regressor=pipeline,
    func=np.sqrt,
    inverse_func=np.square,
)

model.fit(X_train, y_train)

Los datos de test son los valores experimentales, leídos de `KG_Experimental_Iso.csv`.

In [36]:
# Experimental samples, used as the test set.
kg_exp = pd.read_csv('KG_Experimental_Iso.csv')

# Target is 'K'; every column except 'K' and 'G' is kept as a candidate feature.
# NOTE(review): the displayed table contains exact 0.0 entries (omega2_1 in row 4,
# omega2_6 in row 6). Presumably these mark modes that were not measured — confirm,
# because they are passed to the model as real values.
y_test = kg_exp['K']
X_test = kg_exp.drop(columns=['K', 'G'])
X_test

Unnamed: 0,rho,dx,dy,dz,omega2_0,omega2_1,omega2_2,omega2_3,omega2_4,omega2_5,omega2_6,omega2_7,omega2_8,omega2_9
0,8.052,0.7042,0.631,0.58393,0.19396,0.238385,0.262625,0.271474,0.283044,0.295312,0.309098,0.310461,0.316157,0.32896
1,5.403,0.33511,0.26023,0.15492,0.361943,0.465278,0.575936,0.658116,0.695545,0.704495,0.70775,0.769682,0.82662,0.857336
2,9.401,0.2348,0.1914,0.1484,0.464223,0.608734,0.670943,0.750451,0.801567,0.812501,0.833352,0.843579,0.85926,0.885357
3,9.401,0.23668,0.19364,0.14553,0.464223,0.608734,0.670943,0.750451,0.801567,0.812501,0.833352,0.843579,0.85926,0.885357
4,8.405,0.46319,0.2193,0.38864,0.182904,0.0,0.297708,0.314214,0.323824,0.335287,0.353531,0.367418,0.382783,0.390591
5,8.46,0.70809,0.49945,0.60214,0.187186,0.248724,0.266258,0.273473,0.286623,0.314191,0.315683,0.328848,0.332087,0.333876
6,8.535,0.7022,0.49905,0.6037,0.186389,0.249227,0.267534,0.272416,0.284133,0.313669,0.0,0.324409,0.329047,0.33054


In [37]:
# Predict on the test features. The target transform is inverted inside predict(),
# so y_pred is directly comparable with y_test.
y_pred = model.predict(X_test)

# Error metrics computed on the original (untransformed) target values.
r2 = r2_score(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

# One line per metric, three decimals each.
for label, score in (('R2', r2), ('RMSE', rmse), ('MAE', mae), ('MAPE', mape)):
    print(f'{label}: {score:.3f}')

R2: -16.168
RMSE: 1.082
MAE: 1.065
MAPE: 0.795


In [38]:
# Attach the predictions to the experimental table as column 'K_pred'.
#
# The Series is built on kg_exp's own index. pandas aligns a Series by index label
# when it is inserted/assigned as a column, so a Series with a default 0..n-1 index
# would produce NaN (or misplaced values) if kg_exp were ever filtered, re-sorted or
# re-indexed. Using kg_exp.index pairs y_pred with the rows strictly by position and
# raises a ValueError if the lengths do not match.
predictions = pd.Series(y_pred, index=kg_exp.index, name='K_pred')

# First run: place 'K_pred' as the second column (position 1).
# Re-run: the column already exists, so overwrite it where it is.
if 'K_pred' not in kg_exp.columns:
    kg_exp.insert(1, 'K_pred', predictions)
else:
    kg_exp['K_pred'] = predictions

kg_exp

Unnamed: 0,K,K_pred,G,rho,dx,dy,dz,omega2_0,omega2_1,omega2_2,omega2_3,omega2_4,omega2_5,omega2_6,omega2_7,omega2_8,omega2_9
0,1.663,2.558919,0.7362,8.052,0.7042,0.631,0.58393,0.19396,0.238385,0.262625,0.271474,0.283044,0.295312,0.309098,0.310461,0.316157,0.32896
1,1.315,2.130683,0.5178,5.403,0.33511,0.26023,0.15492,0.361943,0.465278,0.575936,0.658116,0.695545,0.704495,0.70775,0.769682,0.82662,0.857336
2,1.0,2.243029,0.6087,9.401,0.2348,0.1914,0.1484,0.464223,0.608734,0.670943,0.750451,0.801567,0.812501,0.833352,0.843579,0.85926,0.885357
3,1.146,2.243619,0.6151,9.401,0.23668,0.19364,0.14553,0.464223,0.608734,0.670943,0.750451,0.801567,0.812501,0.833352,0.843579,0.85926,0.885357
4,1.317,2.479378,0.3807,8.405,0.46319,0.2193,0.38864,0.182904,0.0,0.297708,0.314214,0.323824,0.335287,0.353531,0.367418,0.382783,0.390591
5,1.691,2.565938,0.7559,8.46,0.70809,0.49945,0.60214,0.187186,0.248724,0.266258,0.273473,0.286623,0.314191,0.315683,0.328848,0.332087,0.333876
6,1.691,3.053384,0.7522,8.535,0.7022,0.49905,0.6037,0.186389,0.249227,0.267534,0.272416,0.284133,0.313669,0.0,0.324409,0.329047,0.33054
