In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score


In [None]:
from google.colab import drive

# Mount Google Drive so files stored there are reachable from the Colab runtime.
drive.mount('/content/drive')

# Read the dataset CSV into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer="/content/drive/My Drive/Tesis/Dataset/nuevo_dataset_prueba.csv"
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Regression target: the 'Humedad' column.
# Alternative target kept for reference:
# y = data['CSS BRIX°']
y = data.loc[:, 'Humedad']

# Predictors: the twelve NIR_* and Vis_* columns.
X = data.loc[:, [
    'NIR_R', 'NIR_S', 'NIR_T', 'NIR_U', 'NIR_V', 'NIR_W',
    'Vis_Violet', 'Vis_Blue', 'Vis_Green', 'Vis_Yellow', 'Vis_Orange', 'Vis_Red',
]]

In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Pipeline stages, in execution order: standardise -> PCA -> linear regression.
# Each stage is kept in its own variable so its fitted attributes can be read afterwards.
scaler = StandardScaler()
pca = PCA(n_components=0.95)  # keep enough components to explain 95% of the variance
lr_pca_model = LinearRegression()

pca_pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('lr', lr_pca_model),
])
pca_pipeline.fit(X_train, y_train)

In [None]:
# Score the fitted pipeline on the held-out test set.
y_pred_pca = pca_pipeline.predict(X_test)
mse_pca, r2_pca = (
    mean_squared_error(y_test, y_pred_pca),
    r2_score(y_test, y_pred_pca),
)

print("Rendimiento del modelo de regresión lineal con PCA:")
print("MSE:", mse_pca)
print("R-cuadrado:", r2_pca)

# Show the first test row next to its prediction as a spot-check sample.
print(X_test.iloc[:1])
print(y_pred_pca[:1])


Rendimiento del modelo de regresión lineal con PCA:
MSE: 14.322020430071595
R-cuadrado: 0.3772380999631956
       NIR_R    NIR_S    NIR_T    NIR_U    NIR_V   NIR_W  Vis_Violet  \
83  2240.319  754.018  199.185  109.068  146.565  87.921     423.837   

    Vis_Blue  Vis_Green  Vis_Yellow  Vis_Orange   Vis_Red  
83   353.905    484.946     860.277    1095.734  1209.172  
[50.14995755]


In [None]:
# Pull the fitted parameters out of each pipeline stage.
scaler_mean, scaler_scale = scaler.mean_, scaler.scale_
pca_components, pca_mean = pca.components_, pca.mean_
regression_coef, regression_intercept = lr_pca_model.coef_, lr_pca_model.intercept_

# Print them in pipeline order: scaler, PCA, regression.
print("Scaler Mean:", scaler_mean)
print("Scaler Scale:", scaler_scale)
print("PCA Components:", pca_components)
print("PCA Mean:", pca_mean)
print("Regression Coefficients:", regression_coef)
print("Regression Intercept:", regression_intercept)


Scaler Mean: [2770.4822375  788.1494125  224.4562125  125.68415    184.9026375
  105.791875   525.0081     452.1614875 1296.8736125 1677.7566875
 1939.014425  1427.188925 ]
Scaler Scale: [1132.73807612  302.3129269    77.77614829   41.93482105   69.82344766
   38.78783943  200.30376211  220.80053736  967.36684327  960.46603049
  936.11594918  514.95361525]
PCA Components: [[ 0.31573839  0.25607869  0.27618328  0.28396705  0.3266374   0.31883357
   0.27981434  0.28515207  0.26897175  0.27227401  0.3041765   0.26619841]
 [ 0.05550654  0.43281096  0.34708358  0.31229819  0.06250145  0.17614869
   0.08662188 -0.33024992 -0.41567465 -0.4270582  -0.2912255  -0.01081437]
 [-0.2264626   0.14581474 -0.19661463 -0.20774012 -0.26748153 -0.13416462
   0.50719426  0.02343123 -0.21495872 -0.1110663   0.14785894  0.64644619]
 [-0.3786383  -0.31257154  0.41882848  0.4238955  -0.19925321 -0.28980986
   0.3927901   0.25843179  0.0456388   0.04243028 -0.09667048 -0.2140942 ]]
PCA Mean: [ 1.44328993e-16 -

In [None]:
# Serialise the fitted parameters as comma-separated text.

scaler_params = ','.join(str(value) for value in scaler.mean_)

# pca.components_ is 2-D, so iterating it yields whole rows. Calling str() on a
# row gives NumPy's space-separated, line-wrapped, rounded array repr instead of
# comma-separated numbers. Join the scalars of each row explicitly so that every
# value is comma-separated and printed at full precision: [v,v,...],[v,v,...]
pca_components_params = ','.join(
    '[' + ','.join(str(value) for value in component) + ']'
    for component in pca.components_
)
pca_mean_params = ','.join(str(value) for value in pca.mean_)
regression_coef_params = ','.join(str(value) for value in lr_pca_model.coef_)


print("Scaler Mean:", scaler_params)

print("PCA Components:", pca_components_params)
print("PCA Mean:", pca_mean_params)
print("Regression Coefficients:", regression_coef_params)



Scaler Mean: 2770.4822375,788.1494125,224.45621250000005,125.68415,184.9026375,105.79187499999998,525.0081,452.1614875,1296.8736125,1677.7566875,1939.0144249999998,1427.188925
PCA Components: [0.31573839 0.25607869 0.27618328 0.28396705 0.3266374  0.31883357
 0.27981434 0.28515207 0.26897175 0.27227401 0.3041765  0.26619841],[ 0.05550654  0.43281096  0.34708358  0.31229819  0.06250145  0.17614869
  0.08662188 -0.33024992 -0.41567465 -0.4270582  -0.2912255  -0.01081437],[-0.2264626   0.14581474 -0.19661463 -0.20774012 -0.26748153 -0.13416462
  0.50719426  0.02343123 -0.21495872 -0.1110663   0.14785894  0.64644619],[-0.3786383  -0.31257154  0.41882848  0.4238955  -0.19925321 -0.28980986
  0.3927901   0.25843179  0.0456388   0.04243028 -0.09667048 -0.2140942 ]
PCA Mean: 1.4432899320127036e-16,-1.7763568394002506e-16,-6.106226635438361e-16,-6.661338147750939e-17,6.38378239159465e-17,6.411537967210279e-16,-9.159339953157541e-17,-8.881784197001253e-17,-8.326672684688674e-18,2.775557561562891

In [None]:
# Format the PCA components as brace-delimited, comma-separated rows: {v,v,...},{v,v,...}
#
# The string is rebuilt from pca.components_ instead of patching the previous
# text: replacing every space with a comma turned NumPy's alignment padding and
# line wraps into empty elements (",,"), commas after newlines and a trailing
# ",}". Rebuilding also makes this cell safe to re-run.
pca_components_params = ','.join(
    '{' + ','.join(str(value) for value in component) + '}'
    for component in pca.components_
)
print("PCA Components:", pca_components_params)


PCA Components: {0.31573839,0.25607869,0.27618328,0.28396705,0.3266374,,0.31883357
,0.27981434,0.28515207,0.26897175,0.27227401,0.3041765,,0.26619841},{,0.05550654,,0.43281096,,0.34708358,,0.31229819,,0.06250145,,0.17614869
,,0.08662188,-0.33024992,-0.41567465,-0.4270582,,-0.2912255,,-0.01081437},{-0.2264626,,,0.14581474,-0.19661463,-0.20774012,-0.26748153,-0.13416462
,,0.50719426,,0.02343123,-0.21495872,-0.1110663,,,0.14785894,,0.64644619},{-0.3786383,,-0.31257154,,0.41882848,,0.4238955,,-0.19925321,-0.28980986
,,0.3927901,,,0.25843179,,0.0456388,,,0.04243028,-0.09667048,-0.2140942,}


In [None]:
# Target: the '°BRIX' column. Predictors: the twelve NIR_* and Vis_* columns.
y = data.loc[:, '°BRIX']
X = data.loc[:, [
    'NIR_R', 'NIR_S', 'NIR_T', 'NIR_U', 'NIR_V', 'NIR_W',
    'Vis_Violet', 'Vis_Blue', 'Vis_Green', 'Vis_Yellow', 'Vis_Orange', 'Vis_Red',
]]

# Hold out 20% of the rows as a test set; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Pipeline stages, in execution order: standardise -> PCA -> linear regression.
scaler = StandardScaler()
pca = PCA(n_components=0.95)  # keep enough components to explain 95% of the variance
lr_pca_model = LinearRegression()

pca_pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('lr', lr_pca_model),
])
pca_pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out test set.
y_pred_pca = pca_pipeline.predict(X_test)
mse_pca, r2_pca = (
    mean_squared_error(y_test, y_pred_pca),
    r2_score(y_test, y_pred_pca),
)

print("Rendimiento del modelo de regresión lineal con PCA:")
print("MSE:", mse_pca)
print("R-cuadrado:", r2_pca)

# Show the first test row next to its prediction as a spot-check sample.
print(X_test.iloc[:1])
print(y_pred_pca[:1])

# Pull the fitted parameters out of each pipeline stage.
scaler_mean, scaler_scale = scaler.mean_, scaler.scale_
pca_components, pca_mean = pca.components_, pca.mean_
regression_coef, regression_intercept = lr_pca_model.coef_, lr_pca_model.intercept_

# Only the scale and the intercept are printed here.
print("Scaler Scale:", scaler_scale)
print("Regression Intercept:", regression_intercept)

# Serialise the fitted parameters as comma-separated text.

scaler_params = ','.join(str(value) for value in scaler.mean_)

# pca.components_ is 2-D, so iterating it yields whole rows. The rows are
# formatted value by value as {v,v,...},{v,v,...}. Calling str() on a row and
# then replacing spaces with commas would turn NumPy's alignment padding and
# line wraps into empty elements (",,") and stray commas.
pca_components_params = ','.join(
    '{' + ','.join(str(value) for value in component) + '}'
    for component in pca.components_
)
pca_mean_params = ','.join(str(value) for value in pca.mean_)
regression_coef_params = ','.join(str(value) for value in lr_pca_model.coef_)


print("Scaler Mean:", scaler_params)
print("PCA Mean:", pca_mean_params)
print("Regression Coefficients:", regression_coef_params)
print("PCA Components:", pca_components_params)

Rendimiento del modelo de regresión lineal con PCA:
MSE: 0.30926118759480503
R-cuadrado: 0.6691823805279365
       NIR_R    NIR_S    NIR_T    NIR_U    NIR_V   NIR_W  Vis_Violet  \
83  2240.319  754.018  199.185  109.068  146.565  87.921     423.837   

    Vis_Blue  Vis_Green  Vis_Yellow  Vis_Orange   Vis_Red  
83   353.905    484.946     860.277    1095.734  1209.172  
[8.95916708]
Scaler Scale: [1132.73807612  302.3129269    77.77614829   41.93482105   69.82344766
   38.78783943  200.30376211  220.80053736  967.36684327  960.46603049
  936.11594918  514.95361525]
Regression Intercept: 8.492744121272287
Scaler Mean: 2770.4822375,788.1494125,224.45621250000005,125.68415,184.9026375,105.79187499999998,525.0081,452.1614875,1296.8736125,1677.7566875,1939.0144249999998,1427.188925
PCA Mean: 1.4432899320127036e-16,-1.7763568394002506e-16,-6.106226635438361e-16,-6.661338147750939e-17,6.38378239159465e-17,6.411537967210279e-16,-9.159339953157541e-17,-8.881784197001253e-17,-8.326672684688674e-

In [None]:
# prompt: Realizar lo anterior pero con variables para pH

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
from google.colab import drive
# Mount Google Drive so files stored there are reachable from the Colab runtime.
drive.mount('/content/drive')

# Read the dataset CSV into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer="/content/drive/My Drive/Tesis/Dataset/nuevo_dataset_prueba.csv"
)

# Target: the 'pH' column. Predictors: the twelve NIR_* and Vis_* columns.
y = data.loc[:, 'pH']
X = data.loc[:, [
    'NIR_R', 'NIR_S', 'NIR_T', 'NIR_U', 'NIR_V', 'NIR_W',
    'Vis_Violet', 'Vis_Blue', 'Vis_Green', 'Vis_Yellow', 'Vis_Orange', 'Vis_Red',
]]

# Hold out 20% of the rows as a test set; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Pipeline stages, in execution order: standardise -> PCA -> linear regression.
scaler = StandardScaler()
pca = PCA(n_components=0.95)  # keep enough components to explain 95% of the variance
lr_pca_model = LinearRegression()

pca_pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('lr', lr_pca_model),
])
pca_pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out test set.
y_pred_pca = pca_pipeline.predict(X_test)
mse_pca, r2_pca = (
    mean_squared_error(y_test, y_pred_pca),
    r2_score(y_test, y_pred_pca),
)

print("Rendimiento del modelo de regresión lineal con PCA para pH:")
print("MSE:", mse_pca)
print("R-cuadrado:", r2_pca)

# Show the first test row next to its prediction as a spot-check sample.
print(X_test.iloc[:1])
print(y_pred_pca[:1])

# Pull the fitted parameters out of each pipeline stage.
scaler_mean, scaler_scale = scaler.mean_, scaler.scale_
pca_components, pca_mean = pca.components_, pca.mean_
regression_coef, regression_intercept = lr_pca_model.coef_, lr_pca_model.intercept_

# Print them in pipeline order: scaler, PCA, regression.
print("Scaler Mean:", scaler_mean)
print("Scaler Scale:", scaler_scale)
print("PCA Components:", pca_components)
print("PCA Mean:", pca_mean)
print("Regression Coefficients:", regression_coef)
print("Regression Intercept:", regression_intercept)


# Serialise the fitted parameters as comma-separated text.
scaler_params = ','.join(str(value) for value in scaler.mean_)

# pca.components_ is 2-D, so iterating it yields whole rows. Calling str() on a
# row gives NumPy's space-separated, line-wrapped, rounded array repr. Join the
# scalars of each row explicitly instead: [v,v,...],[v,v,...]
pca_components_params = ','.join(
    '[' + ','.join(str(value) for value in component) + ']'
    for component in pca.components_
)
pca_mean_params = ','.join(str(value) for value in pca.mean_)
regression_coef_params = ','.join(str(value) for value in lr_pca_model.coef_)


print("Scaler Mean:", scaler_params)

print("PCA Components:", pca_components_params)
print("PCA Mean:", pca_mean_params)
print("Regression Coefficients:", regression_coef_params)


# Same rows with braces instead of brackets: {v,v,...},{v,v,...}
# The string above contains no spaces, so only the bracket characters need to be
# swapped. Replacing spaces with commas is what used to create empty ",," elements.
pca_components_params = pca_components_params.replace("[", "{").replace("]", "}")
print("PCA Components:", pca_components_params)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Rendimiento del modelo de regresión lineal con PCA para pH:
MSE: 0.016431069494937382
R-cuadrado: 0.5468883146426708
       NIR_R    NIR_S    NIR_T    NIR_U    NIR_V   NIR_W  Vis_Violet  \
83  2240.319  754.018  199.185  109.068  146.565  87.921     423.837   

    Vis_Blue  Vis_Green  Vis_Yellow  Vis_Orange   Vis_Red  
83   353.905    484.946     860.277    1095.734  1209.172  
[6.55872494]
Scaler Mean: [2770.4822375  788.1494125  224.4562125  125.68415    184.9026375
  105.791875   525.0081     452.1614875 1296.8736125 1677.7566875
 1939.014425  1427.188925 ]
Scaler Scale: [1132.73807612  302.3129269    77.77614829   41.93482105   69.82344766
   38.78783943  200.30376211  220.80053736  967.36684327  960.46603049
  936.11594918  514.95361525]
PCA Components: [[ 0.31573839  0.25607869  0.27618328  0.28396705  0.3266374   0.31883357
   0.27981434  0.28515207  

In [None]:
# prompt: Realizar lo anterior pero con variables para IM

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
from google.colab import drive
# Mount Google Drive so files stored there are reachable from the Colab runtime.
drive.mount('/content/drive')

# Read the dataset CSV into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer="/content/drive/My Drive/Tesis/Dataset/nuevo_dataset_prueba.csv"
)

# Target: the 'IM' column. Predictors: the twelve NIR_* and Vis_* columns.
y = data.loc[:, 'IM']
X = data.loc[:, [
    'NIR_R', 'NIR_S', 'NIR_T', 'NIR_U', 'NIR_V', 'NIR_W',
    'Vis_Violet', 'Vis_Blue', 'Vis_Green', 'Vis_Yellow', 'Vis_Orange', 'Vis_Red',
]]

# Hold out 20% of the rows as a test set; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Pipeline stages, in execution order: standardise -> PCA -> linear regression.
scaler = StandardScaler()
pca = PCA(n_components=0.95)  # keep enough components to explain 95% of the variance
lr_pca_model = LinearRegression()

pca_pipeline = Pipeline(steps=[
    ('scaler', scaler),
    ('pca', pca),
    ('lr', lr_pca_model),
])
pca_pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out test set.
y_pred_pca = pca_pipeline.predict(X_test)
mse_pca, r2_pca = (
    mean_squared_error(y_test, y_pred_pca),
    r2_score(y_test, y_pred_pca),
)

print("Rendimiento del modelo de regresión lineal con PCA para IM:")
print("MSE:", mse_pca)
print("R-cuadrado:", r2_pca)

# Show the first test row next to its prediction as a spot-check sample.
print(X_test.iloc[:1])
print(y_pred_pca[:1])

# Pull the fitted parameters out of each pipeline stage.
scaler_mean, scaler_scale = scaler.mean_, scaler.scale_
pca_components, pca_mean = pca.components_, pca.mean_
regression_coef, regression_intercept = lr_pca_model.coef_, lr_pca_model.intercept_

# Print them in pipeline order: scaler, PCA, regression.
print("Scaler Mean:", scaler_mean)
print("Scaler Scale:", scaler_scale)
print("PCA Components:", pca_components)
print("PCA Mean:", pca_mean)
print("Regression Coefficients:", regression_coef)
print("Regression Intercept:", regression_intercept)


# Serialise the fitted parameters as comma-separated text.
scaler_params = ','.join(str(value) for value in scaler.mean_)

# pca.components_ is 2-D, so iterating it yields whole rows. Calling str() on a
# row gives NumPy's space-separated, line-wrapped, rounded array repr. Join the
# scalars of each row explicitly instead: [v,v,...],[v,v,...]
pca_components_params = ','.join(
    '[' + ','.join(str(value) for value in component) + ']'
    for component in pca.components_
)
pca_mean_params = ','.join(str(value) for value in pca.mean_)
regression_coef_params = ','.join(str(value) for value in lr_pca_model.coef_)


print("Scaler Mean:", scaler_params)

print("PCA Components:", pca_components_params)
print("PCA Mean:", pca_mean_params)
print("Regression Coefficients:", regression_coef_params)


# Same rows with braces instead of brackets: {v,v,...},{v,v,...}
# The string above contains no spaces, so only the bracket characters need to be
# swapped. Replacing spaces with commas is what used to create empty ",," elements.
pca_components_params = pca_components_params.replace("[", "{").replace("]", "}")
print("PCA Components:", pca_components_params)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Rendimiento del modelo de regresión lineal con PCA para IM:
MSE: 23.533550182798955
R-cuadrado: 0.5697563478402753
       NIR_R    NIR_S    NIR_T    NIR_U    NIR_V   NIR_W  Vis_Violet  \
83  2240.319  754.018  199.185  109.068  146.565  87.921     423.837   

    Vis_Blue  Vis_Green  Vis_Yellow  Vis_Orange   Vis_Red  
83   353.905    484.946     860.277    1095.734  1209.172  
[50.90247473]
Scaler Mean: [2770.4822375  788.1494125  224.4562125  125.68415    184.9026375
  105.791875   525.0081     452.1614875 1296.8736125 1677.7566875
 1939.014425  1427.188925 ]
Scaler Scale: [1132.73807612  302.3129269    77.77614829   41.93482105   69.82344766
   38.78783943  200.30376211  220.80053736  967.36684327  960.46603049
  936.11594918  514.95361525]
PCA Components: [[ 0.31573839  0.25607869  0.27618328  0.28396705  0.3266374   0.31883357
   0.27981434  0.28515207  0