In [1]:
import warnings

import numpy as np
import pandas as pd
from lightgbm import LGBMRegressor
from sklearn.ensemble import ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor

warnings.filterwarnings(action='ignore')

In [None]:
# Read the dataset into `df` and preview its first five rows.
# NOTE(review): the path ends with '/', so it looks like a directory rather
# than a CSV file — confirm the intended file name.
df = pd.read_csv(filepath_or_buffer='../data/all/')
df.head(n=5)

In [None]:
# Split the loaded frame into the feature matrix `X` and the regression
# target `y`. `material_id` is dropped first so it is never fed to the models
# as a feature.
data_set = df.drop(columns='material_id')
# The column was previously misspelled as 'taeget', which made `drop` raise a
# KeyError; it must match the 'target' column read on the next line.
X = data_set.drop(columns='target')
y = data_set['target']

In [None]:
# Hyper-parameters for the five regressors compared in this notebook. Each
# dict is unpacked (`**params`) into the matching estimator constructor when
# the pipelines are built.
#
# The cross-validation splitter is already seeded, but the estimators were
# not, so the reported metrics changed from run to run. One shared seed is
# passed to every estimator to make a "Restart & Run All" reproducible.
MODEL_SEED = 42

# RandomForestRegressor
rfr_params = {
    'n_estimators': 800,
    'max_depth': 20,
    'min_samples_split': 4,
    'min_samples_leaf': 2,
    'random_state': MODEL_SEED,
}

# ExtraTreesRegressor
etr_params = {
    'max_features': 1.0,
    'min_samples_leaf': 1,
    'min_samples_split': 2,
    'n_estimators': 300,
    'random_state': MODEL_SEED,
}

# GradientBoostingRegressor
gbr_params = {
    'learning_rate': 0.1,
    'loss': 'squared_error',
    'max_depth': 7,
    'min_samples_leaf': 1,
    'min_samples_split': 2,
    'min_weight_fraction_leaf': 0.0,
    'n_estimators': 800,
    'random_state': MODEL_SEED,
}

# LGBMRegressor
lgbr_params = {
    'n_estimators': 2000,
    'num_leaves': 35,
    'random_state': MODEL_SEED,
}

# XGBRegressor
xgbr_params = {
    'colsample_bytree': 1.0,
    'learning_rate': 0.1,
    'max_depth': 7,
    'n_estimators': 2000,
    'subsample': 1.0,
    'random_state': MODEL_SEED,
}



In [None]:
# (display label, Pipeline) pairs, one per regressor under comparison.
# Every pipeline has the same shape: a mean imputer that fills NaN values,
# followed by the regressor configured from its parameter dict. A fresh
# imputer instance is created for each pipeline.
pipelines = [
    (
        label,
        Pipeline([
            ('imputer', SimpleImputer(missing_values=np.nan, strategy='mean')),
            (step_name, regressor),
        ]),
    )
    for label, step_name, regressor in (
        ('RFR', 'rfr', RandomForestRegressor(**rfr_params)),
        ('ETR', 'etr', ExtraTreesRegressor(**etr_params)),
        ('GBR', 'gbr', GradientBoostingRegressor(**gbr_params)),
        ('LGBM', 'lgbm', LGBMRegressor(**lgbr_params)),
        ('XGBoost', 'xgboost', XGBRegressor(**xgbr_params)),
    )
]


#### Shear Modulus(G)

In [None]:
# 10-fold cross-validation of every pipeline, reporting R^2, MAE and RMSE per
# fold together with their means.
# NOTE(review): `X` and `y` come from the data-preparation cell; nothing in
# this cell selects a shear-modulus target specifically — confirm the right
# dataset is loaded before running.

# The splitter is seeded with an integer, so each call to `split` yields the
# same folds; it is built once rather than once per model.
kf = KFold(n_splits=10, shuffle=True, random_state=79)

for model_name, pipeline in pipelines:
    print(f"\n{'='*50}")
    print(f"训练模型: {model_name}")
    print(f"{'='*50}")

    scores = []
    mae_scores = []
    rmse_scores = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # `fit` re-trains the whole pipeline (imputer and regressor) on the
        # training part of this fold only.
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        # R^2 is computed from the predictions already in hand;
        # `pipeline.score` would run a second full prediction pass to
        # produce the same number.
        # Cast to plain floats so the printed lists have a uniform format.
        scores.append(float(r2_score(y_test, y_pred)))
        mae_scores.append(float(mean_absolute_error(y_test, y_pred)))
        rmse_scores.append(float(np.sqrt(mean_squared_error(y_test, y_pred))))

    print(f"{model_name} - R^2 scores:", scores)
    print(f"{model_name} - Mean R^2 score:", sum(scores) / len(scores))
    print(f"{model_name} - MAE scores:", mae_scores)
    print(f"{model_name} - Mean MAE:", sum(mae_scores) / len(mae_scores))
    print(f"{model_name} - RMSE scores:", rmse_scores)
    print(f"{model_name} - Mean RMSE:", sum(rmse_scores) / len(rmse_scores))

#### Bulk Modulus(K)

In [None]:
# 10-fold cross-validation of every pipeline, reporting R^2, MAE and RMSE per
# fold together with their means.
# NOTE(review): `X` and `y` come from the data-preparation cell; nothing in
# this cell selects a bulk-modulus target specifically — confirm the right
# dataset is loaded before running.

# The splitter is seeded with an integer, so each call to `split` yields the
# same folds; it is built once rather than once per model.
kf = KFold(n_splits=10, shuffle=True, random_state=35)

for model_name, pipeline in pipelines:
    print(f"\n{'='*50}")
    print(f"训练模型: {model_name}")
    print(f"{'='*50}")

    scores = []
    mae_scores = []
    rmse_scores = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # `fit` re-trains the whole pipeline (imputer and regressor) on the
        # training part of this fold only.
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)

        # R^2 is computed from the predictions already in hand;
        # `pipeline.score` would run a second full prediction pass to
        # produce the same number.
        # Cast to plain floats so the printed lists have a uniform format.
        scores.append(float(r2_score(y_test, y_pred)))
        mae_scores.append(float(mean_absolute_error(y_test, y_pred)))
        rmse_scores.append(float(np.sqrt(mean_squared_error(y_test, y_pred))))

    print(f"{model_name} - R^2 scores:", scores)
    print(f"{model_name} - Mean R^2 score:", sum(scores) / len(scores))
    print(f"{model_name} - MAE scores:", mae_scores)
    print(f"{model_name} - Mean MAE:", sum(mae_scores) / len(mae_scores))
    print(f"{model_name} - RMSE scores:", rmse_scores)
    print(f"{model_name} - Mean RMSE:", sum(rmse_scores) / len(rmse_scores))