In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import f1_score, accuracy_score
# from skopt import BayesSearchCV
from xgboost import XGBRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedKFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

import warnings
warnings.filterwarnings("ignore")

np.set_printoptions(suppress=True)
pd.set_option('display.max_columns', None)

In [2]:
data = pd.read_csv('dane_uzup.csv', sep = ';', decimal=',')

In [3]:
data.head()

Unnamed: 0,Nr źródła,C [%],Si [%],S [%],P [%],Mg [%],Mn [%],Ni [%],Cu [%],Mo [%],Cr [%],Al [%],Sn [%],B [%],V [%],Wydzielenia grafitu [mm-2],Udział wydzieleń grafitu [%],Średnica sferoidów [μm],Wielkość sferoidów,Nodularity [%],Udział perlitu [%],Udział ferrytu [%],Rm [MPa] (as cast),A5 [%] (as cast),Minimalna grubość ścianki [mm],Temperatura austenityzacji [˚C],Czas austenityzacji [min.],Temperatura przemiany izotermicznej [˚C],Czas przemiany izotermicznej [min.],Rm [MPa],"R0,2 [MPa]",A5 [%],Twardość Brinella [HB],Twardość Rockwella [HRC],Twardość Rockwella [HRA],Twardość Rockwella [HRB],Twardość Vickersa [HV],Udarność Charpy [J],Temperatura pomiaru udarności [˚C],Udział austenitu %,Martensite volume fraction Xα',Retained austenite volume fraction XγR
0,1,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,,,80.0,,,,25.4,927.0,120.0,400.0,120.0,585.0,480.0,,,,,,,,,28.7,,
1,1,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,,,80.0,,,,25.4,927.0,120.0,385.0,120.0,701.0,636.0,,,,,,,,,37.5,,
2,1,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,,,80.0,,,,25.4,927.0,120.0,371.0,120.0,1062.0,861.0,10.5,,,,,,,,32.0,,
3,1,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,,,80.0,,,,25.4,927.0,120.0,357.0,120.0,1105.0,946.0,9.6,,,,,,,,28.5,,
4,1,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,,,80.0,,,,25.4,927.0,120.0,343.0,120.0,1185.0,989.0,,,,,,,,,26.2,,


In [4]:
data.columns

Index(['Nr źródła', 'C [%]', 'Si [%]', 'S [%]', 'P [%]', 'Mg [%]', 'Mn [%]',
       'Ni [%]', 'Cu [%]', 'Mo [%]', 'Cr [%]', 'Al [%]', 'Sn [%]', 'B [%]',
       'V [%]', 'Wydzielenia grafitu [mm-2]', 'Udział wydzieleń grafitu [%]',
       'Średnica sferoidów [μm]', 'Wielkość sferoidów', 'Nodularity [%]',
       'Udział perlitu [%]', 'Udział ferrytu [%]', 'Rm [MPa] (as cast)',
       'A5 [%]     (as cast)', 'Minimalna grubość ścianki [mm]',
       'Temperatura austenityzacji [˚C]', 'Czas austenityzacji [min.]',
       'Temperatura przemiany izotermicznej [˚C]',
       'Czas przemiany izotermicznej [min.]', 'Rm [MPa]', 'R0,2 [MPa]',
       'A5 [%]', 'Twardość Brinella [HB]', 'Twardość Rockwella [HRC]',
       'Twardość Rockwella [HRA]', 'Twardość Rockwella [HRB]',
       'Twardość Vickersa [HV]', 'Udarność Charpy [J]',
       'Temperatura pomiaru udarności [˚C]', 'Udział austenitu %',
       'Martensite volume fraction Xα'',
       'Retained austenite volume fraction XγR '],
      dtype='

In [5]:
def zmien_nazwy_kolumn(df):
    nowe_nazwy = {
        'Nr źródła': 'Nr_zrodla',
        'C [%]': 'C',
        'Si [%]': 'Si',
        'S [%]': 'S',
        'P [%]': 'P',
        'Mg [%]': 'Mg',
        'Mn [%]': 'Mn',
        'Ni [%]': 'Ni',
        'Cu [%]': 'Cu',
        'Mo [%]': 'Mo',
        'Cr [%]': 'Cr',
        'Al [%]': 'Al',
        'Sn [%]': 'Sn',
        'B [%]': 'B',
        'V [%]': 'V',
        'Wydzielenia grafitu [mm-2]': 'Wydzielenia_grafitu',
        'Udział wydzieleń grafitu [%]': 'Udzial_wydzielen_grafitu',
        'Średnica sferoidów [μm]': 'Srednica_sferoidow',
        'Wielkość sferoidów': 'Wielkosc_sferoidow',
        'Nodularity [%]': 'Nodularity',
        'Udział perlitu [%]': 'Udzial_perlitu',
        'Udział ferrytu [%]': 'Udzial_ferrytu',
        'Rm [MPa] (as cast)': 'Rm_cast',
        'A5 [%]     (as cast)': 'A5_cast',
        'Minimalna grubość ścianki [mm]': 'Minimalna_grubosc_scianki',
        'Temperatura austenityzacji [˚C]': 'Temperatura_austenityzacji',
        'Czas austenityzacji [min.]': 'Czas_austenityzacji',
        'Temperatura przemiany izotermicznej [˚C]': 'Temperatura_przemiany_izotermicznej',
        'Czas przemiany izotermicznej [min.]': 'Czas_przemiany_izotermicznej',
        'Rm [MPa]': 'Rm',
        'R0,2 [MPa]': 'R0.2',
        'A5 [%]': 'A5',
        'Twardość Brinella [HB]': 'Twardosc_Brinella',
        'Twardość Rockwella [HRC]': 'Twardosc_Rockwella_HRC',
        'Twardość Rockwella [HRA]': 'Twardosc_Rockwella_HRA',
        'Twardość Rockwella [HRB]': 'Twardosc_Rockwella_HRB',
        'Twardość Vickersa [HV]': 'Twardosc_Vickersa',
        'Udarność Charpy [J]': 'Udarnosc_Charpy',
        'Temperatura pomiaru udarności [˚C]': 'Temperatura_pomiaru_udarnosci',
        'Udział austenitu %': 'Udzial_austenitu',
        'Martensite volume fraction Xα''': 'Martensite_volume_X_alpha',
        'Retained austenite volume fraction XγR ': 'Retained_austenite_volume_X_gamma_R'
    }

    return df.rename(columns=nowe_nazwy)

In [6]:
data = zmien_nazwy_kolumn(data)

In [7]:
data.isnull().sum()

Nr_zrodla                                 3
C                                        67
Si                                       67
S                                       131
P                                       146
Mg                                      230
Mn                                      106
Ni                                      247
Cu                                      283
Mo                                      446
Cr                                     1033
Al                                     1095
Sn                                     1098
B                                      1107
V                                      1103
Wydzielenia_grafitu                     976
Udzial_wydzielen_grafitu               1371
Srednica_sferoidow                     1328
Wielkosc_sferoidow                     1469
Nodularity                             1342
Udzial_perlitu                         1333
Udzial_ferrytu                         1312
Rm_cast                         

In [8]:
data.dtypes

Nr_zrodla                               object
C                                      float64
Si                                     float64
S                                      float64
P                                      float64
Mg                                     float64
Mn                                     float64
Ni                                     float64
Cu                                     float64
Mo                                     float64
Cr                                     float64
Al                                     float64
Sn                                     float64
B                                      float64
V                                      float64
Wydzielenia_grafitu                    float64
Udzial_wydzielen_grafitu               float64
Srednica_sferoidow                     float64
Wielkosc_sferoidow                      object
Nodularity                             float64
Udzial_perlitu                         float64
Udzial_ferryt

In [9]:
columns_to_check = ['Nr_zrodla', 'Wielkosc_sferoidow']

for column in columns_to_check:
    data[column] = pd.to_numeric(data[column], errors='coerce')

In [10]:
data.dtypes

Nr_zrodla                              float64
C                                      float64
Si                                     float64
S                                      float64
P                                      float64
Mg                                     float64
Mn                                     float64
Ni                                     float64
Cu                                     float64
Mo                                     float64
Cr                                     float64
Al                                     float64
Sn                                     float64
B                                      float64
V                                      float64
Wydzielenia_grafitu                    float64
Udzial_wydzielen_grafitu               float64
Srednica_sferoidow                     float64
Wielkosc_sferoidow                     float64
Nodularity                             float64
Udzial_perlitu                         float64
Udzial_ferryt

USUWANIE ZBĘDNYCH KOLUMN I WIERSZY

Usuwamy zupełnie puste wiersze

In [11]:
data.iloc[[138,143,996],:]

Unnamed: 0,Nr_zrodla,C,Si,S,P,Mg,Mn,Ni,Cu,Mo,Cr,Al,Sn,B,V,Wydzielenia_grafitu,Udzial_wydzielen_grafitu,Srednica_sferoidow,Wielkosc_sferoidow,Nodularity,Udzial_perlitu,Udzial_ferrytu,Rm_cast,A5_cast,Minimalna_grubosc_scianki,Temperatura_austenityzacji,Czas_austenityzacji,Temperatura_przemiany_izotermicznej,Czas_przemiany_izotermicznej,Rm,R0.2,A5,Twardosc_Brinella,Twardosc_Rockwella_HRC,Twardosc_Rockwella_HRA,Twardosc_Rockwella_HRB,Twardosc_Vickersa,Udarnosc_Charpy,Temperatura_pomiaru_udarnosci,Udzial_austenitu,Martensite volume fraction Xα',Retained_austenite_volume_X_gamma_R
138,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
143,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
996,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [12]:
data = data.drop([138,143,996])

In [13]:
data = data.reset_index(drop=True)

In [14]:
negative_columns = (data < 0).any()

print("Kolumny zawierające wartości ujemne:")
print(negative_columns)

Kolumny zawierające wartości ujemne:
Nr_zrodla                              False
C                                      False
Si                                     False
S                                      False
P                                      False
Mg                                     False
Mn                                     False
Ni                                     False
Cu                                     False
Mo                                     False
Cr                                     False
Al                                     False
Sn                                     False
B                                      False
V                                      False
Wydzielenia_grafitu                    False
Udzial_wydzielen_grafitu               False
Srednica_sferoidow                     False
Wielkosc_sferoidow                     False
Nodularity                             False
Udzial_perlitu                         False
Udzial_ferrytu    

Usuwamy wiersze, w których 'Temperatura_pomiaru_udarnosci' jest ujemna

In [15]:
negative_rows = data[data['Temperatura_pomiaru_udarnosci'] < 0].index

print("Wiersze zawierające wartości ujemne:")
print(negative_rows)

Wiersze zawierające wartości ujemne:
Int64Index([ 423,  424,  425,  426,  427,  428, 1005, 1006, 1007, 1008, 1009,
            1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020,
            1021, 1022, 1023, 1024, 1025, 1026, 1042, 1043, 1044, 1045, 1046,
            1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057,
            1058, 1059, 1060, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1084,
            1085, 1086, 1087, 1088, 1089, 1090, 1182, 1188, 1248, 1249, 1250,
            1251, 1252, 1253, 1254, 1255, 1256, 1267, 1268, 1269, 1270, 1271,
            1272, 1273, 1417, 1418, 1419],
           dtype='int64')


In [16]:
data = data.drop(negative_rows)

In [17]:
data = data.reset_index(drop=True)

In [18]:
for index, row in data.iterrows():
    if not pd.isna(row['Udzial_ferrytu']):
        data.at[index, 'Udzial_perlitu'] = 100 - row['Udzial_ferrytu']
    elif not pd.isna(row['Udzial_perlitu']):
        data.at[index, 'Udzial_ferrytu'] = 100 - row['Udzial_perlitu']

Usuwamy 'B', 'Nr_zrodla', 'Udzial_perlitu', 'Wielkosc_sferoidow', 'Twardosc_Rockwella_HRC', 'Twardosc_Rockwella_HRA', 'Twardosc_Rockwella_HRB', 'Twardosc_Vickersa'

In [19]:
data = data.drop(['B','Nr_zrodla','Udzial_perlitu','Wielkosc_sferoidow','Twardosc_Rockwella_HRC',
                  'Twardosc_Rockwella_HRA','Twardosc_Rockwella_HRB','Twardosc_Vickersa'], axis = 1)

In [20]:
data

Unnamed: 0,C,Si,S,P,Mg,Mn,Ni,Cu,Mo,Cr,Al,Sn,V,Wydzielenia_grafitu,Udzial_wydzielen_grafitu,Srednica_sferoidow,Nodularity,Udzial_ferrytu,Rm_cast,A5_cast,Minimalna_grubosc_scianki,Temperatura_austenityzacji,Czas_austenityzacji,Temperatura_przemiany_izotermicznej,Czas_przemiany_izotermicznej,Rm,R0.2,A5,Twardosc_Brinella,Udarnosc_Charpy,Temperatura_pomiaru_udarnosci,Udzial_austenitu,Martensite volume fraction Xα',Retained_austenite_volume_X_gamma_R
0,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,20.0,,,25.4,927.0,120.0,400.0,120.0,585.0,480.0,,,,,28.7,,
1,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,20.0,,,25.4,927.0,120.0,385.0,120.0,701.0,636.0,,,,,37.5,,
2,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,20.0,,,25.4,927.0,120.0,371.0,120.0,1062.0,861.0,10.5,,,,32.0,,
3,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,20.0,,,25.4,927.0,120.0,357.0,120.0,1105.0,946.0,9.6,,,,28.5,,
4,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,,,,,,,,,20.0,,,25.4,927.0,120.0,343.0,120.0,1185.0,989.0,,,,,26.2,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1418,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1419,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1420,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1421,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


UZUPEŁNIANIE BRAKÓW

In [21]:
X = data.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 30, 32, 33]]
Y = data.iloc[:, [25, 27, 28, 29, 31]]

In [22]:
X.isnull().sum()

C                                        64
Si                                       64
S                                       128
P                                       143
Mg                                      227
Mn                                      103
Ni                                      244
Cu                                      280
Mo                                      433
Cr                                     1026
Al                                     1082
Sn                                     1085
V                                      1090
Wydzielenia_grafitu                     909
Udzial_wydzielen_grafitu               1303
Srednica_sferoidow                     1260
Nodularity                             1275
Udzial_ferrytu                         1203
Rm_cast                                1222
A5_cast                                1254
Minimalna_grubosc_scianki               920
Temperatura_austenityzacji               89
Czas_austenityzacji             

In [23]:
Y.isnull().sum()

Rm                    411
A5                    537
Twardosc_Brinella     562
Udarnosc_Charpy      1167
Udzial_austenitu     1138
dtype: int64

zmienne objaśniające < 500 uzupełniamy medianą

In [24]:
from sklearn.impute import SimpleImputer

In [25]:
X_5 = X[X.columns[X.isnull().sum() < 500]]
X_5

Unnamed: 0,C,Si,S,P,Mg,Mn,Ni,Cu,Mo,Temperatura_austenityzacji,Czas_austenityzacji,Temperatura_przemiany_izotermicznej,Czas_przemiany_izotermicznej
0,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,927.0,120.0,400.0,120.0
1,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,927.0,120.0,385.0,120.0
2,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,927.0,120.0,371.0,120.0
3,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,927.0,120.0,357.0,120.0
4,3.4,2.41,0.017,0.015,0.064,0.15,0.001,0.0,0.001,927.0,120.0,343.0,120.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1418,,,,,,,,,,,,,
1419,,,,,,,,,,,,,
1420,,,,,,,,,,,,,
1421,,,,,,,,,,,,,


In [26]:
imp_mean = SimpleImputer(missing_values = np.NaN, strategy = 'median')
X_5 = imp_mean.fit_transform(X_5)

In [27]:
X_5 = pd.DataFrame(X_5, columns = X.columns[X.isnull().sum() < 500])

In [28]:
X_5.isnull().sum()

C                                      0
Si                                     0
S                                      0
P                                      0
Mg                                     0
Mn                                     0
Ni                                     0
Cu                                     0
Mo                                     0
Temperatura_austenityzacji             0
Czas_austenityzacji                    0
Temperatura_przemiany_izotermicznej    0
Czas_przemiany_izotermicznej           0
dtype: int64

reszte robimy lightgbmRegressor

In [29]:
import lightgbm as lgb

In [30]:
X_m5 = X[X.columns[X.isnull().sum() > 500]]
X_m5

Unnamed: 0,Cr,Al,Sn,V,Wydzielenia_grafitu,Udzial_wydzielen_grafitu,Srednica_sferoidow,Nodularity,Udzial_ferrytu,Rm_cast,A5_cast,Minimalna_grubosc_scianki,R0.2,Temperatura_pomiaru_udarnosci,Martensite volume fraction Xα',Retained_austenite_volume_X_gamma_R
0,,,,,,,,,20.0,,,25.4,480.0,,,
1,,,,,,,,,20.0,,,25.4,636.0,,,
2,,,,,,,,,20.0,,,25.4,861.0,,,
3,,,,,,,,,20.0,,,25.4,946.0,,,
4,,,,,,,,,20.0,,,25.4,989.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1418,,,,,,,,,,,,,,,,
1419,,,,,,,,,,,,,,,,
1420,,,,,,,,,,,,,,,,
1421,,,,,,,,,,,,,,,,


In [31]:
import lightgbm as lgb
from sklearn.model_selection import GridSearchCV

import warnings
warnings.filterwarnings("ignore", category=UserWarning, module="lightgbm")

# Siatka parametrów do przeszukania
param_grid = {
    'num_leaves': [15, 31, 50],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [5, 10],
    'min_child_samples': [10, 20, 30],
    'objective': ['regression']
}

for column_name in X_m5.columns:
    column_data = X_m5[column_name]

    # Sprawdzamy, czy są braki danych
    mask_missing_data = column_data.isnull()

    # Indeksy wierszy z brakującymi danymi
    rows_with_missing_data_indices = column_data.index[mask_missing_data].tolist()

    # Tworzymy zbiór testowy
    y_test = column_data.iloc[rows_with_missing_data_indices]

    # Tworzymy zbiór treningowy
    rows_without_missing_data_indices = column_data.index.difference(rows_with_missing_data_indices)
    y_train = column_data.iloc[rows_without_missing_data_indices]

    # Tworzymy zbiór treningowy i testowy dla zm. objaśniających
    X_train, X_test = X_m5.iloc[rows_without_missing_data_indices], X_m5.iloc[rows_with_missing_data_indices]

    # Przeszukiwanie siatki parametrów
    grid_search = GridSearchCV(lgb.LGBMRegressor(), param_grid, scoring='neg_mean_squared_error', cv=5, n_jobs = -1)
    grid_search.fit(X_train, y_train)

    # Najlepsze parametry
    best_params = grid_search.best_params_

    # Ustawienie najlepszych parametrów
    best_model = lgb.LGBMRegressor(**best_params)

    # Trenowanie modelu
    best_model.fit(X_train, y_train)

    # Predykcja na zbiorze testowym
    y_pred = best_model.predict(X_test)
    y_pred = np.maximum(y_pred, 0)  # Zapewnienie, że wyniki są nieujemne

    # Aktualizacja oryginalnej ramki danych
    X_m5[column_name].loc[rows_with_missing_data_indices] = y_pred

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000130 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 241
[LightGBM] [Info] Number of data points in the train set: 397, number of used features: 16
[LightGBM] [Info] Start training from score 0.033940
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000105 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 241
[LightGBM] [Info] Number of data points in the train set: 397, number of used features: 16
[LightGBM] [Info] Start training from score 0.033940
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000123 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you

In [32]:
X_m5.isnull().sum()

Cr                                     0
Al                                     0
Sn                                     0
V                                      0
Wydzielenia_grafitu                    0
Udzial_wydzielen_grafitu               0
Srednica_sferoidow                     0
Nodularity                             0
Udzial_ferrytu                         0
Rm_cast                                0
A5_cast                                0
Minimalna_grubosc_scianki              0
R0.2                                   0
Temperatura_pomiaru_udarnosci          0
Martensite volume fraction Xα'         0
Retained_austenite_volume_X_gamma_R    0
dtype: int64

In [33]:
X_m5

Unnamed: 0,Cr,Al,Sn,V,Wydzielenia_grafitu,Udzial_wydzielen_grafitu,Srednica_sferoidow,Nodularity,Udzial_ferrytu,Rm_cast,A5_cast,Minimalna_grubosc_scianki,R0.2,Temperatura_pomiaru_udarnosci,Martensite volume fraction Xα',Retained_austenite_volume_X_gamma_R
0,0.0,0.288938,0.0,0.000137,59.285317,8.300063,12.348142,68.371865,20.000000,384.882655,3.629774,25.400000,480.000000,0.278687,0.000000,1.911909
1,0.0,0.288938,0.0,0.000136,59.186504,8.300063,12.348142,68.387679,20.000000,384.882655,3.609935,25.400000,636.000000,0.278687,0.000000,1.911909
2,0.0,0.288938,0.0,0.000139,59.040185,8.297070,12.341948,68.474598,20.000000,385.731117,3.471442,25.400000,861.000000,0.229480,0.000000,1.911909
3,0.0,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.000000,385.731117,3.503998,25.400000,946.000000,0.712414,0.000000,1.911909
4,0.0,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.000000,385.731117,3.503998,25.400000,989.000000,0.712414,0.000000,1.911909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1418,0.0,0.249031,0.0,0.003661,51.047097,8.298992,17.575053,68.140358,1.057941,378.824055,4.107351,5.161861,209.655126,2.731945,0.332811,1.423794
1419,0.0,0.249031,0.0,0.003661,51.047097,8.298992,17.575053,68.140358,1.057941,378.824055,4.107351,5.161861,209.655126,2.731945,0.332811,1.423794
1420,0.0,0.249031,0.0,0.003661,51.047097,8.298992,17.575053,68.140358,1.057941,378.824055,4.107351,5.161861,209.655126,2.731945,0.332811,1.423794
1421,0.0,0.249031,0.0,0.003661,51.047097,8.298992,17.575053,68.140358,1.057941,378.824055,4.107351,5.161861,209.655126,2.731945,0.332811,1.423794


In [34]:
X = pd.concat([X_5, X_m5], axis=1)

In [35]:
X

Unnamed: 0,C,Si,S,P,Mg,Mn,Ni,Cu,Mo,Temperatura_austenityzacji,Czas_austenityzacji,Temperatura_przemiany_izotermicznej,Czas_przemiany_izotermicznej,Cr,Al,Sn,V,Wydzielenia_grafitu,Udzial_wydzielen_grafitu,Srednica_sferoidow,Nodularity,Udzial_ferrytu,Rm_cast,A5_cast,Minimalna_grubosc_scianki,R0.2,Temperatura_pomiaru_udarnosci,Martensite volume fraction Xα',Retained_austenite_volume_X_gamma_R
0,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,400.0,120.0,0.0,0.288938,0.0,0.000137,59.285317,8.300063,12.348142,68.371865,20.000000,384.882655,3.629774,25.400000,480.000000,0.278687,0.000000,1.911909
1,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,385.0,120.0,0.0,0.288938,0.0,0.000136,59.186504,8.300063,12.348142,68.387679,20.000000,384.882655,3.609935,25.400000,636.000000,0.278687,0.000000,1.911909
2,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,371.0,120.0,0.0,0.288938,0.0,0.000139,59.040185,8.297070,12.341948,68.474598,20.000000,385.731117,3.471442,25.400000,861.000000,0.229480,0.000000,1.911909
3,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,357.0,120.0,0.0,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.000000,385.731117,3.503998,25.400000,946.000000,0.712414,0.000000,1.911909
4,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,343.0,120.0,0.0,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.000000,385.731117,3.503998,25.400000,989.000000,0.712414,0.000000,1.911909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1418,3.58,2.55,0.010,0.026,0.045,0.25,0.560,0.54,0.155,900.0,120.0,350.0,120.0,0.0,0.249031,0.0,0.003661,51.047097,8.298992,17.575053,68.140358,1.057941,378.824055,4.107351,5.161861,209.655126,2.731945,0.332811,1.423794
1419,3.58,2.55,0.010,0.026,0.045,0.25,0.560,0.54,0.155,900.0,120.0,350.0,120.0,0.0,0.249031,0.0,0.003661,51.047097,8.298992,17.575053,68.140358,1.057941,378.824055,4.107351,5.161861,209.655126,2.731945,0.332811,1.423794
1420,3.58,2.55,0.010,0.026,0.045,0.25,0.560,0.54,0.155,900.0,120.0,350.0,120.0,0.0,0.249031,0.0,0.003661,51.047097,8.298992,17.575053,68.140358,1.057941,378.824055,4.107351,5.161861,209.655126,2.731945,0.332811,1.423794
1421,3.58,2.55,0.010,0.026,0.045,0.25,0.560,0.54,0.155,900.0,120.0,350.0,120.0,0.0,0.249031,0.0,0.003661,51.047097,8.298992,17.575053,68.140358,1.057941,378.824055,4.107351,5.161861,209.655126,2.731945,0.332811,1.423794


In [36]:
Y

Unnamed: 0,Rm,A5,Twardosc_Brinella,Udarnosc_Charpy,Udzial_austenitu
0,585.0,,,,28.7
1,701.0,,,,37.5
2,1062.0,10.5,,,32.0
3,1105.0,9.6,,,28.5
4,1185.0,,,,26.2
...,...,...,...,...,...
1418,,,,,
1419,,,,,
1420,,,,,
1421,,,,,


In [37]:
data = pd.concat([X, Y], axis=1)

**RM**

In [38]:
# Usuwamy zupełnie puste wiersze w Rm
mask_missing_data = data['Rm'].isnull()
rows_with_missing_data_indices = data['Rm'].index[mask_missing_data].tolist()
data_rm = data.drop(rows_with_missing_data_indices)
data_rm = data_rm.reset_index(drop=True)
data_rm.shape

(1012, 34)

In [39]:
data_rm.isnull().sum()

C                                        0
Si                                       0
S                                        0
P                                        0
Mg                                       0
Mn                                       0
Ni                                       0
Cu                                       0
Mo                                       0
Temperatura_austenityzacji               0
Czas_austenityzacji                      0
Temperatura_przemiany_izotermicznej      0
Czas_przemiany_izotermicznej             0
Cr                                       0
Al                                       0
Sn                                       0
V                                        0
Wydzielenia_grafitu                      0
Udzial_wydzielen_grafitu                 0
Srednica_sferoidow                       0
Nodularity                               0
Udzial_ferrytu                           0
Rm_cast                                  0
A5_cast    

In [40]:
data_rm

Unnamed: 0,C,Si,S,P,Mg,Mn,Ni,Cu,Mo,Temperatura_austenityzacji,Czas_austenityzacji,Temperatura_przemiany_izotermicznej,Czas_przemiany_izotermicznej,Cr,Al,Sn,V,Wydzielenia_grafitu,Udzial_wydzielen_grafitu,Srednica_sferoidow,Nodularity,Udzial_ferrytu,Rm_cast,A5_cast,Minimalna_grubosc_scianki,R0.2,Temperatura_pomiaru_udarnosci,Martensite volume fraction Xα',Retained_austenite_volume_X_gamma_R,Rm,A5,Twardosc_Brinella,Udarnosc_Charpy,Udzial_austenitu
0,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,400.0,120.0,0.00,0.288938,0.0,0.000137,59.285317,8.300063,12.348142,68.371865,20.000000,384.882655,3.629774,25.40000,480.0,0.278687,0.0,1.911909,585.0,,,,28.7
1,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,385.0,120.0,0.00,0.288938,0.0,0.000136,59.186504,8.300063,12.348142,68.387679,20.000000,384.882655,3.609935,25.40000,636.0,0.278687,0.0,1.911909,701.0,,,,37.5
2,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,371.0,120.0,0.00,0.288938,0.0,0.000139,59.040185,8.297070,12.341948,68.474598,20.000000,385.731117,3.471442,25.40000,861.0,0.229480,0.0,1.911909,1062.0,10.5,,,32.0
3,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,357.0,120.0,0.00,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.000000,385.731117,3.503998,25.40000,946.0,0.712414,0.0,1.911909,1105.0,9.6,,,28.5
4,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,343.0,120.0,0.00,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.000000,385.731117,3.503998,25.40000,989.0,0.712414,0.0,1.911909,1185.0,,,,26.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1007,3.58,2.08,0.042,0.024,0.000,0.20,0.100,0.43,0.000,900.0,120.0,300.0,120.0,0.03,0.000000,0.0,0.000000,30.904504,9.317876,15.319266,70.471537,8.847525,565.000000,8.000000,7.99161,256.0,21.000000,0.0,1.624403,847.0,9.7,298.0,3.67,
1008,3.58,2.08,0.042,0.024,0.000,0.20,0.100,0.43,0.000,900.0,120.0,300.0,150.0,0.03,0.000000,0.0,0.000000,30.904504,9.317876,15.319266,70.471537,8.847525,565.000000,8.000000,7.99161,269.0,21.000000,0.0,1.624403,839.0,9.3,314.0,3.67,
1009,3.58,2.08,0.042,0.024,0.000,0.20,0.100,0.43,0.000,900.0,120.0,350.0,75.0,0.03,0.000000,0.0,0.000000,30.904504,9.317876,15.319266,70.471537,8.847525,565.000000,8.000000,7.99161,279.0,21.000000,0.0,1.624403,690.0,10.0,,3.00,
1010,3.58,2.08,0.042,0.024,0.000,0.20,0.100,0.43,0.000,900.0,120.0,350.0,120.0,0.03,0.000000,0.0,0.000000,30.904504,9.317876,15.319266,70.471537,8.847525,565.000000,8.000000,7.99161,284.0,21.000000,0.0,1.624403,706.0,11.2,,2.67,


In [41]:
data_rm = pd.DataFrame(data_rm)

In [42]:
X_rm = data_rm.iloc[:, [0, 1, 2, 3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28]]
X_rm

Unnamed: 0,C,Si,S,P,Mg,Mn,Ni,Cu,Mo,Temperatura_austenityzacji,Czas_austenityzacji,Temperatura_przemiany_izotermicznej,Czas_przemiany_izotermicznej,Cr,Al,Sn,V,Wydzielenia_grafitu,Udzial_wydzielen_grafitu,Srednica_sferoidow,Nodularity,Udzial_ferrytu,Rm_cast,A5_cast,Minimalna_grubosc_scianki,R0.2,Temperatura_pomiaru_udarnosci,Martensite volume fraction Xα',Retained_austenite_volume_X_gamma_R
0,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,400.0,120.0,0.00,0.288938,0.0,0.000137,59.285317,8.300063,12.348142,68.371865,20.000000,384.882655,3.629774,25.40000,480.0,0.278687,0.0,1.911909
1,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,385.0,120.0,0.00,0.288938,0.0,0.000136,59.186504,8.300063,12.348142,68.387679,20.000000,384.882655,3.609935,25.40000,636.0,0.278687,0.0,1.911909
2,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,371.0,120.0,0.00,0.288938,0.0,0.000139,59.040185,8.297070,12.341948,68.474598,20.000000,385.731117,3.471442,25.40000,861.0,0.229480,0.0,1.911909
3,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,357.0,120.0,0.00,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.000000,385.731117,3.503998,25.40000,946.0,0.712414,0.0,1.911909
4,3.40,2.41,0.017,0.015,0.064,0.15,0.001,0.00,0.001,927.0,120.0,343.0,120.0,0.00,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.000000,385.731117,3.503998,25.40000,989.0,0.712414,0.0,1.911909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1007,3.58,2.08,0.042,0.024,0.000,0.20,0.100,0.43,0.000,900.0,120.0,300.0,120.0,0.03,0.000000,0.0,0.000000,30.904504,9.317876,15.319266,70.471537,8.847525,565.000000,8.000000,7.99161,256.0,21.000000,0.0,1.624403
1008,3.58,2.08,0.042,0.024,0.000,0.20,0.100,0.43,0.000,900.0,120.0,300.0,150.0,0.03,0.000000,0.0,0.000000,30.904504,9.317876,15.319266,70.471537,8.847525,565.000000,8.000000,7.99161,269.0,21.000000,0.0,1.624403
1009,3.58,2.08,0.042,0.024,0.000,0.20,0.100,0.43,0.000,900.0,120.0,350.0,75.0,0.03,0.000000,0.0,0.000000,30.904504,9.317876,15.319266,70.471537,8.847525,565.000000,8.000000,7.99161,279.0,21.000000,0.0,1.624403
1010,3.58,2.08,0.042,0.024,0.000,0.20,0.100,0.43,0.000,900.0,120.0,350.0,120.0,0.03,0.000000,0.0,0.000000,30.904504,9.317876,15.319266,70.471537,8.847525,565.000000,8.000000,7.99161,284.0,21.000000,0.0,1.624403


In [43]:
Y_rm = data_rm.iloc[:, [29,30,31,32,33]]
Y_rm

Unnamed: 0,Rm,A5,Twardosc_Brinella,Udarnosc_Charpy,Udzial_austenitu
0,585.0,,,,28.7
1,701.0,,,,37.5
2,1062.0,10.5,,,32.0
3,1105.0,9.6,,,28.5
4,1185.0,,,,26.2
...,...,...,...,...,...
1007,847.0,9.7,298.0,3.67,
1008,839.0,9.3,314.0,3.67,
1009,690.0,10.0,,3.00,
1010,706.0,11.2,,2.67,


MODEL LIGHTGBM

In [44]:
X_train_rm, X_test_rm, y_train_rm, y_test_rm = train_test_split(X_rm, Y_rm['Rm'], test_size = 0.3)

In [45]:
params_grid_rm = {
    'num_leaves': [15, 31, 50],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [5, 10],
    'min_child_samples': [10, 20, 30],
    'objective': ['regression']
}

grid_search_rm = GridSearchCV(lgb.LGBMRegressor(), params_grid_rm, scoring = 'neg_mean_squared_error', cv = 5, n_jobs = -1)

grid_search_rm.fit(X_train_rm, y_train_rm)

best_params_rm = grid_search_rm.best_params_

model_lgbm_rm = lgb.LGBMRegressor(**best_params_rm)

model_lgbm_rm.fit(X_train_rm, y_train_rm)

print("Najlepsze parametry:", best_params_rm)

y_pred_rm = model_lgbm_rm.predict(X_test_rm)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000177 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 708, number of used features: 29
[LightGBM] [Info] Start training from score 1096.533926
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000190 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2040
[LightGBM] [Info] Number of data points in the train set: 708, number of used features: 29
[LightGBM] [Info] Start training from score 1096.533926
Najlepsze parametry: {'learning_rate': 0.2, 'max_depth': 5, 'min_child_samples': 20, 'num_leaves': 15, 'objective': 'regression'}


In [46]:
feature_importances = pd.DataFrame({'Feature': X_train_rm.columns, 'Importance': model_lgbm_rm.feature_importances_})
print(feature_importances.sort_values(by='Importance', ascending=False))

                                Feature  Importance
25                                 R0.2         137
11  Temperatura_przemiany_izotermicznej          92
1                                    Si          49
12         Czas_przemiany_izotermicznej          49
6                                    Ni          46
23                              A5_cast          45
5                                    Mn          43
24            Minimalna_grubosc_scianki          39
3                                     P          35
7                                    Cu          35
0                                     C          33
22                              Rm_cast          32
2                                     S          31
17                  Wydzielenia_grafitu          31
9            Temperatura_austenityzacji          27
8                                    Mo          26
18             Udzial_wydzielen_grafitu          26
21                       Udzial_ferrytu          25
19          

In [47]:
def mean_error(y_test, y_pred):
    return np.mean(y_pred - y_test)

def mean_absolute_percentage_error(y_test, y_pred):
    mask = y_test != 0
    return np.mean(np.abs((y_pred - y_test) / np.maximum(np.abs(y_test), 1e-8)) * mask) * 100

In [48]:
rmse_rm = np.sqrt(mean_squared_error(y_test_rm, y_pred_rm))
mse_rm = mean_squared_error(y_test_rm, y_pred_rm)
r2_rm = r2_score(y_test_rm, y_pred_rm)
mae_rm = mean_absolute_error(y_test_rm, y_pred_rm)
me_rm = mean_error(y_test_rm, y_pred_rm)
mape_rm = mean_absolute_percentage_error(y_test_rm, y_pred_rm)

print("R^2:", r2_rm)
print("RMSE:", rmse_rm)
print("MSE:", mse_rm)
print("MAE:", mae_rm)
print("ME:", me_rm)
print("MAPE:", mape_rm)

R^2: 0.8936314997026932
RMSE: 82.4359102309742
MSE: 6795.679295609237
MAE: 58.51459669086298
ME: -12.728104680557395
MAPE: 5.776203903290672


XGBOOST

In [49]:
import xgboost as xgb

In [50]:
xgb_params_grid_rm = {
    'n_estimator': [100, 150, 200],
    'max_depth': [5, 10, 15],
    'max_leaves': [10, 20],
    'learning_rate' : [0.01, 0.15, 0.2]
}

grid_search_rm = GridSearchCV(xgb.XGBRegressor(), xgb_params_grid_rm, scoring = 'neg_mean_squared_error', cv = 5, n_jobs = -1)

grid_search_rm.fit(X_train_rm, y_train_rm)

best_params_rm = grid_search_rm.best_params_

model_xgb_rm = xgb.XGBRegressor(**best_params_rm)

model_xgb_rm.fit(X_train_rm, y_train_rm)

print("Najlepsze parametry:", best_params_rm)

y_pred_rm = model_xgb_rm.predict(X_test_rm)

Najlepsze parametry: {'learning_rate': 0.2, 'max_depth': 5, 'max_leaves': 10, 'n_estimator': 100}


In [51]:
rmse_rm = np.sqrt(mean_squared_error(y_test_rm, y_pred_rm))
mse_rm = mean_squared_error(y_test_rm, y_pred_rm)
r2_rm = r2_score(y_test_rm, y_pred_rm)
mae_rm = mean_absolute_error(y_test_rm, y_pred_rm)
me_rm = mean_error(y_test_rm, y_pred_rm)
mape_rm = mean_absolute_percentage_error(y_test_rm, y_pred_rm)

print("R^2:", r2_rm)
print("RMSE:", rmse_rm)
print("MSE:", mse_rm)
print("MAE:", mae_rm)
print("ME:", me_rm)
print("MAPE:", mape_rm)

R^2: 0.9051771586579893
RMSE: 77.83347164538344
MSE: 6058.049308372707
MAE: 56.969995334022926
ME: -10.65503224423057
MAPE: 5.632285162658366


**Udział austenitu**

In [52]:
mask_missing_data = data['Udzial_austenitu'].isnull()
rows_with_missing_data_indices = data['Udzial_austenitu'].index[mask_missing_data].tolist()
data_ua = data.drop(rows_with_missing_data_indices)
data_ua = data_ua.reset_index(drop = True)
data_ua.shape

(285, 34)

In [53]:
data_ua = pd.DataFrame(data_ua)
X_ua= data_ua.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]]
X_ua

Unnamed: 0,C,Si,S,P,Mg,Mn,Ni,Cu,Mo,Temperatura_austenityzacji,Czas_austenityzacji,Temperatura_przemiany_izotermicznej,Czas_przemiany_izotermicznej,Cr,Al,Sn,V,Wydzielenia_grafitu,Udzial_wydzielen_grafitu,Srednica_sferoidow,Nodularity,Udzial_ferrytu,Rm_cast,A5_cast,Minimalna_grubosc_scianki,R0.2,Temperatura_pomiaru_udarnosci,Martensite volume fraction Xα',Retained_austenite_volume_X_gamma_R
0,3.40,2.41,0.017,0.015,0.064,0.150,0.001,0.00,0.001,927.0,120.0,400.0,120.0,0.0,0.288938,0.0,0.000137,59.285317,8.300063,12.348142,68.371865,20.0,384.882655,3.629774,25.400000,480.0,0.278687,0.0,1.911909
1,3.40,2.41,0.017,0.015,0.064,0.150,0.001,0.00,0.001,927.0,120.0,385.0,120.0,0.0,0.288938,0.0,0.000136,59.186504,8.300063,12.348142,68.387679,20.0,384.882655,3.609935,25.400000,636.0,0.278687,0.0,1.911909
2,3.40,2.41,0.017,0.015,0.064,0.150,0.001,0.00,0.001,927.0,120.0,371.0,120.0,0.0,0.288938,0.0,0.000139,59.040185,8.297070,12.341948,68.474598,20.0,385.731117,3.471442,25.400000,861.0,0.229480,0.0,1.911909
3,3.40,2.41,0.017,0.015,0.064,0.150,0.001,0.00,0.001,927.0,120.0,357.0,120.0,0.0,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.0,385.731117,3.503998,25.400000,946.0,0.712414,0.0,1.911909
4,3.40,2.41,0.017,0.015,0.064,0.150,0.001,0.00,0.001,927.0,120.0,343.0,120.0,0.0,0.277178,0.0,0.000139,59.689547,8.297070,12.542053,68.668350,20.0,385.731117,3.503998,25.400000,989.0,0.712414,0.0,1.911909
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280,3.79,3.09,0.014,0.046,0.028,0.450,0.000,0.54,0.000,800.0,90.0,375.0,60.0,0.0,0.000000,0.0,0.000000,135.000000,7.649740,14.439180,92.000000,82.0,404.000000,15.000000,8.204462,320.0,21.000000,0.0,2.242776
281,3.79,3.09,0.014,0.046,0.028,0.450,0.000,0.54,0.000,830.0,90.0,375.0,60.0,0.0,0.000000,0.0,0.000000,135.000000,7.649740,14.439180,92.000000,82.0,404.000000,15.000000,8.204462,370.0,21.000000,0.0,2.242776
282,3.41,2.58,0.040,0.026,0.084,0.047,1.020,0.54,0.240,780.0,90.0,375.0,60.0,0.0,0.000000,0.0,0.000000,160.000000,7.713978,14.516804,95.000000,50.0,370.000000,5.300000,7.962787,360.0,21.000000,0.0,2.242776
283,3.41,2.58,0.040,0.026,0.084,0.047,1.020,0.54,0.240,800.0,90.0,375.0,60.0,0.0,0.000000,0.0,0.000000,160.000000,7.713978,14.516804,95.000000,50.0,370.000000,5.300000,7.962787,600.0,21.000000,0.0,2.242776


In [54]:
Y_ua = data_ua.iloc[:, [29,30,31,32,33]]
Y_ua

Unnamed: 0,Rm,A5,Twardosc_Brinella,Udarnosc_Charpy,Udzial_austenitu
0,585.0,,,,28.7
1,701.0,,,,37.5
2,1062.0,10.5,,,32.0
3,1105.0,9.6,,,28.5
4,1185.0,,,,26.2
...,...,...,...,...,...
280,310.0,5.5,,140.0,8.0
281,500.0,7.0,,130.0,42.0
282,550.0,3.5,,95.0,69.0
283,895.0,7.0,,100.0,79.0


MODEL LIGHTGBM

In [55]:
X_train_ua, X_test_ua, y_train_ua, y_test_ua = train_test_split(X_ua, Y_ua['Udzial_austenitu'], test_size = 0.3)

In [56]:
params_grid_ua = {
    'num_leaves': [15, 31, 50],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [5, 10],
    'min_child_samples': [10, 20, 30],
    'objective': ['regression']
}

grid_search_ua = GridSearchCV(lgb.LGBMRegressor(), params_grid_ua, scoring = 'neg_mean_squared_error', cv = 5, n_jobs = -1)

grid_search_ua.fit(X_train_ua, y_train_ua)

best_params_ua = grid_search_ua.best_params_

model_lgbm_ua = lgb.LGBMRegressor(**best_params_ua)

model_lgbm_ua.fit(X_train_ua, y_train_ua)

print("Najlepsze parametry:", best_params_ua)

y_pred_ua = model_lgbm_ua.predict(X_test_ua)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000148 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 668
[LightGBM] [Info] Number of data points in the train set: 199, number of used features: 29
[LightGBM] [Info] Start training from score 24.955276
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000047 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 668
[LightGBM] [Info] Number of data points in the train set: 199, number of used features: 29
[LightGBM] [Info] Start training from score 24.955276
Najlepsze parametry: {'learning_rate': 0.05, 'max_depth': 5, 'min_child_samples': 10, 'num_leaves': 15, 'objective': 'regression'}


In [57]:
feature_importances = pd.DataFrame({'Feature': X_train_ua.columns, 'Importance': model_lgbm_ua.feature_importances_})
print(feature_importances.sort_values(by='Importance', ascending=False))

                                Feature  Importance
11  Temperatura_przemiany_izotermicznej         143
12         Czas_przemiany_izotermicznej         108
5                                    Mn          58
0                                     C          56
22                              Rm_cast          49
9            Temperatura_austenityzacji          46
24            Minimalna_grubosc_scianki          44
4                                    Mg          44
25                                 R0.2          37
2                                     S          35
23                              A5_cast          34
20                           Nodularity          28
6                                    Ni          27
3                                     P          25
1                                    Si          25
17                  Wydzielenia_grafitu          22
18             Udzial_wydzielen_grafitu          21
21                       Udzial_ferrytu          21
28  Retained

In [58]:
rmse_ua = np.sqrt(mean_squared_error(y_test_ua, y_pred_ua))
mse_ua = mean_squared_error(y_test_ua, y_pred_ua)
r2_ua = r2_score(y_test_ua, y_pred_ua)
mae_ua = mean_absolute_error(y_test_ua, y_pred_ua)
me_ua = mean_error(y_test_ua, y_pred_ua)
mape_ua = mean_absolute_percentage_error(y_test_ua, y_pred_ua)


print("R^2:", r2_ua)
print("RMSE:", rmse_ua)
print("MSE:", mse_ua)
print("MAE:", mae_ua)
print("ME:", me_ua)
print("MAPE:", mape_ua)

R^2: 0.585294885662089
RMSE: 9.471399316118463
MSE: 89.7074050053693
MAE: 6.677426015389013
ME: -0.9460963461917685
MAPE: 42.68027563696626


XGBOOST

In [62]:
xgb_params_grid_ua = {
    'n_estimator': [100, 150, 200],
    'max_depth': [5, 10, 15],
    'max_leaves': [10, 20],
    'learning_rate' : [0.01, 0.15, 0.2]
}

grid_search_ua = GridSearchCV(xgb.XGBRegressor(), xgb_params_grid_ua, scoring = 'neg_mean_squared_error', cv = 5, n_jobs = -1)

grid_search_ua.fit(X_train_ua, y_train_ua)

best_params_ua = grid_search_ua.best_params_

model_xgb_ua = xgb.XGBRegressor(**best_params_ua)

model_xgb_ua.fit(X_train_ua, y_train_ua)

print("Najlepsze parametry:", best_params_ua)

y_pred_ua = model_xgb_ua.predict(X_test_ua)

Najlepsze parametry: {'learning_rate': 0.2, 'max_depth': 10, 'max_leaves': 20, 'n_estimator': 100}


In [63]:
rmse_ua = np.sqrt(mean_squared_error(y_test_ua, y_pred_ua))
mse_ua = mean_squared_error(y_test_ua, y_pred_ua)
r2_ua = r2_score(y_test_ua, y_pred_ua)
mae_ua = mean_absolute_error(y_test_ua, y_pred_ua)
me_ua = mean_error(y_test_ua, y_pred_ua)
mape_ua = mean_absolute_percentage_error(y_test_ua, y_pred_ua)

print("R^2:", r2_ua)
print("RMSE:", rmse_ua)
print("MSE:", mse_ua)
print("MAE:", mae_ua)
print("ME:", me_ua)
print("MAPE:", mape_ua)

R^2: 0.7927500473539456
RMSE: 6.6956334643911
MSE: 44.83150748947396
MAE: 5.010941132556559
ME: -0.8045997456062671
MAPE: 26.835954710707348
