In [36]:
# Загрузка библиотек
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

In [37]:
# Ссылки к файлам
...

### Подготовка данных по элементам

In [38]:
elements = pd.read_csv(filepath_or_buffer=links['elements'], index_col='Symbol')
# Переименовываю признаки
elements.rename(columns={
    'Atomic Number':'Atomic_Number',
    'NUMBER OF Electrons at last orbitale': 'NUMBER_OF_Electrons_at_last_orbitale',
    'NUMBER OF Electrons at before last orbitale':'NUMBER_OF_Electrons_at_before_last_orbitale',
    'NUMBER OF electrones at last level':'NUMBER_OF_electrones_at_last_level',
    'NUMBER OF vacancies at  outer orbitale':'NUMBER_OF_vacancies_at_outer_orbitale',
    'Number of active electrons at inner level':'Number_of_active_electrons_at_inner_level',
    'Max valency':'Max_valency',
    'Atomic Mass':'Atomic_Mass',
    'Atomic radius (pm)':'Atomic_radius',
    'Covalent radius (pm)':'Covalent_radius',
    'Ionization potential (eV)':'Ionization_potential',
    'Electron affinity (KJ/mol)':'Electron_affinity'
    }, inplace=True)
elements.head()

Unnamed: 0_level_0,Atomic_Number,NUMBER_OF_Electrons_at_last_orbitale,NUMBER_OF_Electrons_at_before_last_orbitale,NUMBER_OF_electrones_at_last_level,NUMBER_OF_vacancies_at_outer_orbitale,Number_of_active_electrons_at_inner_level,Max_valency,Atomic_Mass,Electronegativity,Atomic_radius,Covalent_radius,Ionization_potential,Electron_affinity,Period,Group,Block
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Li,3,1,0,1,1,0,1,7.0,0.98,145,134,5.392,596,2,IA,s
B,5,1,2,3,5,0,1,10.81,2.04,98,82,8.298,267,2,IIIA,p
C,6,2,2,4,4,0,2,12.011,2.55,77,77,11.26,1539,2,IVA,p
N,7,3,2,5,3,0,5,14.0067,3.04,92,75,14.48,7,2,VA,p
Na,11,1,0,2,1,0,1,22.989769,0.93,190,154,5.139,528,3,IA,s


In [39]:
# создаю словарь для обозначения групп и блоков
group_notation_dictionary = {i[1]:i[0] for i in enumerate(elements.Group.unique())}  # Группы
block_notation_dictionary = {i[1]:i[0] for i in enumerate(elements.Block.unique())}  # Блоки
print(f'Group\n{group_notation_dictionary}\nBlock{block_notation_dictionary}')

# заменяю обозначение групп на соответствующие числовые индикаторы
elements.Group = list(map(lambda x: group_notation_dictionary[x], elements.Group.values))
elements.Block = list(map(lambda x: block_notation_dictionary[x], elements.Block.values))

Group
{'IA': 0, 'IIIA': 1, 'IVA': 2, 'VA': 3, 'IIA': 4, 'VIA': 5, 'IIIB': 6, 'IVB': 7, 'VB': 8, 'VIB': 9, 'VIIB': 10, 'VIIIB': 11, 'IB': 12, 'IIB': 13, 'VIIA': 14, 'iVA': 15}
Block{'s': 0, 'p': 1, 'd': 2}


In [40]:
elements.info()

<class 'pandas.core.frame.DataFrame'>
Index: 63 entries, Li to Ag
Data columns (total 16 columns):
 #   Column                                       Non-Null Count  Dtype  
---  ------                                       --------------  -----  
 0   Atomic_Number                                63 non-null     int64  
 1   NUMBER_OF_Electrons_at_last_orbitale         63 non-null     int64  
 2   NUMBER_OF_Electrons_at_before_last_orbitale  63 non-null     int64  
 3   NUMBER_OF_electrones_at_last_level           63 non-null     int64  
 4   NUMBER_OF_vacancies_at_outer_orbitale        63 non-null     int64  
 5   Number_of_active_electrons_at_inner_level    63 non-null     int64  
 6   Max_valency                                  63 non-null     int64  
 7   Atomic_Mass                                  63 non-null     float64
 8   Electronegativity                            63 non-null     object 
 9   Atomic_radius                                63 non-null     object 
 10  Covalent

In [41]:
# Редактирую отображение дробных чисел
elements.Electronegativity = [float(re.sub(',', '.', i)) for i in elements.Electronegativity]
elements.Ionization_potential = [float(re.sub(',', '.', i)) for i in elements.Ionization_potential]
elements.Electron_affinity = [float(re.sub(',', '.', i)) for i in elements.Electron_affinity]
elements.Atomic_radius = [float(re.sub(',', '.', i)) for i in elements.Atomic_radius]

### Подготовка данных по соединенийм

In [42]:
# загрузка данных по соединениям
compounds = pd.read_csv(filepath_or_buffer=links['materials'], index_col='Compound')
# Переименовываю признак
compounds.rename(columns={'Band gap, eV':'Band_gap'}, inplace=True)

In [43]:
# регулярное выражение для разделения соединения на элементы
reg_compaund = r'(?P<element_1>[A-Z](?![a-z])+|[A-Z][a-z]+)(?P<number_1>\d+|)(?P<element_2>[A-Z](?![a-z])+|[A-Z][a-z]+)(?P<number_2>\d+|)(?P<element_3>[A-Z](?![a-z])+|[A-Z][a-z]+)(?P<number_3>\d+|)'

In [44]:
data = pd.DataFrame(data=compounds, index=compounds.index)

In [45]:
# тест
comp = compounds.index[0]
r = re.search(reg_compaund, comp).groupdict()
# [i for i in elements.index]
r

{'element_1': 'Mn',
 'number_1': '2',
 'element_2': 'Si',
 'number_2': '',
 'element_3': 'O',
 'number_3': '4'}

In [46]:
# добавлю метки для себя 
data.insert(loc=0, column='a', value=0)
data.insert(loc=1,column='b', value=0)
data.insert(loc=2, column='c', value=0)

In [47]:
# Добавляю признаки по каждому элементу (a, b, c) соединения 
for parameter in elements.columns[::-1]:
    data.insert(loc=data.columns.get_loc('a')+1, column='{f_1}_a'.format(f_1=parameter),
                value=[elements.at[re.search(reg_compaund, comp).groupdict()['element_1'], parameter] for comp in data.index])
    data.insert(loc=data.columns.get_loc('b')+1, column='{f_1}_b'.format(f_1=parameter),
                value=[elements.at[re.search(reg_compaund, comp).groupdict()['element_2'], parameter] for comp in data.index])
    data.insert(loc=data.columns.get_loc('c')+1, column='{f_1}_c'.format(f_1=parameter),
                value=[elements.at[re.search(reg_compaund, comp).groupdict()['element_3'], parameter] for comp in data.index])

In [48]:
# Число элементов в соединении
data.insert(loc=data.columns.get_loc('a')+1, column='Number_a',
            value=[re.search(reg_compaund, comp).groupdict()['number_1'] if re.search(reg_compaund, comp).groupdict()['number_1'] != '' else 1  for comp in data.index])
data.insert(loc=data.columns.get_loc('b')+1, column='Number_b',
            value=[re.search(reg_compaund, comp).groupdict()['number_2'] if re.search(reg_compaund, comp).groupdict()['number_2'] != '' else 1  for comp in data.index])
data.insert(loc=data.columns.get_loc('c')+1, column='Number_c',
            value=[re.search(reg_compaund, comp).groupdict()['number_3'] if re.search(reg_compaund, comp).groupdict()['number_3'] != '' else 1  for comp in data.index])

In [49]:
# Удаляю метки
data.drop(columns=['a', 'b', 'c'], inplace=True)

In [50]:
# Сохраняю данные
...

### Соединения для прогнозирования


In [51]:
# загружаю данные 
compounds = pd.read_csv(filepath_or_buffer=links['materials_for_predict'], index_col='Compound', )
compounds.drop(columns=compounds.columns, inplace=True)

In [52]:
# метки для удобства
compounds.insert(loc=0, column='a', value=0)
compounds.insert(loc=1,column='b', value=0)
compounds.insert(loc=2, column='c', value=0)

In [53]:
compounds

Unnamed: 0_level_0,a,b,c
Compound,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Zn3WN4,0,0,0
ZnGeN2,0,0,0
ZnSiN2,0,0,0
YWN3,0,0,0
ZnSnN2,0,0,0
...,...,...,...
ZrAl3N4,0,0,0
Mn3AlN3,0,0,0
Mn3InN3,0,0,0
Si6Mo3N11,0,0,0


In [54]:
# добавляю признаки по элементам соединения
for parameter in elements.columns[::-1]:
    compounds.insert(loc=compounds.columns.get_loc('a')+1, column='{f_1}_a'.format(f_1=parameter),
                value=[elements.at[re.search(reg_compaund, comp).groupdict()['element_1'], parameter] for comp in compounds.index])
    compounds.insert(loc=compounds.columns.get_loc('b')+1, column='{f_1}_b'.format(f_1=parameter),
                value=[elements.at[re.search(reg_compaund, comp).groupdict()['element_2'], parameter] for comp in compounds.index])
    compounds.insert(loc=compounds.columns.get_loc('c')+1, column='{f_1}_c'.format(f_1=parameter),
                value=[elements.at[re.search(reg_compaund, comp).groupdict()['element_3'], parameter] for comp in compounds.index])

In [55]:
# число атомов элемента в соединении
compounds.insert(loc=compounds.columns.get_loc('a')+1, column='Number_a',
            value=[re.search(reg_compaund, comp).groupdict()['number_1'] if re.search(reg_compaund, comp).groupdict()['number_1'] != '' else 1  for comp in compounds.index])
compounds.insert(loc=compounds.columns.get_loc('b')+1, column='Number_b',
            value=[re.search(reg_compaund, comp).groupdict()['number_2'] if re.search(reg_compaund, comp).groupdict()['number_2'] != '' else 1  for comp in compounds.index])
compounds.insert(loc=compounds.columns.get_loc('c')+1, column='Number_c',
            value=[re.search(reg_compaund, comp).groupdict()['number_3'] if re.search(reg_compaund, comp).groupdict()['number_3'] != '' else 1  for comp in compounds.index])

In [56]:
compounds.drop(columns=['a', 'b', 'c'], inplace=True)
compounds

Unnamed: 0_level_0,Number_a,Atomic_Number_a,NUMBER_OF_Electrons_at_last_orbitale_a,NUMBER_OF_Electrons_at_before_last_orbitale_a,NUMBER_OF_electrones_at_last_level_a,NUMBER_OF_vacancies_at_outer_orbitale_a,Number_of_active_electrons_at_inner_level_a,Max_valency_a,Atomic_Mass_a,Electronegativity_a,...,Max_valency_c,Atomic_Mass_c,Electronegativity_c,Atomic_radius_c,Covalent_radius_c,Ionization_potential_c,Electron_affinity_c,Period_c,Group_c,Block_c
Compound,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Zn3WN4,3,30,2,0,2,0,0,2,65.40000,1.65,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
ZnGeN2,1,30,2,0,2,0,0,2,65.40000,1.65,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
ZnSiN2,1,30,2,0,2,0,0,2,65.40000,1.65,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
YWN3,1,39,2,0,2,0,1,3,88.90500,1.22,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
ZnSnN2,1,30,2,0,2,0,0,2,65.40000,1.65,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZrAl3N4,1,40,2,0,2,0,2,4,91.22000,1.33,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
Mn3AlN3,3,25,2,0,2,0,5,7,54.93804,1.55,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
Mn3InN3,3,25,2,0,2,0,5,7,54.93804,1.55,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
Si6Mo3N11,6,14,2,2,4,4,0,2,28.08500,1.90,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1


In [57]:
# Сохраняю данные
...

In [58]:
data

Unnamed: 0_level_0,Number_a,Atomic_Number_a,NUMBER_OF_Electrons_at_last_orbitale_a,NUMBER_OF_Electrons_at_before_last_orbitale_a,NUMBER_OF_electrones_at_last_level_a,NUMBER_OF_vacancies_at_outer_orbitale_a,Number_of_active_electrons_at_inner_level_a,Max_valency_a,Atomic_Mass_a,Electronegativity_a,...,Atomic_Mass_c,Electronegativity_c,Atomic_radius_c,Covalent_radius_c,Ionization_potential_c,Electron_affinity_c,Period_c,Group_c,Block_c,Band_gap
Compound,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Mn2SiO4,2,25,2,0,2,0,5,7,54.938040,1.55,...,15.9994,3.44,48.0,66,13.56,141.0,2,5,1,3.25
Mn4SiO7,4,25,2,0,2,0,5,7,54.938040,1.55,...,15.9994,3.44,48.0,66,13.56,141.0,2,5,1,1.52
Mn7SiO12,7,25,2,0,2,0,5,7,54.938040,1.55,...,15.9994,3.44,48.0,66,13.56,141.0,2,5,1,3.15
AlSi2O5,1,13,1,2,3,5,0,1,26.981538,1.61,...,15.9994,3.44,48.0,66,13.56,141.0,2,5,1,9.10
Fe2SiO4,2,26,2,0,2,0,6,6,55.840000,1.83,...,15.9994,3.44,48.0,66,13.56,141.0,2,5,1,7.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
MgZrN2,1,12,2,0,2,0,0,2,24.305000,1.31,...,14.0067,3.04,92.0,75,14.48,7.0,2,3,1,1.50
ZnGeN2,1,30,2,0,2,0,0,2,65.400000,1.65,...,14.0067,3.04,92.0,75,14.48,7.0,2,3,1,2.50
MnSiN2,1,25,2,0,2,0,5,7,54.938040,1.55,...,14.0067,3.04,92.0,75,14.48,7.0,2,3,1,3.50
MnGeN2,1,25,2,0,2,0,5,7,54.938040,1.55,...,14.0067,3.04,92.0,75,14.48,7.0,2,3,1,2.50


### Обучение

In [59]:
y = data.Band_gap
X = data.drop(columns='Band_gap')

In [60]:
# MinMaxScaler
scaler = MinMaxScaler()
X_norm = pd.DataFrame(data=scaler.fit_transform(X), columns=X.columns,  index=X.index)

In [61]:
# выделю train и test
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, random_state=2)

In [62]:
# тест 1. "Прямой" predict
# Обучение
svr = SVR(C=1.0, epsilon=0.2)
svr.fit(X=X_train, y=y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma='scale',
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [63]:
# Предсказание
predict = svr.predict(X=X_test)
predict

array([2.18980527, 2.33829629, 3.05004664, 3.26608977, 2.68682206,
       2.3030499 , 2.86352734, 2.29556737, 3.55099514, 2.36986977,
       2.19048489, 4.12478706, 2.31779307, 2.62897709, 2.87816201,
       2.25893226, 3.86722176, 2.53230529, 1.8915457 , 2.67664003,
       2.30341457, 3.31172529, 1.87326237, 2.6720813 , 3.12843853,
       2.22799878, 2.40902661, 2.40013473, 2.28220477, 2.53047313,
       4.16258058, 3.84554023, 2.94150169, 2.48173077, 2.88242628,
       2.28759034, 4.17939979, 3.46798849, 3.64650863, 1.59826397,
       2.31920749, 2.18859441, 2.42543824, 2.9160983 , 2.24699457,
       3.19428822, 4.05130571, 2.93142856, 4.13632147, 4.29532855,
       3.32021885, 4.19820838, 3.12339765, 1.96939122, 2.15583632,
       3.32358736, 2.42173287, 2.88300863, 2.60756329, 2.60284468,
       3.69540789, 2.41936698, 4.26453906, 3.39580265, 1.64030038,
       3.07895332, 3.97038125, 1.68311185, 1.69017085, 3.24490065,
       3.33369134, 3.19436786, 2.43037049, 3.33752124, 3.32663

In [64]:
# Настоящие метки
y_test.to_numpy().reshape((y_test.to_numpy().shape[0]))

array([1.9 , 2.08, 2.44, 4.07, 3.7 , 2.7 , 3.37, 2.43, 2.  , 3.57, 1.9 ,
       5.06, 2.93, 2.41, 1.86, 1.9 , 3.3 , 1.42, 2.  , 2.11, 2.5 , 4.17,
       4.1 , 3.65, 3.7 , 2.25, 1.29, 2.35, 1.19, 1.85, 4.8 , 5.6 , 2.3 ,
       1.3 , 3.2 , 1.95, 6.2 , 4.1 , 3.9 , 0.46, 1.9 , 1.55, 2.45, 1.5 ,
       1.8 , 3.56, 5.08, 1.8 , 5.52, 4.96, 3.6 , 9.1 , 3.68, 1.96, 1.8 ,
       2.2 , 2.4 , 3.45, 3.03, 1.3 , 5.1 , 1.56, 4.2 , 4.7 , 0.87, 3.6 ,
       3.15, 1.83, 3.65, 3.64, 4.  , 5.33, 1.88, 2.8 , 3.25, 2.24, 3.  ,
       2.31, 1.83, 3.6 , 2.37, 1.54, 3.08, 3.6 , 4.  , 3.6 , 1.1 , 2.62,
       0.4 , 1.53, 1.8 , 4.8 ])

In [65]:
 # Оценка через метрику r2_score
from sklearn.metrics import r2_score
r2 = r2_score(y_pred=predict, y_true=y_test.to_numpy().reshape((y_test.to_numpy().shape[0])))
r2

0.5003199232738778

In [66]:
data_for_predict = compounds.copy()
data_for_predict.head()

Unnamed: 0_level_0,Number_a,Atomic_Number_a,NUMBER_OF_Electrons_at_last_orbitale_a,NUMBER_OF_Electrons_at_before_last_orbitale_a,NUMBER_OF_electrones_at_last_level_a,NUMBER_OF_vacancies_at_outer_orbitale_a,Number_of_active_electrons_at_inner_level_a,Max_valency_a,Atomic_Mass_a,Electronegativity_a,...,Max_valency_c,Atomic_Mass_c,Electronegativity_c,Atomic_radius_c,Covalent_radius_c,Ionization_potential_c,Electron_affinity_c,Period_c,Group_c,Block_c
Compound,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Zn3WN4,3,30,2,0,2,0,0,2,65.4,1.65,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
ZnGeN2,1,30,2,0,2,0,0,2,65.4,1.65,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
ZnSiN2,1,30,2,0,2,0,0,2,65.4,1.65,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
YWN3,1,39,2,0,2,0,1,3,88.905,1.22,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1
ZnSnN2,1,30,2,0,2,0,0,2,65.4,1.65,...,5,14.0067,3.04,92.0,75,14.48,7.0,2,3,1


In [67]:
# MinMaxScaler
data_for_predict = pd.DataFrame(data=scaler.transform(data_for_predict), columns=data_for_predict.columns, index=data_for_predict.index)

In [68]:
# Выполняю предсказание
target_prediction = svr.predict(data_for_predict)

In [69]:
# Создам DataFrame где в качестве индексов используется соединение, а predict - предсказанное значение ширнины запрещенной зоны
res = pd.DataFrame(data=target_prediction, columns=['predict'], index=data_for_predict.index)
res

Unnamed: 0_level_0,predict
Compound,Unnamed: 1_level_1
Zn3WN4,2.228140
ZnGeN2,2.818794
ZnSiN2,3.194368
YWN3,2.632254
ZnSnN2,2.337091
...,...
ZrAl3N4,3.387561
Mn3AlN3,2.693235
Mn3InN3,1.910620
Si6Mo3N11,2.003208


In [76]:
# Поиск максимально точного показания по тестовой выборке

traintest_data = data.copy()
predict_data = compounds.copy()
# Выделение таргета и данных
y = traintest_data.Band_gap
X = traintest_data.drop(columns='Band_gap')

In [77]:
# MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X)
X_scaler = pd.DataFrame(data=scaler.transform(X), columns=X.columns, index=X.index)
data_for_predict = pd.DataFrame(data=scaler.transform(predict_data), columns=predict_data.columns, index=predict_data.index)

In [78]:
svr = SVR(C=1.0, epsilon=0.2)

# списки результатов
list_r2_score = []
list_traintest_predict = []
list_predict = []
list_x_test = []

# число тестов
number_tests = 100

for i in range(number_tests):
    # выделяю train и test
    X_train, X_test, y_train, y_test = train_test_split(X_scaler, y)
    list_x_test.append(X_test)

    #svr
    svr.fit(X=X_train, y=y_train)
    traintest_predict = svr.predict(X_test)
    list_traintest_predict.append(traintest_predict)

    # r2_score
    list_r2_score.append(r2_score(y_pred=traintest_predict, y_true=y_test))

    predict = svr.predict(data_for_predict)
    list_predict.append(predict)

In [79]:
# Максимальный показатель метрики r2
max(list_r2_score)

0.6229179037562724

In [81]:
# Найду предсказания соответствующие данному показатель по метрике
res = pd.DataFrame(data=list_predict[list_r2_score.index(max(list_r2_score))], columns=['predict'], index=data_for_predict.index)
res.head()

Unnamed: 0_level_0,predict
Compound,Unnamed: 1_level_1
Zn3WN4,2.277929
ZnGeN2,2.699837
ZnSiN2,3.147336
YWN3,2.63531
ZnSnN2,2.098254


In [83]:
# также все 100 циклов
res_2 = pd.DataFrame(index=data_for_predict.index)
for i, j in enumerate(list_predict):
    res_2[str(i)] = j
res_2

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
Compound,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Zn3WN4,2.280214,2.626473,2.513460,2.236138,2.037930,2.085025,2.179867,2.156832,2.389888,2.221909,...,2.669290,1.997092,2.038280,2.010038,2.180327,2.418808,2.009757,2.567224,2.221309,2.264835
ZnGeN2,2.945722,3.201256,2.699876,3.400163,2.700012,2.770618,3.077609,2.751123,3.326979,2.830478,...,3.400425,2.700044,2.699651,2.983268,2.793284,3.224242,2.700431,2.886994,2.700259,2.712674
ZnSiN2,3.286024,3.545233,2.982017,3.795136,3.146859,3.232684,3.508478,3.231372,3.777345,3.238576,...,3.760209,3.154891,3.187537,3.401371,3.148159,3.634383,3.184652,3.327385,3.121547,3.125620
YWN3,2.678165,2.962675,2.829091,2.541275,2.383212,2.405875,2.496106,2.524812,2.793527,2.520371,...,2.955187,2.400780,2.294283,2.583159,2.521488,2.498199,2.584643,2.891561,2.639545,2.570963
ZnSnN2,2.543381,2.704110,2.364598,2.822665,2.136705,2.181902,2.519359,2.106823,2.723985,2.238278,...,2.849827,2.112982,2.098812,2.434322,2.296160,2.607203,2.145331,2.308917,2.110509,2.163436
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZrAl3N4,3.492576,3.382234,3.280971,3.582351,3.408143,3.156471,3.385995,3.272052,3.641722,3.435473,...,3.530830,3.257407,3.380384,3.269829,3.083133,3.481094,3.274106,3.377491,3.163447,3.058020
Mn3AlN3,2.947280,2.742674,2.668541,2.894936,2.653164,2.455378,2.780398,2.713246,3.060576,2.774206,...,2.852864,2.746798,2.783312,2.628395,2.381620,2.923867,2.589711,2.639531,2.548072,2.509047
Mn3InN3,2.225327,2.049618,2.157846,2.157033,1.830834,1.617235,1.915136,1.782195,2.143073,1.936332,...,2.060732,1.894480,1.836324,1.889340,1.692898,2.072979,1.745419,1.824316,1.778806,1.801898
Si6Mo3N11,2.010831,2.456982,2.233266,2.059427,1.804066,1.871785,2.067006,2.067921,1.934942,2.091448,...,2.153789,1.933410,1.971692,2.002278,2.020715,2.074791,2.057593,2.209655,1.943510,1.983100
