1. Постройте нейронную сеть (берем несложную полносвязную сеть, меняем число слоев, число нейронов, типы активации, тип оптимизатора) на датасете from sklearn.datasets import load_boston. 
2. Постройте 10-15 вариантов разных нейронных сетей и сведите результаты их работы в таблицу.  Опишите, какого результата вы добились от нейросети? Что помогло вам улучшить ее точность?


In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.datasets import load_boston
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import warnings
warnings.filterwarnings('ignore')

In [20]:
class NNetCompare:
    
    def __init__(self, 
                 X,
                 y,
                 epoch_list=[10],
                 neuron_list=[32],
                 layer_list=[2],
                 activator_list=['sigmoid'],
                 optimizer_list=['Adam'],
                 mix=False                
                ):
        
        self.X = X
        self.y = y
        self.e_list = epoch_list
        self.n_list = neuron_list
        self.l_list = layer_list
        self.act_list = activator_list
        self.opt_list = optimizer_list
        self.mix = mix
                
    
    def run(self):
        
        tf.random.set_seed(42)
        
        result = pd.DataFrame(columns=['n_layers',
                                       'n_neurons',
                                       'activator',
                                       'optimizer', 
                                       'n_epoch',
                                       'loss_train',
                                       'r2_score_train',
                                       'loss_test', 
                                       'r2_score_test'
                                      ])
        if not self.mix:
            for epoch in self.e_list:
                for layers in self.l_list:
                    for neurons in self.n_list:
                        for activator in self.act_list:
                            for optimizer in self.opt_list:

                                model_data = self.build_net(layers,
                                                            neurons,
                                                            activator,
                                                           )
                                model, act_list = model_data

                                with tf.device("GPU:0"):
                                    model.compile(loss='mean_squared_error',
                                                  optimizer=optimizer, 
                                                  metrics=tf.keras.metrics.RootMeanSquaredError()
                                                 )

                                with tf.device("GPU:0"):
                                    history = self.fit(model, epoch)

                                result = result.append({'n_layers': layers,
                                                        'n_neurons': neurons,
                                                        'activator': '->'.join(act_list),
                                                        'optimizer': optimizer, 
                                                        'n_epoch': len(history.history['loss']),
                                                        'loss_train': history.history['loss'][-1],
                                                        'r2_score_train': history.history['root_mean_squared_error'][-1],
                                                        'loss_test': history.history['val_loss'][-1], 
                                                        'r2_score_test': history.history['val_root_mean_squared_error'][-1]
                                                       }, ignore_index=True)
        else:
            
            for epoch in self.e_list:
                for layers in self.l_list:
                    for neurons in self.n_list:

                        model_data = self.build_net(layers,
                                                    neurons,
                                                    activator='fake', # это, конечно, костыль, но не хочется переписывать функцию self.build_net(). Извините =)
                                                   )
                        model, act_list = model_data
                        
                        optimizer = np.random.choice(self.opt_list)

                        with tf.device("GPU:0"):
                            model.compile(loss='mean_squared_error',
                                          optimizer=optimizer, 
                                          metrics=tf.keras.metrics.RootMeanSquaredError()
                                         )

                        with tf.device("GPU:0"):
                            history = self.fit(model, epoch)

                        result = result.append({'n_layers': layers,
                                                'n_neurons': neurons,
                                                'activator': '->'.join(act_list),
                                                'optimizer': optimizer, 
                                                'n_epoch': len(history.history['loss']),
                                                'loss_train': history.history['loss'][-1],
                                                'r2_score_train': history.history['root_mean_squared_error'][-1],
                                                'loss_test': history.history['val_loss'][-1], 
                                                'r2_score_test': history.history['val_root_mean_squared_error'][-1]
                                               }, ignore_index=True)
        
        return result
    
                
    def fit(self, 
            model,
            epochs,
           ):
        
        hist = model.fit(self.X,
                         self.y,
                         epochs=epochs,
                         batch_size=520, 
                         verbose=0,
                         validation_split=0.3
                        )
        
        return hist
    
           
    def build_net(self,
                  n_layers,
                  n_neurons,                   
                  activator,
                 ):
        
        act_list = []
        
        model = Sequential()
        
        # input_act = np.random.choice(['linear', 'relu'])
        act_list.append('relu')
        
        model.add(Dense(self.X.shape[1],
                        input_shape=(self.X.shape[1],), 
                        activation='relu'))
        
        delta = n_neurons // (n_layers + 1)
        next_neurons = n_neurons
        
        tf.random.set_seed(42)
        
        for layer in range(n_layers):
            if not self.mix:
                model.add(Dense(next_neurons, activator))
                act_list.append(activator)
            else:
                next_act = np.random.choice(self.act_list)
                model.add(Dense(next_neurons, next_act))
                act_list.append(next_act)
            next_neurons -= delta
        
        model.add(Dense(1, 'linear'))
        act_list.append('linear')
        
        return [model, act_list]

In [21]:
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [22]:
boston = load_boston()
data = pd.DataFrame(boston['data'], columns=boston["feature_names"])
target = boston["target"]
data.describe()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063
std,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062
min,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73
25%,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95
50%,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36
75%,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955
max,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97


In [23]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 13 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   CRIM     506 non-null    float64
 1   ZN       506 non-null    float64
 2   INDUS    506 non-null    float64
 3   CHAS     506 non-null    float64
 4   NOX      506 non-null    float64
 5   RM       506 non-null    float64
 6   AGE      506 non-null    float64
 7   DIS      506 non-null    float64
 8   RAD      506 non-null    float64
 9   TAX      506 non-null    float64
 10  PTRATIO  506 non-null    float64
 11  B        506 non-null    float64
 12  LSTAT    506 non-null    float64
dtypes: float64(13)
memory usage: 51.5 KB


In [24]:
sample = NNetCompare(data, target)

In [25]:
res = sample.run()
res

Unnamed: 0,n_layers,n_neurons,activator,optimizer,n_epoch,loss_train,r2_score_train,loss_test,r2_score_test
0,2,32,relu->sigmoid->sigmoid->linear,Adam,10,718.300781,26.801134,356.774872,18.888485


In [27]:
neurons = [6, 9, 12]
layers = [2, 3, 4]
activators = ['softmax', 'linear', 'relu']
optimizer = ['Adam', 'RMSProp']
epoch = [10, 20]

In [28]:
sample_2 = NNetCompare(data,
                       target,
                       epoch_list=epoch,
                       neuron_list=neurons,
                       layer_list=layers,
                       activator_list=activators,
                       optimizer_list=optimizer)

In [30]:
res_2 = sample_2.run()
res_2.sort_values(by='r2_score_test', ascending=False).head(5)
# res_2.sort_values(by='loss_train', ascending=True).head(5)

Unnamed: 0,n_layers,n_neurons,activator,optimizer,n_epoch,loss_train,r2_score_train,loss_test,r2_score_test
95,4,6,relu->relu->relu->relu->relu->linear,RMSProp,20,55.812748,7.470793,165.823975,12.877266
41,4,6,relu->relu->relu->relu->relu->linear,RMSProp,10,58.409744,7.642627,180.320465,13.428346
94,4,6,relu->relu->relu->relu->relu->linear,Adam,20,61.446251,7.838766,163.160751,12.773439
107,4,12,relu->relu->relu->relu->relu->linear,RMSProp,20,64.021675,8.001354,236.264038,15.370883
101,4,9,relu->relu->relu->relu->relu->linear,RMSProp,20,64.831184,8.051782,209.918381,14.488561


In [31]:
neurons = [25, 35, 45]
layers = [2, 3]
activators = ['linear', 'relu']
optimizer = ['Adam', 'RMSProp']
epoch = [7, 10, 14]

In [32]:
sample_3 = NNetCompare(data,
                       target,
                       epoch_list=epoch,
                       neuron_list=neurons,
                       layer_list=layers,
                       activator_list=activators,
                       optimizer_list=optimizer)

In [38]:
res_3 = sample_3.run()
res_3.sort_values(by='r2_score_test', ascending=False).head(5)
# res_3['diff'] = abs(res_3['r2_score_test'] - res_3['r2_score_train'])
# res_3.sort_values(by='diff', ascending=True).head(5)

Unnamed: 0,n_layers,n_neurons,activator,optimizer,n_epoch,loss_train,r2_score_train,loss_test,r2_score_test,diff
22,3,45,relu->relu->relu->relu->linear,Adam,7,141.358383,11.889423,113.317795,10.645083,1.24434
45,3,45,relu->linear->linear->linear->linear,RMSProp,10,125.271019,11.192453,97.689041,9.883777,1.308677
12,3,25,relu->linear->linear->linear->linear,Adam,7,694.53125,26.353962,586.356201,24.214792,2.13917
6,2,35,relu->relu->relu->linear,Adam,7,8963.314453,94.674782,8505.273438,92.224037,2.450745
21,3,45,relu->linear->linear->linear->linear,RMSProp,7,163.407791,12.783106,235.827118,15.356664,2.573558


In [45]:
sample31 = NNetCompare(data,
                       target,
                       epoch_list=[500],
                       neuron_list=[35],
                       layer_list=[3],
                       activator_list=['relu'],
                       optimizer_list=['Adam'])

In [46]:
res31 = sample31.run()
res31

Unnamed: 0,n_layers,n_neurons,activator,optimizer,n_epoch,loss_train,r2_score_train,loss_test,r2_score_test
0,3,35,relu->relu->relu->relu->linear,Adam,500,17.270987,4.155838,843.381714,29.041035


In [47]:
neurons = [75, 120]
layers = [3, 4]
activators = ['relu', 'linear']
optimizer = ['Adam', 'RMSProp']
epoch = [9, 12]

In [48]:
sample_4 = NNetCompare(data,
                       target,
                       epoch_list=epoch,
                       neuron_list=neurons,
                       layer_list=layers,
                       activator_list=activators,
                       optimizer_list=optimizer, 
                       mix=True)

In [49]:
res_4 = sample_4.run()
res_4.sort_values(by='r2_score_test', ascending=False).head(5)

Unnamed: 0,n_layers,n_neurons,activator,optimizer,n_epoch,loss_train,r2_score_train,loss_test,r2_score_test
2,4,75,relu->relu->relu->linear->linear->linear,Adam,9,451.886688,21.257626,1146.239868,33.856163
4,3,75,relu->linear->linear->linear->linear,Adam,12,82.674164,9.092533,394.421295,19.860043
5,3,120,relu->relu->relu->relu->linear,RMSProp,12,67.799446,8.234042,331.236328,18.1999
1,3,120,relu->relu->linear->linear->linear,Adam,9,284.254028,16.859835,217.741501,14.756066
6,4,75,relu->linear->linear->linear->relu->linear,RMSProp,12,62.64883,7.915102,141.870224,11.910929


Мне почему-то не удалось достигнуть стабильного адекватного значения метрики. Напишите, пожалуйста, в комментариях к ДЗ что не так у меня. Я переделаю правильно.