In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import warnings
from imblearn.under_sampling import NearMiss
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf
import numpy as np

# Regression

In [139]:
df_regression = pd.read_csv('DataSet3_fil.csv')
df_regression.drop('Unnamed: 0',axis=1, inplace=True)

In [140]:
y_regression = df_regression["price"]
X_regression = df_regression.drop(["price"], axis=1)

In [141]:
X_train_regression, X_test_regression, y_train_regression, y_test_regression = train_test_split(X_regression, y_regression, test_size=0.2)

In [142]:
# создаем модель, как набор последовательных слоев
model_regression = tf.keras.Sequential(
    [
        # Dense - полносвязный слой (каждый нейрон следующего слоя связан со всеми нейронами предыдущего)
        tf.keras.layers.Dense(64, activation="relu", input_shape=(13,)),
        # на втором скрытом слое будет 32 нейрона
        tf.keras.layers.Dense(32, activation="linear"),
        # Dropout позволяет внести фактор случайности - при обучении часть нейронов будет отключаться
        # каждый нейрон, в данном случае, будет отключаться с вероятностью 0.1
        tf.keras.layers.Dropout(0.1),
        tf.keras.layers.Dense(16, activation="relu"),
        tf.keras.layers.Dropout(0.1),
        # на выходе один нейрон, функция активации не применяется
        tf.keras.layers.Dense(1, activation="linear"),
    ]
)

In [143]:
model_regression.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_20 (Dense)            (None, 64)                896       
                                                                 
 dense_21 (Dense)            (None, 32)                2080      
                                                                 
 dropout_6 (Dropout)         (None, 32)                0         
                                                                 
 dense_22 (Dense)            (None, 16)                528       
                                                                 
 dropout_7 (Dropout)         (None, 16)                0         
                                                                 
 dense_23 (Dense)            (None, 1)                 17        
                                                                 
Total params: 3521 (13.75 KB)
Trainable params: 3521 (

In [144]:
# компилируем
model_regression.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.005), loss="mse")

In [146]:
X_train_regression = X_train_regression.astype('float32')
y_train_regression = y_train_regression.astype('float32')

In [147]:
# обучаем, 50 эпох означает 50 проходов по обучающей выборке
model_regression.fit(X_train_regression, y_train_regression, epochs=50, verbose=None)

<keras.src.callbacks.History at 0x23e6e00b2d0>

In [41]:
X_test_regression = X_train_regression.astype('float32')
y_test_regression = y_train_regression.astype('float32')

In [42]:
y_pred_regression = model_regression.predict(X_test_regression)



In [43]:
from sklearn.metrics import r2_score
# оцениваем качество с помощью метрик
print(mean_absolute_error(y_test_regression, y_pred_regression))
print(mean_squared_error(y_test_regression, y_pred_regression))
print(r2_score(y_test_regression, y_pred_regression))

10897280.0
350402160000000.0
0.5055344899512042


# Classification

In [45]:
df_classifier = pd.read_csv('DataSet4_fil.csv')
df_classifier.drop('Unnamed: 0',axis=1, inplace=True)


In [46]:
df_classifier.head()

Unnamed: 0,Temperature[C],Humidity[%],TVOC[ppb],eCO2[ppm],Raw H2,Raw Ethanol,Pressure[hPa],PM1.0,PM2.5,NC0.5,NC1.0,NC2.5,CNT,Fire Alarm
0,23.056,51.11,1.0,400.0,12626.0,19698,939.81,0.08,0.14,0.43,0.156,0.056,240,0
1,23.131,50.08,5.0,400.0,12633.0,19696,939.813,0.09,0.17,0.46,0.189,0.074,247,0
2,23.142,50.0,0.0,400.0,12629.0,19704,939.79,0.1,0.17,0.49,0.189,0.072,248,0
3,23.153,49.85,12.0,400.0,12630.0,19698,939.791,0.1,0.18,0.5,0.195,0.074,249,0
4,23.163,49.71,11.0,400.0,12637.0,19710,939.812,0.1,0.17,0.49,0.192,0.073,250,0


In [47]:
scaler = StandardScaler()
data_scaler = scaler.fit_transform(df_classifier.drop(["Fire Alarm"], axis=1))

In [48]:
y_classifier = df_classifier["Fire Alarm"]
X_classifier = data_scaler

#X_classifier, _, y_classifier, _ = train_test_split(X_classifier, y_classifier, test_size=0.9, random_state=42)

In [37]:
from imblearn.under_sampling import NearMiss
nm = NearMiss()
X_classifier, y_classifier = nm.fit_resample(X_classifier, y_classifier.ravel())

In [49]:
X_train_classifier, X_test_classifier, y_train_classifier, y_test_classifier = train_test_split(X_classifier, y_classifier, test_size=0.2)

In [50]:
model_classification = tf.keras.Sequential(
    [
        tf.keras.layers.Dense(64, activation="relu", input_shape=(13,)),
        tf.keras.layers.Dense(128, activation="relu"),
        tf.keras.layers.Dropout(0.05),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(32, activation="relu"),
        tf.keras.layers.Dense(16, activation="relu"),
        # используем 1 нейрон и sigmoid
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ]
)

In [57]:
# в качестве функции активации используется бинарная  кроссэнтропия
model_classification.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="mse")

In [58]:
w0 = 1 / y_train_classifier[y_train_classifier==0].shape[0]
w1 = 1 / y_train_classifier[y_train_classifier==1].shape[0]

In [59]:
# verbose=None - не будет логов
model_classification.fit(X_train_classifier, y_train_classifier, epochs=25, verbose=None,
                           class_weight={0: w0, 1: w1})

<keras.src.callbacks.History at 0x17709860510>

In [61]:
y_pred = np.around(model_classification.predict(X_test_classifier, verbose=None))

In [62]:
print(classification_report(y_test_classifier, y_pred))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       969
         1.0       1.00      1.00      1.00      6047

    accuracy                           1.00      7016
   macro avg       1.00      1.00      1.00      7016
weighted avg       1.00      1.00      1.00      7016



# MLP

In [78]:
import numpy as np

class MLP:
    def __init__(self, layer_sizes, activation_functions):
        self.num_layers = len(layer_sizes)
        self.layer_sizes = layer_sizes
        self.activation_functions = activation_functions
        self.weights = [np.random.randn(layer_sizes[i], layer_sizes[i-1]) for i in range(1, self.num_layers)]
        self.biases = [np.random.randn(layer_sizes[i], 1) for i in range(1, self.num_layers)]

    def _sigmoid(self, x):
        return 1.0 / (1.0 + np.exp(-x))

    def _sigmoid_derivative(self, x):
        return self._sigmoid(x) * (1 - self._sigmoid(x))

    def _tanh(self, x):
        return np.tanh(x)

    def _tanh_derivative(self, x):
        return 1 - np.tanh(x)**2

    def _relu(self, x):
        return np.maximum(0, x)

    def _relu_derivative(self, x):
        return np.where(x > 0, 1, 0)

    def _feed_forward(self, x):
        # х-ый элеменет
        activations = [x]
        zs = []
        for i in range(self.num_layers - 1):
            weight = self.weights[i]
            bias = self.biases[i]
            activation_fn = self.activation_functions[i]
            #умножаем матрицу весов на вектор активаций последнего слоя + вектор смещения
            z = np.dot(weight, activations[-1]) + bias
            zs.append(z)
            if activation_fn == 'sigmoid':
                a = self._sigmoid(z)
            elif activation_fn == 'tanh':
                a = self._tanh(z)
            elif activation_fn == 'relu':
                a = self._relu(z)
            activations.append(a)
        return activations, zs

    def _backpropagate(self, x, y):
        # накопление градиентов функции потерь по весам и смещениям в каждом слое сети
        delta_weights = [np.zeros(weight.shape) for weight in self.weights]
        delta_biases = [np.zeros(bias.shape) for bias in self.biases]
        # прямой проход сети для входных данных x, и сохраняются активации и взвешенные суммы для каждого слоя 
        activations, zs = self._feed_forward(x)
        #разница между предсказанными активациями и фактическими значениями целевой переменной
        delta = (activations[-1] - y) 
        for i in range(self.num_layers - 2, -1, -1):
            # Получение взвешенной суммы
            z = zs[i]
            activation_fn = self.activation_functions[i]
            if activation_fn == 'sigmoid':
                derivative = self._sigmoid_derivative(z)
            elif activation_fn == 'tanh':
                derivative = self._tanh_derivative(z)
            elif activation_fn == 'relu':
                derivative = self._relu_derivative(z)
            #Вычисление градиента по весам для текущего слоя
            delta_weights[i] = np.dot(delta, activations[i].T)
            delta_biases[i] = delta
            delta = np.dot(self.weights[i].T, delta) * derivative
        return delta_weights, delta_biases

    def fit(self, X_train, y_train, learning_rate, num_epochs):
        for epoch in range(num_epochs):
            for x, y in zip(X_train, y_train):
                x = np.array(x, ndmin=2).T
                y = np.array(y, ndmin=2).T
                delta_weights, delta_biases = self._backpropagate(x, y)
                self.weights = [weight - learning_rate * d_weight for weight, d_weight in zip(self.weights, delta_weights)]
                self.biases = [bias - learning_rate * d_bias for bias, d_bias in zip(self.biases, delta_biases)]

    def predict(self, X_test):
        predictions = []
        for x in X_test:
            # массив минимум 2мерный и транспонируем
            x = np.array(x, ndmin=2).T
            activations, _ = self._feed_forward(x)
            predictions.append(activations[-1])
        return predictions

In [101]:
X_train_regression = np.asarray(X_train_regression, dtype=np.float64)
y_train_regression = np.asarray(y_train_regression, dtype=np.float64).reshape(-1, 1)
# Создание объекта MLP для регрессии
mlp_regression = MLP([13, 1, 1], ['sigmoid', 'sigmoid'])

# Обучаем модель
mlp_regression.fit(X_train_regression, y_train_regression, learning_rate=0.05, num_epochs=10)

X_test_regression = np.asarray(X_test_regression, dtype=np.float64)
# Получаем предсказания модели
predictions_regression = mlp_regression.predict(X_test_regression)

  return 1.0 / (1.0 + np.exp(-x))


In [108]:
predictions_regression = np.asarray(predictions_regression, dtype=np.float64).reshape(-1, 1)

In [109]:
y_test_regression = np.asarray(y_test_regression, dtype=np.float64).reshape(-1, 1)
from sklearn.metrics import r2_score
print(mean_absolute_error(y_test_regression, predictions_regression))
print(mean_squared_error(y_test_regression, predictions_regression))
print(r2_score(y_test_regression, predictions_regression))

26554378.635197435
1413783283471718.5
-0.995042344314857


In [121]:
mlp_classifier = MLP([14, 1, 1, 1], ['tanh', 'relu', 'sigmoid'])

mlp_classifier.fit(X_train_classifier, y_train_classifier, learning_rate=0.05, num_epochs=10)

X_test_classifier = np.asarray(X_test_classifier, dtype=np.float64)
# Получаем предсказания модели
predictions_classifier = mlp_classifier.predict(X_test_classifier)

In [129]:
predictions_classifier=np.asarray(predictions_classifier, dtype=np.float64).reshape(-1, 1)

In [136]:
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test_classifier, predictions_classifier[:1253])
print("Accuracy:", accuracy)

Accuracy: 0.7150837988826816
