In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
def to_one_hot(Y):
    n_col = np.amax(Y) + 1
    binarized = np.zeros((len(Y), n_col))
    for i in range(len(Y)):
        binarized[i, Y[i]] = 1.
    return binarized

def from_one_hot(Y):
    arr = np.zeros((len(Y), 1))
    for i in range(len(Y)):
        l = layer2[i]
        for j in range(len(l)):
            if(l[j] == 1):
                arr[i] = j+1
    return arr

def sigmoid(x):
    return 1/(1+np.exp(-x))
    
def sigmoid_deriv(x):
    return sigmoid(x)*(1 - sigmoid(x))

def normalize(X, axis=-1, order=2):
    l2 = np.atleast_1d(np.linalg.norm(X, order, axis))
    l2[l2 == 0] = 1
    return X / np.expand_dims(l2, axis)

In [4]:
train_data = pd.read_csv("iris_train.csv")
test_data = pd.read_csv("iris_test.csv")
train_data['species'].replace(['Iris-setosa', 'Iris-virginica', 'Iris-versicolor'], [0, 1, 2], inplace=True)
columns_train = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X_train = pd.DataFrame(train_data, columns=columns_train)
X_train.fillna(X_train.mean(), inplace=True)
X_train = normalize(X_train.values)

columns_train_output = ['species']
y_train = pd.DataFrame(train_data, columns=columns_train_output)
y_train = y_train.values.flatten()
y_train = to_one_hot(y_train)

columns_test = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X_test = pd.DataFrame(test_data, columns=columns_test)
X_test = normalize(X_test.values)

w0 = 2*np.random.random((4, 5)) - 1 # для входного слоя - 4 входа, 3 выхода
w1 = 2*np.random.random((5, 3)) - 1 # для внутреннего слоя - 5 входов, 3 выхода
# скорость обучения (learning rate)
n = 0.01
errors = []

for i in range(100000):
    # прямое распространение(feed forward)
    layer0 = X_train
    layer1 = sigmoid(np.dot(layer0, w0))
    layer2 = sigmoid(np.dot(layer1, w1))
    # обратное распространение(back propagation) с использованием градиентного спуска
    layer2_error = y_train - layer2
    layer2_delta = layer2_error * sigmoid_deriv(layer2)
    layer1_error = layer2_delta.dot(w1.T)
    layer1_delta = layer1_error * sigmoid_deriv(layer1)
    w1 += layer1.T.dot(layer2_delta) * n
    w0 += layer0.T.dot(layer1_delta) * n
    error = np.mean(np.abs(layer2_error))
    errors.append(error)
    accuracy = (1 - error) * 100

print(accuracy)

94.61115501076458


In [None]:
layer0_test = X_test
layer1_test = sigmoid(np.dot(layer0_test, w0))
layer2_test = sigmoid(np.dot(layer1_test, w1))
predicted_labels = np.argmax(layer2_test, axis=1)
predicted_species = np.array(['Iris-setosa', 'Iris-virginica', 'Iris-versicolor'])[predicted_labels]
submission_df = pd.DataFrame(test_data, columns=columns_test)
submission_df['species'] = predicted_species
submission_df.to_csv('submission.csv', index=False)