In [None]:
import pandas as pd
import numpy as np

from keras.layers import Dense, PReLU
from keras.models import Sequential
from tensorflow.keras.optimizers import Adam, SGD

from sklearn.model_selection import train_test_split

Генерація даних

In [None]:
N_ENROLLEES = 3000

# Seed NumPy's global generator so the synthetic cohort is reproducible.
np.random.seed(0)

# Marks are integers in 2..5 (randint's upper bound is exclusive).
# The draw order (math, physics, ukrainian, privilege) determines the
# generated values, so it must not be rearranged.
math = np.random.randint(2, 6, N_ENROLLEES)
physics = np.random.randint(2, 6, N_ENROLLEES)
ukrainian = np.random.randint(2, 6, N_ENROLLEES)

# Binary privilege flag: 0 or 1.
privilege = np.random.randint(0, 2, N_ENROLLEES)

# One row per enrollee, one column per feature.
data = pd.DataFrame(
    {
        'math': math,
        'physics': physics,
        'ukrainian': ukrainian,
        'privilege': privilege,
    }
)


def create_labels(enrolle):
    """Return the admission label (1 = enrolled, 0 = rejected) for one enrollee.

    Args:
        enrolle: A sequence of exactly four values, unpacked in the order
            (math, physics, ukrainian, privilege). Marks are compared against
            the raw thresholds 3 and 11, so they must not be rescaled yet.

    Returns:
        int: 0 if any mark is below 3; otherwise 1 for privileged enrollees;
        otherwise 1 only when neither math nor physics equals 3 and the sum
        of the three marks is at least 11.
    """
    math, phy, ukr, privilege = enrolle
    marks = (math, phy, ukr)

    # A failing mark in any subject rejects the enrollee outright.
    if any(mark < 3 for mark in marks):
        return 0

    # Privileged enrollees only need to pass every subject.
    if privilege:
        return 1

    # Everyone else also needs better than 3 in math and physics
    # and a total of at least 11.
    meets_bar = math != 3 and phy != 3 and math + phy + ukr >= 11
    return 1 if meets_bar else 0


def scale_mark(mark, min_mark=2, max_mark=5):
    """Linearly rescale a mark from [min_mark, max_mark] onto [0, 1].

    With the defaults this is the notebook's 2-5 grading scale, i.e.
    ``(mark - 2) / 3``.

    Args:
        mark: The mark to rescale (any value supporting ``-`` and ``/``).
        min_mark: Lowest mark of the scale; maps to 0.
        max_mark: Highest mark of the scale; maps to 1.

    Returns:
        The rescaled mark.

    Raises:
        ValueError: If ``min_mark`` equals ``max_mark`` (zero-width scale).
    """
    if max_mark == min_mark:
        raise ValueError('max_mark must differ from min_mark')
    return (mark - min_mark) / (max_mark - min_mark)


# Labels must be computed first: create_labels compares the raw marks,
# so it has to run before the mark columns are rescaled below.
data['enrolled'] = data.apply(create_labels, axis=1)

# Rescale each mark column element by element with scale_mark.
for mark_column in ['math', 'physics', 'ukrainian']:
    data[mark_column] = data[mark_column].map(scale_mark)

In [None]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable.
train_data, test_data = train_test_split(
    data,
    test_size=0.3,
    random_state=0,
)

Тренувальна вибірка

In [None]:
# Preview the first rows of the training split.
train_data.head()

Unnamed: 0,math,physics,ukrainian,privilege,enrolled
1732,0.666667,1.0,0.333333,1,1
2440,1.0,1.0,0.0,1,0
1232,0.666667,1.0,0.0,1,0
1081,0.333333,0.0,0.333333,0,0
2920,1.0,0.0,1.0,0,0


In [None]:
# Summary statistics (count, mean, std, quartiles) for the training split.
train_data.describe()

Unnamed: 0,math,physics,ukrainian,privilege,enrolled
count,2100.0,2100.0,2100.0,2100.0,2100.0
mean,0.505238,0.49619,0.490952,0.517143,0.302857
std,0.378648,0.374023,0.371804,0.499825,0.459603
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0
50%,0.666667,0.333333,0.333333,1.0,0.0
75%,1.0,0.666667,0.666667,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0


Тестова вибірка

In [None]:
# Preview the first rows of the held-out test split.
test_data.head()

Unnamed: 0,math,physics,ukrainian,privilege,enrolled
311,0.666667,1.0,1.0,1,1
1025,1.0,0.0,0.0,1,0
1587,0.666667,1.0,0.333333,1,1
2941,1.0,1.0,0.0,0,0
2980,1.0,1.0,0.666667,1,1


In [None]:
# Summary statistics (count, mean, std, quartiles) for the test split.
test_data.describe()

Unnamed: 0,math,physics,ukrainian,privilege,enrolled
count,900.0,900.0,900.0,900.0,900.0
mean,0.505926,0.513704,0.483704,0.5,0.32
std,0.363438,0.371969,0.369531,0.500278,0.466736
min,0.0,0.0,0.0,0.0,0.0
25%,0.333333,0.333333,0.0,0.0,0.0
50%,0.333333,0.666667,0.333333,0.5,0.0
75%,0.666667,1.0,0.666667,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0


Побудова моделі

На архітектуру моделі та спосіб її тренування вплинули наступні чинники:
1. Мала кількість ознак (4 шт.)
2. Чітко визначені правила прийому (у форматі if then), що не є типовим для задач, які вирішують НМ
3. Обмежена кількість унікальних векторів даних.

Виходячи з п. 1, 3 прийнято рішення побудувати просту модель: всього 1 прихований шар з 4-х нейронів.  
Виходячи з п. 2, 3 прийнято рішення не намагатися узагальнити дані, а просто "запам'ятати" можливі комбінації, тренуючи нейронну мережу 3000 епох. Це дає можливість гарантувати правильну роботу мережі.

In [None]:
# Network: 4 inputs -> Dense(4) with a learnable PReLU activation ->
# a single sigmoid unit.
model = Sequential()
model.add(Dense(4, input_shape=(4,)))
model.add(PReLU())
model.add(Dense(1, activation='sigmoid'))

model.compile(
    optimizer='sgd',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Everything except the 'enrolled' column is a feature; 'enrolled' is the target.
train_features = train_data.drop('enrolled', axis=1)
train_labels = train_data['enrolled']

# Train silently for 3000 epochs with mini-batches of 10 rows.
history = model.fit(
    train_features,
    train_labels,
    batch_size=10,
    epochs=3000,
    verbose=False,
)

# Display the loss and accuracy recorded for the last epoch.
history.history['loss'][-1], history.history['accuracy'][-1]

(9.873240196611732e-05, 1.0)

In [None]:
# Score the trained model on the held-out split. Binding the return value
# to `_` keeps it from being echoed as the cell's result.
test_features = test_data.drop('enrolled', axis=1)
test_labels = test_data['enrolled']
_ = model.evaluate(test_features, test_labels)



Мережа впоралася із задачею, про що свідчить 100% точність

Для введення даних запустіть наступну комірку

In [None]:
def _read_mark(prompt):
    """Prompt for a mark, validate it is an integer in 2..5, return it scaled.

    Raises:
        ValueError: If the input is not an integer or lies outside 2..5.
    """
    mark = int(input(prompt))
    if not 2 <= mark <= 5:
        raise ValueError(f'mark out of range: {mark}')
    return scale_mark(mark)


try:
    math = _read_mark('math (2-5): ')
    phy = _read_mark('physics (2-5): ')
    ukr = _read_mark('ukrainian (2-5): ')
    # Any answer other than 'y' / 'n' raises KeyError and is reported below.
    privilege = {'y': 1, 'n': 0}[input('has privilege (y/n): ')]
except (ValueError, KeyError):
    # Only malformed user input is reported here. Interrupts and model errors
    # propagate instead of being masked as "Incorrect data".
    print('Incorrect data')
else:
    # One sample with features in the training column order:
    # math, physics, ukrainian, privilege. An explicit array is passed
    # because not every Keras version accepts nested Python lists.
    features = np.array([[math, phy, ukr, privilege]])
    enrolled = bool(model.predict(features)[0][0] > 0.5)
    print('Student is', 'enrolled' if enrolled else 'not enrolled')

math (2-5): 4
physics (2-5): 3
ukrainian (2-5): 4
has privilege (y/n): n
Student is not enrolled
