In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Show NumPy arrays with 3 decimal places and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

In [None]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers

# Record which TensorFlow version produced the outputs in this notebook.
print(tf.__version__)

In [None]:
# Read the raw table from disk; the path is relative to the notebook's
# working directory.
csv_path = "data/dataset.csv"
df = pd.read_csv(csv_path)
df

In [None]:
# Keep the target ("Survived") plus the candidate feature columns, then drop
# every row that has a missing value in any of them.
df = df[["Survived", "Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]]
# .copy() makes df an independent frame, so the column assignment done later
# (label-encoding "Sex") writes to it directly instead of to a derived slice,
# which can raise SettingWithCopyWarning.
df = df.dropna().copy()
df

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Feature matrix for the first models; "Sex" is added further down once it
# has been label-encoded.
X = df[["Pclass", "Age", "SibSp", "Parch", "Fare"]].values
# Double brackets keep y two-dimensional: shape (n_rows, 1).
y = df[["Survived"]].values
# 70/30 split. A fixed random_state keeps the split -- and therefore every
# score reported below -- identical across Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42)

In [None]:
# Per-feature standardisation layer. Its mean/variance are learned from the
# training rows only: adapting on the full X would leak test-set statistics
# into the preprocessing of every model that reuses this layer.
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(X_train)

In [None]:
# Linear model, constructor form: the layer stack is passed as a list.
# Normalised inputs feed a single Dense unit with no activation.
linear_layers = [normalizer, layers.Dense(units=1)]
linear_model = tf.keras.Sequential(linear_layers)

In [None]:
# Same architecture, built incrementally: start from an empty Sequential and
# append the layers one at a time. This rebinds linear_model to a new model.
linear_model = tf.keras.Sequential()
for layer in (normalizer, layers.Dense(units=1)):
    linear_model.add(layer)

In [None]:
# Plain SGD (learning rate 0.1) minimising the mean absolute error between the
# model output and the target.
sgd = tf.optimizers.SGD(learning_rate=0.1)
linear_model.compile(optimizer=sgd, loss='mean_absolute_error')

In [None]:
%%time
history = linear_model.fit(
    X_train,
    y_train,
    epochs=100,
    verbose=1,
    validation_split = 0.2)

In [None]:
# Display the held-out labels for comparison with the predictions below.
y_test

In [None]:
# Raw outputs of the linear model on the held-out rows (continuous values,
# not yet class labels).
y_test_prediction = linear_model.predict(X_test)
y_test_prediction

In [None]:
# Threshold the continuous outputs at 0.5 to obtain boolean class predictions.
y_test_prediction > 0.5

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix of true labels vs. predictions thresholded at 0.5.
confusion_matrix(y_true=y_test, y_pred=y_test_prediction > 0.5)

In [None]:
from sklearn.metrics import accuracy_score

# Share of held-out rows whose thresholded prediction matches the label.
accuracy_score(y_true=y_test, y_pred=y_test_prediction > 0.5)

In [None]:
# One-hidden-layer network on the numeric features:
# normalise -> Dense(50, relu) -> Dense(1, sigmoid).
model = tf.keras.Sequential()
model.add(normalizer)
model.add(layers.Dense(units=50, activation="relu"))
model.add(layers.Dense(units=1, activation="sigmoid"))
# The sigmoid output is a class probability, so train with binary
# cross-entropy; mean absolute error gives a poor training signal for a
# probabilistic classifier.
model.compile(
    optimizer=tf.optimizers.SGD(learning_rate=0.1),
    loss='binary_crossentropy')

# Fit on the training split only. Fitting on the full X/y would let the model
# see the very rows it is scored on below and inflate the reported accuracy.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    verbose=1,
    validation_split=0.2)

# Evaluate on the held-out rows, thresholding probabilities at 0.5.
y_test_prediction = model.predict(X_test)
accuracy_score(y_test, y_test_prediction > 0.5)

In [None]:
# Metrics recorded per epoch by the most recent fit() call.
history.history

In [None]:
# Plot every series recorded during training against the epoch index.
hist = pd.DataFrame(history.history)
plt.plot(hist)
# Label the curves and axes so the figure can be read on its own; without a
# legend the individual series cannot be told apart.
plt.legend(hist.columns)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training history")
plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Inspect the cleaned frame before "Sex" is label-encoded.
df

In [None]:
# Preview of the integer codes LabelEncoder produces for the "Sex" column;
# df itself is not modified in this cell.
label_encoder = LabelEncoder()
label_encoder.fit(df['Sex'])
label_encoder.transform(df['Sex'])

In [None]:
# Replace "Sex" with its integer codes. assign() returns a new frame rather
# than writing into df in place, so this works without SettingWithCopyWarning
# even when df was derived from another frame by slicing.
df = df.assign(Sex=label_encoder.fit_transform(df['Sex']))

In [None]:
# Inspect the frame again now that "Sex" holds integer codes.
df

In [None]:
X = df[["Pclass", "Age", "Sex", "SibSp", "Parch", "Fare"]].values
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(np.array(X))

In [None]:
model = tf.keras.Sequential()
model.add(normalizer)
model.add(layers.Dense(units=50, activation="relu"))
model.add(layers.Dense(units=1, activation="sigmoid"))
model.compile(
    optimizer=tf.optimizers.SGD(learning_rate=0.1),
    loss='mean_absolute_error')

history = model.fit(
    X,
    y,
    epochs=100,
    verbose=1,
    validation_split = 0.2)

y_test_prediction = model.predict(X_test)
accuracy_score(y_test, y_test_prediction > 0.5)

Можно заново построить другую ИНС (искусственную нейронную сеть).

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Random forest with default hyper-parameters. The seed is fixed so the
# bootstrap samples and feature draws -- and the score below -- are
# reproducible.
rf = RandomForestRegressor(random_state=42)

In [None]:
# Inspect the training feature matrix that the forest is fitted on.
X_train

In [None]:
# scikit-learn expects a 1-D target; y_train is still a column vector at this
# point, so flatten it to avoid DataConversionWarning.
rf.fit(X_train, y_train.ravel())

In [None]:
# Continuous forest outputs for the held-out rows; thresholded at 0.5 in the
# next cell to obtain class predictions.
y_test_prediction = rf.predict(X_test)
y_test_prediction

In [None]:
# Accuracy of the forest after thresholding its outputs at 0.5.
accuracy_score(y_true=y_test, y_pred=y_test_prediction > 0.5)

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Number of rows in the target array.
y.shape[0]

In [None]:
# Flatten each target array to one dimension of length shape[0].
y, y_train, y_test = (
    target.reshape(target.shape[0]) for target in (y, y_train, y_test)
)

In [None]:
# Confirm the held-out labels are now a flat 1-D array.
y_test

In [None]:
# Average of five scores noted during manual tuning.
fold_scores = [0.2, 0.24, 0.19, 0.2, 0.21]
np.mean(fold_scores)

In [None]:
# Manual tuning log, kept as comments so the cell runs under Run All.
# Format: hyper-parameters -> five observed scores -> their mean.
# NOTE(review): presumably these are per-fold CV scores -- confirm the metric.
# n_estimators=50, max_features=1, max_depth=4, criterion=squared_error -> 0.2, 0.24, 0.19, 0.2, 0.21 -> 0.208
# n_estimators=50, max_features=1, max_depth=5, criterion=squared_error -> 0.2, 0.24, 0.19, 0.2, 0.21 -> 0.208


In [None]:
# Base estimator for the search. The seed is fixed so that every parameter
# combination is compared on identical randomness and the selected model is
# reproducible across runs.
rf = RandomForestRegressor(random_state=42)

# 3 x 1 x 3 x 1 = 9 parameter combinations.
param_grid = {
   'n_estimators': [50, 100, 150],
   'max_features': [1],
   'max_depth' : [4, 5, 6],
   'criterion' :['squared_error']
}

# 5-fold cross-validated exhaustive search; verbose=2 logs each fit.
GSCV = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, verbose=2)
GSCV.fit(X_train, y_train)

In [None]:
# GridSearchCV was created with the default refit=True, so best_estimator_ is
# already fitted on the whole training set with the winning parameters.
# Fitting it again would only repeat that work.
rf = GSCV.best_estimator_
y_test_prediction = rf.predict(X_test)
y_test_prediction

In [None]:
# Accuracy of the tuned forest after thresholding its outputs at 0.5.
accuracy_score(y_true=y_test, y_pred=y_test_prediction > 0.5)