In [1]:
%%capture
!pip install category-encoders

In [2]:
import numpy as np #turning lists into arrays
import seaborn as sns #data loader
import pandas as pd #manipulate data
import category_encoders as ce #encoding
from sklearn.model_selection import train_test_split # data splitting
from sklearn.preprocessing import MinMaxScaler # normalization
from sklearn.impute import SimpleImputer # fill NaN values
from sklearn.pipeline import make_pipeline # creating a pipeline
from sklearn.metrics import accuracy_score #scoring metric

In [3]:
df = sns.load_dataset("titanic")

# drop uneeded columns
df = df.drop(['alive', 'adult_male', 'who', 'deck'], axis=1)

# pipe
pipe = make_pipeline(
    ce.OrdinalEncoder(), #encode categoricals
    SimpleImputer(strategy="median"), #fill the NaN's
    MinMaxScaler() # change values to be represented between 0 and 1
)

# split features and targets
target = 'survived'
X = df.drop(target, axis=1)
y = df[target]
print(f"X shape: {X.shape}\ny shape: {y.shape}")

X shape: (891, 10)
y shape: (891,)


In [5]:
type(X)

pandas.core.frame.DataFrame

In [4]:
type(y)

pandas.core.series.Series

In [None]:
# apply pipe
X = pipe.fit_transform(X)
print(f"""X Shape: {X.shape}
""")

X Shape: (891, 10)



In [6]:
type(X)

pandas.core.frame.DataFrame

In [None]:
# cast y as np.array for modeling
y = np.array(y)

# reshape into a column vector, do the same for y_test if you want to evaluate on test set
# https://stackoverflow.com/questions/17428621/python-differentiating-between-row-and-column-vectors
y = y.reshape((-1,1))
y

array([[0],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [0],
       [1],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [0],
       [0],
       [0],
       [0],
       [0],
       [1],
       [0],
       [0],
       [0],
       [1],
       [1],
       [0],
       [1],
       [1],
    

In [None]:
# Set the random seed for reproducible work
np.random.seed(12)

In [None]:

def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_derivative(x):
    sx = sigmoid(x)
    return sx * (1-sx)

In [None]:
weights = 2 * np.random.random((10,1)) - 1

In [None]:
(len(df.columns) - 1)

10

In [None]:
weights

array([[-0.69167432],
       [ 0.48009939],
       [-0.47336997],
       [ 0.06747879],
       [-0.97085008],
       [ 0.83749402],
       [ 0.80142971],
       [-0.93315714],
       [ 0.91389867],
       [-0.72558136]])

In [None]:
weighted_sum = np.dot(X, weights)
weighted_sum.shape

(891, 1)

In [None]:
activated_output = sigmoid(weighted_sum)
activated_output.shape

(891, 1)

In [None]:
error = y - activated_output
error.shape

(891, 1)

In [None]:
adjustments = error * sigmoid_derivative(weighted_sum)
adjustments.shape

(891, 1)

In [None]:
weights += np.dot(X.T, adjustments)
weights.shape

(10, 1)

In [None]:
# Steps we've already done: 
# 1. Randomly Initialized Weights already. Those are in memory as `weights`
# 2. We've already got input data & correct_outputs


# Update our weights 10,000 times - (fingers crossed that this process reduces error)
for iteration in range(10000):
    
    # Weighted sum of inputs / weights
    weighted_sum = np.dot(X, weights)
    
    # Activate!
    activated_output = sigmoid(weighted_sum)
    
    # Cac error
    error = y - activated_output
    
    adjustments = error * sigmoid_derivative(weighted_sum)
    
    # Update the Weights
    weights += np.dot(X.T, adjustments)
    
print("Weights after training")
print(weights)

print("Output after training")
print(activated_output)

Weights after training
[[-26.03388209]
 [ 28.94938609]
 [ -2.12666625]
 [  3.81209402]
 [  0.83857745]
 [  0.46542653]
 [  0.25155367]
 [-26.27536492]
 [  0.36402263]
 [-24.98523025]]
Output after training
[[1.74475176e-23]
 [1.00000000e+00]
 [5.13145873e-22]
 [1.00000000e+00]
 [1.07963812e-34]
 [1.96294602e-34]
 [3.52913752e-12]
 [8.99285697e-23]
 [4.70159109e-11]
 [9.58547760e-01]
 [1.22356073e-10]
 [9.20526985e-01]
 [1.61200753e-34]
 [2.27720936e-23]
 [7.07105138e-22]
 [5.44081037e-11]
 [2.19911386e-22]
 [2.98731687e-23]
 [5.17702453e-11]
 [5.96851945e-22]
 [2.50708345e-23]
 [2.54475367e-23]
 [1.03794587e-21]
 [6.96573599e-12]
 [2.86299624e-10]
 [8.74202215e-11]
 [1.59701127e-34]
 [8.10222871e-01]
 [7.33227112e-22]
 [1.30154395e-34]
 [6.16226752e-12]
 [1.00000000e+00]
 [7.33141057e-22]
 [1.07961733e-23]
 [5.04828462e-01]
 [3.57227697e-01]
 [1.59701736e-34]
 [1.56949936e-34]
 [1.18005882e-10]
 [9.95012033e-11]
 [4.03890363e-11]
 [9.29565745e-01]
 [1.59798477e-34]
 [9.76450466e-01]
 [

In [None]:
# pull values out of lists and append into a single list with a for loop
pred = []
for x in activated_output:
  for x in x:
    pred.append(int(round(x,1)))

In [None]:
pred

[0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [None]:
# Uh-oh 😧, accuracy score cant handle floats so outputs must be changed to ints using a threshold 
accuracy_score(y, [x for x in activated_output])

ValueError: ignored

In [None]:
# Change values to 1 or 0 depending on the threshold (0.5) with a single nested list comprehension if not using for loop
y_pred = [1 if x > 0.5 else 0 for x in [x for x in activated_output]]

In [None]:
accuracy_score(y, y_pred)

0.7542087542087542