In [330]:
import tensorflow as tf
import numpy as np
import pandas as pd

from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split

# One shared seed: it is pushed into NumPy's and TensorFlow's global random
# generators here, and reused further down as `random_state` for the shuffle
# and the train/test split.
seed = 1234
np.random.seed(seed)
tf.random.set_seed(seed)

In [331]:
# Read the two sample files (paths are relative to the notebook's working
# directory) into separate frames, positive file first.
data_positive, data_negative = (
    pd.read_csv(csv_name)
    for csv_name in ("samples_positive.csv", "samples_negative.csv")
)

In [332]:
# Stack the positive and negative samples into a single frame.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# each half keeps the default row labels it got from read_csv, so labels
# repeat and any label-based lookup (.loc) would match rows from both files.
data = pd.concat([data_positive, data_negative], ignore_index=True)
data.head()

Unnamed: 0,GPA,SAT Reading & Writing,SAT Math,SAT Essay,Activity Score,Personal Statement Score,Residency,Race,Gender,Accepted
0,4.0,751,709,17,80,80,International,Asian,Female,1
1,3.7,790,782,17,80,80,CA resident,Asian,Male,1
2,3.5,744,729,16,80,80,International,White,Male,1
3,4.0,759,737,17,80,80,CA resident,White,Female,1
4,4.0,707,759,18,80,80,CA resident,Asian,Female,1


In [333]:
# Replace every string-valued (object dtype) column with numeric category
# codes, working on a copy so the raw `data` frame stays untouched.
# NOTE(review): despite its name, this encoder is fitted on *all* object
# columns, not only Residency. Ordinal codes also impose an arbitrary order on
# what look like nominal categories -- consider one-hot encoding; confirm intent.
residency_ordinal_encoder = OrdinalEncoder()
obj_cols = data.select_dtypes(include="object").columns
processed_data = data.copy()
processed_data[obj_cols] = residency_ordinal_encoder.fit_transform(data[obj_cols])

In [334]:
# Preview the first rows to check that the string columns now hold numeric codes.
processed_data.head()

Unnamed: 0,GPA,SAT Reading & Writing,SAT Math,SAT Essay,Activity Score,Personal Statement Score,Residency,Race,Gender,Accepted
0,4.0,751,709,17,80,80,1.0,2.0,0.0,1
1,3.7,790,782,17,80,80,0.0,2.0,2.0,1
2,3.5,744,729,16,80,80,1.0,7.0,2.0,1
3,4.0,759,737,17,80,80,0.0,7.0,0.0,1
4,4.0,707,759,18,80,80,0.0,2.0,0.0,1


In [335]:
# Shuffle all rows (frac=1 keeps every row) with the shared seed, then discard
# the old row labels so the shuffled frame is indexed 0..n-1.
shuffled_data = (
    processed_data
    .sample(frac=1, random_state=seed)
    .reset_index(drop=True)
)

In [336]:
# Preview the first rows of the shuffled frame.
shuffled_data.head()

Unnamed: 0,GPA,SAT Reading & Writing,SAT Math,SAT Essay,Activity Score,Personal Statement Score,Residency,Race,Gender,Accepted
0,3.6,751,753,16,80,80,0.0,2.0,0.0,1
1,3.9,712,796,16,80,80,0.0,2.0,2.0,1
2,3.6,768,743,18,80,80,1.0,4.0,0.0,1
3,3.6,717,783,16,80,80,0.0,4.0,2.0,1
4,3.9,785,744,16,80,80,0.0,2.0,0.0,1


In [337]:
# Separate the target column from the features without mutating
# `shuffled_data`: selecting and dropping (instead of popping the column in
# place) keeps this cell re-runnable and leaves the shuffled frame intact.
y = shuffled_data["Accepted"].copy()
X = shuffled_data.drop(columns="Accepted")

In [338]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

In [339]:
def create_model(n_features=9):
    """Build and compile a logistic-regression model as a one-layer Keras network.

    The network is a single Dense unit with a sigmoid activation, so its
    output is one value in [0, 1] per input row.

    Args:
        n_features: Number of input columns per sample. Defaults to 9, the
            width of the feature matrix used in this notebook.

    Returns:
        A compiled ``tf.keras.Sequential`` model using the ``'sgd'`` optimizer,
        ``'binary_crossentropy'`` loss and the ``'accuracy'`` metric.
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(1, activation="sigmoid", input_shape=(n_features,))
    ])
    model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [340]:
# Instantiate a fresh, untrained model.
model = create_model()

In [341]:
# Train for 50 epochs; the held-out split is passed as validation data so
# every epoch also reports loss/accuracy on it.
model.fit(
    X_train,
    y_train,
    epochs=50,
    validation_data=(X_test, y_test),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1436e4a2ad0>

In [342]:
# Score two hand-written applicants. Values follow the feature column order:
# GPA, SAT Reading & Writing, SAT Math, SAT Essay, Activity Score,
# Personal Statement Score, Residency, Race, Gender.
# The rows are wrapped in an explicit 2-D array: a bare nested Python list is
# ambiguous for Keras, which may read a list as several separate inputs.
# NOTE(review): the last three values are hard-coded category codes -- verify
# them against residency_ordinal_encoder.categories_.
model.predict(np.array([[3.5, 710, 740, 18, 80, 80, 3.0, 5.0, 0.0],
                        [4.0, 780, 780, 18, 80, 80, 3.0, 5.0, 0.0]], dtype="float32"))



array([[1.],
       [1.]], dtype=float32)

In [343]:
# Score two more hand-written applicants with lower GPA/SAT values. Values
# follow the feature column order: GPA, SAT Reading & Writing, SAT Math,
# SAT Essay, Activity Score, Personal Statement Score, Residency, Race, Gender.
# The rows are wrapped in an explicit 2-D array: a bare nested Python list is
# ambiguous for Keras, which may read a list as several separate inputs.
# NOTE(review): the last three values are hard-coded category codes -- verify
# them against residency_ordinal_encoder.categories_.
model.predict(np.array([[2.0, 600, 600, 16, 100, 100, 3.0, 2.0, 0.0],
                        [2.6, 700, 600, 13, 80, 80, 1.0, 2.0, 1.0]], dtype="float32"))



array([[0.],
       [0.]], dtype=float32)

In [344]:
# Write the trained weights to disk (path is relative to the working directory).
# NOTE(review): some newer Keras releases reportedly require a `.weights.h5`
# suffix for this call -- confirm against the installed version.
model.save_weights("logistic_weights.h5")

In [345]:
# Rebuild the model from scratch; this rebinds `model`, replacing the trained
# instance, so the weights must be loaded back before it is used.
model = create_model()

In [346]:
# Restore the weights saved earlier into the freshly built model.
model.load_weights("logistic_weights.h5")

In [347]:
# Evaluate the reloaded model on the held-out split; the result is
# [loss, accuracy] for the loss and metric the model was compiled with.
# NOTE(review): the recorded loss is very large for binary cross-entropy while
# accuracy is high, and the recorded predictions are exactly 0 or 1 -- this
# looks like a saturated sigmoid, presumably because the features (e.g. SAT
# scores in the hundreds) are fed in unscaled. Consider standardising the
# inputs; confirm.
model.evaluate(X_test, y_test)



[46.73590850830078, 0.9375]