In [19]:
import pandas as pd
import torch
import numpy as np
from machine_learning import nn, preprocessing as pp
import requests
import io
import cufflinks as cf
cf.go_offline()
import logging 

logger = logging.getLogger(__name__)
logging.basicConfig()
logger.setLevel(logging.INFO)

In [2]:
%load_ext autoreload
%autoreload 2

In [6]:
breast_cancer = pd.read_csv(
    io.BytesIO(requests.get(
        "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data",
        verify=False
    ).content),
    header=None, 
    names=[
        "id_number", 
        "clump_thickness", 
        'uniformity_cell_size', 
        'uniformity_cell_shape', 
        "marginal_adhesion",
        "single_epithelial_cell_size", 
        "bare_nuclei", 
        "bland_chromatin", "normal_nucleoli", "mitosis", "class"
    ]
).replace('?',np.NaN).astype('float', errors='ignore').dropna()






In [127]:
from machine_learning import validation
from scipy import stats

def score_func(y, yhat):
    return np.mean(np.argmax(y, axis=1) == yhat)

def run_classification_experiment(
    X, 
    y, 
    learning_rate_choices=np.linspace(.001, .01, 10), 
    hidden_layer_choices=list(range(3, 16, 3)),
    n_iter=10000, 
    conv_tol=.001
):
    kfold = validation.KFoldStratifiedCV(num_folds=5)
    accuracy_0h = []
    accuracy_1h = []
    accuracy_2h = []
    baseline = []

    split = 0 
    for train, test in kfold.split(X=X, y=y.reshape(-1, )):
        split += 1
        logger.info(f"CV Iteration: {split}")
        logger.info("Standardizing data")
        max_scaler = pp.Standardizer()
        X_train = max_scaler.fit_transform(X[train])
        X_test = max_scaler.transform(X[test])        
        y_train = pd.get_dummies(y[train]).values
        
        if split == 1:
            logger.info("Finding learning rate for H0")
            h0_callable = lambda lr: nn.SequentialNetwork(
                nn.LinearSigmoid(in_features=X_train.shape[1] + 1, out_features=y_train.shape[1]),
                convergence_tol=conv_tol,
                n_iter=n_iter,
                learning_rate=lr,
                batch_size=48
            )

            results = list(validation.GridSearchCV(
                model_callable=h0_callable, 
                param_grid={"lr": learning_rate_choices}, 
                scoring_func=score_func,
                cv_object=validation.KFoldCV(num_folds=3)
            ).get_cv_scores(X=X_train, y=y_train))
            
            best_model_h0 = h0_callable(**sorted(results, key=lambda x: x[-1], reverse=True)[0][0])
            logger.info(f"Results: {sorted(results, key=lambda x: x[-1], reverse=True)[0][0]}")

            logger.info("Finding topology and learning rate for H1")
            h1_callable = lambda h1, lr: nn.SequentialNetwork(
                nn.LinearSigmoid(in_features=X_train.shape[1], out_features=h1, bias=True),
                nn.LinearSigmoid(in_features=h1, out_features=y_train.shape[1], bias=True),
                convergence_tol=conv_tol,
                n_iter=n_iter,
                learning_rate=lr,
                batch_size=X_train.shape[1], 
                fit_intercept=False
            )

            results = list(validation.GridSearchCV(
                model_callable=h1_callable, 
                param_grid={'h1': hidden_layer_choices, "lr": learning_rate_choices}, 
                scoring_func=score_func, 
                cv_object=validation.KFoldCV(num_folds=3)
            ).get_cv_scores(X=X_train, y=y_train))

            best_model_h1 = h1_callable(**sorted(results, key=lambda x: x[-1], reverse=True)[0][0])
            logger.info(f"Results: {sorted(results, key=lambda x: x[-1], reverse=True)[0][0]}")
            
            logger.info("Finding topology and learning rate for H2")
            h2_callable = lambda h1, h2, lr: nn.SequentialNetwork(
                nn.LinearSigmoid(in_features=X_train.shape[1], out_features=h1, bias=True),
                nn.LinearSigmoid(in_features=h1, out_features=h2, bias=True),
                nn.LinearSigmoid(in_features=h2, out_features=y_train.shape[1], bias=True),
                convergence_tol=conv_tol,
                n_iter=n_iter,
                learning_rate=lr,
                batch_size=X_train.shape[1],
                fit_intercept=False
            )

            results = list(validation.GridSearchCV(
                model_callable=h2_callable, 
                param_grid={'h1': hidden_layer_choices, 'h2': hidden_layer_choices,  "lr": learning_rate_choices}, 
                scoring_func=score_func, 
                cv_object=validation.KFoldCV(num_folds=3)
            ).get_cv_scores(X=X_train, y=y_train))
            logger.info(f"Results: {sorted(results, key=lambda x: x[-1], reverse=True)[0][0]}")
            best_model_h2 = h2_callable(**sorted(results, key=lambda x: x[-1], reverse=True)[0][0])

        best_model_h0.fit(X_train, y_train)
        best_model_h1.fit(X_train, y_train)
        best_model_h2.fit(X_train, y_train)

        baseline.append(np.mean(stats.mode(y[train]).mode[0]  == y[test]))
        accuracy_0h.append(np.mean(best_model_h0.predict(X_test) == y[test]))
        accuracy_1h.append(np.mean(best_model_h1.predict(X_test) == y[test]))
        accuracy_2h.append(np.mean(best_model_h2.predict(X_test) == y[test]))
    return {
        "models": {
            'h0': best_model_h0, 
            'h1': best_model_h1, 
            'h2': best_model_h2, 
        }, 
        'accuracy': {
            'h0': accuracy_0h, 
            'h1': accuracy_1h, 
            'h2': accuracy_2h, 
            'baseline': baseline
        }
    }

In [24]:
X, y = (
    breast_cancer.drop(['id_number', 'class'], axis=1).values,
    breast_cancer['class'].astype('category').cat.codes.values.reshape(-1, )
)
np.random.seed(73)
breast_cancer_results = run_classification_experiment(X=X, y=y)

INFO:__main__:CV Iteration: 1
INFO:__main__:Standardizing data
INFO:__main__:Finding learning rate for H0

overflow encountered in exp

INFO:__main__:Finding topology and learning rate for H1
INFO:__main__:Results: {'h1': 3}
INFO:__main__:Finding topology and learning rate for H2
INFO:__main__:Results: {'h1': 8, 'h2': 6}
INFO:__main__:CV Iteration: 2
INFO:__main__:Standardizing data
INFO:__main__:CV Iteration: 3
INFO:__main__:Standardizing data
INFO:__main__:CV Iteration: 4
INFO:__main__:Standardizing data
INFO:__main__:CV Iteration: 5
INFO:__main__:Standardizing data


In [64]:
from toolz import pipe

# Next, we repeat this process on the Soybean data
soybean_data = pipe(
    pd.read_csv(
         io.BytesIO(requests.get(
                "https://archive.ics.uci.edu/ml/machine-learning-databases/soybean/soybean-small.data",
        verify=False
        ).content),
        header=None,
        names=[
            "date",
            "plant-stand",
            "precip",
            "temp",
            "hail",
            "crop-hist",
            "area-damaged",
            "severity",
            "seed-tmt",
            "germination",
            "plant-growth",
            "leaves",
            "leafspots-halo",
            "leafspots-marg",
            "leafspot-size",
            "leaf-shread",
            "leaf-malf",
            "leaf-mild",
            "stem",
            "lodging",
            "stem-cankers",
            "canker-lesion",
            "fruiting-bodies",
            "external decay",
            "mycelium",
            "int-discolor",
            "sclerotia",
            "fruit-pods",
            "fruit spots",
            "seed",
            "mold-growth",
            "seed-discolor",
            "seed-size",
            "shriveling",
            "roots",
            "instance_class",
        ],
    )
    .pipe(lambda df: df.loc(axis=1)[df.nunique() > 1])  # drop columns with no variance
    .assign(instance_class=lambda df: df["instance_class"].astype("category").cat.codes)
)

X, y = (
    pd.get_dummies(
        soybean_data.drop('instance_class', axis=1),
        columns=soybean_data.drop('instance_class', axis=1).columns, 
        drop_first=True
    ).values, 
    soybean_data['instance_class'].values
)


soybean_results = run_classification_experiment(
    X, 
    y=y, 
    learning_rate_choices=np.linspace(.00001, .0001, 5), 
    hidden_layer_choices=list(range(5, 11, 2)), 
)



INFO:__main__:CV Iteration: 1
INFO:__main__:Standardizing data
INFO:__main__:Finding learning rate for H0
INFO:__main__:Finding topology and learning rate for H1
INFO:__main__:Results: {'h1': 5, 'lr': 7.75e-05}
INFO:__main__:Finding topology and learning rate for H2
INFO:__main__:Results: {'h1': 5, 'h2': 5, 'lr': 0.0001}
INFO:__main__:CV Iteration: 2
INFO:__main__:Standardizing data
INFO:__main__:CV Iteration: 3
INFO:__main__:Standardizing data
INFO:__main__:CV Iteration: 4
INFO:__main__:Standardizing data
INFO:__main__:CV Iteration: 5
INFO:__main__:Standardizing data


In [67]:
soybean_results

{'models': {'h0': <machine_learning.nn.SequentialNetwork at 0x130fe2610>,
  'h1': <machine_learning.nn.SequentialNetwork at 0x131b5fc10>,
  'h2': <machine_learning.nn.SequentialNetwork at 0x130e8b410>},
 'accuracy': {'h0': [0.6666666666666666, 1.0, 1.0, 1.0, 1.0],
  'h1': [0.2222222222222222, 0.3333333333333333, 0.3333333333333333, 0.4, 0.4],
  'h2': [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0.4, 0.4],
  'baseline': [0.3333333333333333,
   0.3333333333333333,
   0.3333333333333333,
   0.4,
   0.4]}}

In [60]:
pd.Series(soybean_results['models']['h2'].loss).rolling(10).mean().iplot()

In [128]:
iris_data = pd.read_csv(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", 
    header=None, names=['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'class']
)

X, y = (
    iris_data.drop(['class'], axis=1).values, 
    iris_data['class'].astype('category').cat.codes.values
)

In [136]:
iris_results = run_classification_experiment(
    X=X, 
    y=y, 
    learning_rate_choices=list(np.linspace(.001, 1,  10)), 
    hidden_layer_choices=[4], 
    n_iter=15000, 
    conv_tol=.01
)

INFO:__main__:CV Iteration: 1
INFO:__main__:Standardizing data
INFO:__main__:Finding learning rate for H0
INFO:__main__:Results: {'lr': 0.001}
INFO:__main__:Finding topology and learning rate for H1
INFO:__main__:Results: {'h1': 4, 'lr': 0.778}
INFO:__main__:Finding topology and learning rate for H2
INFO:__main__:Results: {'h1': 4, 'h2': 4, 'lr': 0.334}
INFO:__main__:CV Iteration: 2
INFO:__main__:Standardizing data
INFO:__main__:CV Iteration: 3
INFO:__main__:Standardizing data
INFO:__main__:CV Iteration: 4
INFO:__main__:Standardizing data
INFO:__main__:CV Iteration: 5
INFO:__main__:Standardizing data


In [134]:
iris_results

{'models': {'h0': <machine_learning.nn.SequentialNetwork at 0x131e01150>,
  'h1': <machine_learning.nn.SequentialNetwork at 0x131e01190>,
  'h2': <machine_learning.nn.SequentialNetwork at 0x131e01510>},
 'accuracy': {'h0': [0.7,
   0.7666666666666667,
   0.8333333333333334,
   0.7666666666666667,
   0.8],
  'h1': [0.6666666666666666,
   0.6666666666666666,
   0.6666666666666666,
   0.6666666666666666,
   0.6666666666666666],
  'h2': [0.3333333333333333,
   0.3333333333333333,
   0.3333333333333333,
   0.3333333333333333,
   0.3333333333333333],
  'baseline': [0.3333333333333333,
   0.3333333333333333,
   0.3333333333333333,
   0.3333333333333333,
   0.3333333333333333]}}

In [124]:
pd.Series(iris_results['models']['h2'].loss).rolling(30).mean().iplot()

In [123]:
pd.Series(iris_results['models']['h1'].loss).rolling(15).mean().iplot()

In [73]:
accuracy_2h, accuracy_1h, accuracy_0h, baseline

NameError: name 'accuracy_2h' is not defined

In [1001]:
pd.Series(best_model_h0.loss).rolling(1).mean().iplot(title="h0")

In [1012]:
best_model_h0.predict_prob(X_test), y_test

(array([[0.52103289, 0.52236394, 0.51456731, 0.51771818],
        [0.52380729, 0.52995846, 0.52693475, 0.53379744],
        [0.52036909, 0.52889703, 0.53345721, 0.52832498],
        [0.51847209, 0.51994136, 0.53528947, 0.53274468],
        [0.49762852, 0.49230664, 0.4871199 , 0.49006398],
        [0.49340618, 0.49110861, 0.49480332, 0.47611193],
        [0.42943568, 0.42620195, 0.41750477, 0.42539792],
        [0.46472245, 0.45902864, 0.4452639 , 0.45526354],
        [0.47360963, 0.45370491, 0.4479307 , 0.46190808],
        [0.43745429, 0.42157358, 0.40992531, 0.43294122]]),
 array([[1, 0, 0, 0],
        [1, 0, 0, 0],
        [0, 1, 0, 0],
        [0, 1, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1]], dtype=uint8))

In [952]:
from machine_learning import validation 
from scipy import stats

glass_data = pd.read_csv(
    io.BytesIO(
        requests.get(
            "https://archive.ics.uci.edu/ml/machine-learning-databases/glass/glass.data", 
            verify=False
        ).content),
    header=None,
    names=[
        "id_number",
        "refractive_index",
        "sodium",
        "magnesium",
        "aluminum",
        "silicon",
        "potassium",
        "calcium",
        "barium",
        "iron",
        "class",
    ],
)

X, y = (
    glass_data.drop(['id_number', 'class'], axis=1).values, 
    glass_data['class'].astype('category').cat.codes.values
)

def score_func(y, yhat):
    return np.mean(np.argmax(y, axis=1) == yhat)

kfold = validation.KFoldStratifiedCV(num_folds=5)
accuracy_0h = []
accuracy_1h = []
accuracy_2h = []
baseline = []

for train, test in kfold.split(X=X, y=y.reshape(-1, )):
    max_scaler = pp.MaxScaler()
    X_train = max_scaler.fit_transform(X[train])
    X_test = max_scaler.transform(X[test])
    
    y_train = pd.get_dummies(y[train]).values
    y_test = pd.get_dummies(y[test]).values
    
    h2_callable = lambda h1, h2, lr: nn.SequentialNetwork(
        nn.LinearSigmoid(in_features=X.shape[1] + 1, out_features=h1),
        nn.LinearSigmoid(in_features=h1, out_features=h2),
        nn.LinearSigmoid(in_features=h2, out_features=y_train.shape[1]),
        learning_rate=lr, 
        convergence_tol=.01, 
        n_iter=2000,
        batch_size=24
    )

    results = list(validation.GridSearchCV(
        model_callable=h2_callable, 
        param_grid={'h1': list(range(5, 7, 1)), 'h2': list(range(3, 7)), 'lr':np.linspace(.005, .02, 15)}, 
        scoring_func=score_func, 
        cv_object=validation.KFoldCV(num_folds=3)
    ).get_cv_scores(X=X_train, y=y_train))
    
    print("Found best model for H2")
    print(sorted(results, key=lambda x: x[-1], reverse=True)[0][0])
    best_model_h2 = h2_callable(**sorted(results, key=lambda x: x[-1], reverse=True)[0][0])
    best_model_h2.fit(X_train, y_train)
  
    
    h0_callable = lambda lr: nn.SequentialNetwork(
        nn.LinearSigmoid(in_features=X_train.shape[1] + 1, out_features=y_train.shape[1]),
        learning_rate=lr, 
        convergence_tol=.0001, 
        n_iter=500,
        batch_size=24
    )
    
    results = list(validation.GridSearchCV(
        model_callable=h0_callable, 
        param_grid={"lr":np.linspace(.005, .02, 15)}, 
        scoring_func=score_func,
        cv_object=validation.KFoldCV(num_folds=3)
    ).get_cv_scores(X=X_train, y=y_train))
    
    print(sorted(results, key=lambda x: x[-1], reverse=True)[0][0])
    best_model_h0 = h0_callable(**sorted(results, key=lambda x: x[-1], reverse=True)[0][0])
    

    h1_callable = lambda h1, lr: nn.SequentialNetwork(
        nn.LinearSigmoid(in_features=X.shape[1] + 1, out_features=h1),
        nn.LinearSigmoid(in_features=h1, out_features=y_train.shape[1]),
        learning_rate=lr, 
        convergence_tol=.001, 
        n_iter=700,
        batch_size=48
    )

    
    results = list(validation.GridSearchCV(
        model_callable=h1_callable, 
        param_grid={'h1': list(range(8, 15, 1)), "lr":np.linspace(.005, .02, 15)}, 
        scoring_func=score_func,
        cv_object=validation.KFoldCV(num_folds=3)
    ).get_cv_scores(X=X_train, y=y_train))
    
    print(sorted(results, key=lambda x: x[-1], reverse=True)[0][0])
    best_model_h1 = h1_callable(**sorted(results, key=lambda x: x[-1], reverse=True)[0][0])

    
    best_model_h1.fit(X, y)
    best_model_h0.fit(X, y)
    pd.Series(best_model_h1.loss).rolling(50).mean().iplot(title="h1")
    pd.Series(best_model_h0.loss).rolling(15).mean().iplot(title="h0")
    pd.Series(best_model_h2.loss).rolling(50).mean().iplot(title="h2")

    print(best_model_h0.predict(X_test))
    print(best_model_h1.predict(X_test))
    print(best_model_h2.predict(X_test))
    accuracy_1h.append(np.mean(best_model_h1.predict(X_test) == np.argmax(y_test, axis=1)))
    baseline.append(np.mean(stats.mode(y[train]).mode[0]  == np.argmax(y_test, axis=1)))
    accuracy_0h.append(np.mean(best_model_h1.predict(X_test) == np.argmax(y_test, axis=1))) 
    accuracy_2h.append(np.mean(best_model_h2.predict(X_test) == np.argmax(y_test, axis=1)))
    print(list(map(np.mean, (accuracy_0h, accuracy_1h, accuracy_2h, baseline))))
    

SyntaxError: invalid syntax (<ipython-input-952-1985c8792281>, line 81)

In [None]:
list(map(np.mean, (accuracy_0h, accuracy_1h, accuracy_2h, baseline)))

In [39]:
import cufflinks
cufflinks.go_offline()
pd.DataFrame(h0_model.predict_prob(X[test])).iplot('box')

In [23]:
y[test]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 5,
       5], dtype=int8)

In [166]:
from toolz import pipe
breast_net(pipe(breast_X, lambda X: np.concatenate([np.ones((X.shape[0], 1)), X], axis=1)))

array([[0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530302],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530302],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530302],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530302],
       [0.37530302],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.37530303],
       [0.375

In [233]:
new_X = np.concatenate([np.ones((breast_X.shape[0], 1)), breast_X], axis=1)


mynet = nn.SequentialNetwork(
    nn.LinearSigmoid(10, 2), 
    nn.LinearSigmoid(2, 2), 
    fit_intercept=False,
    **{'learning_rate':.01, 'convergence_tol':.01, 'n_iter':10, 'batch_size':new_X.shape[0]}
)


net = torch.nn.Sequential(
    torch.nn.Linear(10, 2, bias=False), 
    torch.nn.Sigmoid(), 
    torch.nn.Linear(2, 2, bias=False), 
    torch.nn.Sigmoid()
)
net[0].weight = torch.nn.Parameter(torch.tensor(mynet.modules[0].weight.T).type(torch.FloatTensor))
net[2].weight = torch.nn.Parameter(torch.tensor(mynet.modules[1].weight.T).type(torch.FloatTensor))


preds = net(torch.from_numpy(new_X).type(torch.FloatTensor))
loss = torch.nn.CrossEntropyLoss()(preds, torch.tensor(breast_y.reshape(-1, )).type(torch.LongTensor))
loss.backward()
    

In [234]:
net[2].weight.grad.T

tensor([[-0.0188,  0.0188],
        [-0.0188,  0.0188]])

In [236]:
mynet(new_X)
dl = mynet.get_delta_list(pd.get_dummies(breast_y.reshape(-1, )).values)
grads = mynet.get_gradient_updates(delta_list=dl)

[[1 0]
 [1 0]
 [1 0]
 ...
 [0 1]
 [0 1]
 [0 1]]
[[ 0.07504696 -0.07498992]
 [ 0.07522243 -0.07516513]]
[[ 1.77466924e-04  2.21260884e-04]
 [-3.53537791e-05 -4.39871456e-05]
 [-8.65656746e-05 -1.07815190e-04]
 [-8.20477525e-05 -1.02185118e-04]
 [-6.43554783e-05 -8.01467711e-05]
 [-2.95944603e-05 -3.68288696e-05]
 [-1.06869008e-04 -1.33106860e-04]
 [-4.40530933e-05 -5.48411527e-05]
 [-7.33185842e-05 -9.13144851e-05]
 [-1.30992646e-05 -1.62998453e-05]]


In [226]:
mynet.fit(new_X, pd.get_dummies(breast_y.reshape(-1, )).values)

[[1 0]
 [1 0]
 [1 0]
 ...
 [0 1]
 [0 1]
 [0 1]]
[[ 0.07513348 -0.07469494]
 [ 0.0753537  -0.0749113 ]]
[[-1.08477011e-04  1.39915472e-04]
 [ 2.03224077e-05 -2.69959300e-05]
 [ 5.13073506e-05 -6.71549619e-05]
 [ 4.85839258e-05 -6.36188607e-05]
 [ 3.80456742e-05 -4.98586231e-05]
 [ 1.71090459e-05 -2.26646432e-05]
 [ 6.34093615e-05 -8.29525619e-05]
 [ 2.57417990e-05 -3.39241888e-05]
 [ 4.34160307e-05 -5.68506666e-05]
 [ 7.53875694e-06 -1.00089719e-05]]
[[1 0]
 [1 0]
 [1 0]
 ...
 [0 1]
 [0 1]
 [0 1]]
[[ 0.07503967 -0.07460168]
 [ 0.07525927 -0.07481742]]
[[-5.22180191e-05  1.96027219e-04]
 [ 9.32039724e-06 -3.81679911e-05]
 [ 2.41215266e-05 -9.45178925e-05]
 [ 2.28242120e-05 -8.95534616e-05]
 [ 1.78502783e-05 -7.02007415e-05]
 [ 7.88381305e-06 -3.20173003e-05]
 [ 2.98357974e-05 -1.16733898e-04]
 [ 1.19657682e-05 -4.78474320e-05]
 [ 2.03966414e-05 -8.00260046e-05]
 [ 3.46088110e-06 -1.41486677e-05]]
[[1 0]
 [1 0]
 [1 0]
 ...
 [0 1]
 [0 1]
 [0 1]]
[[ 0.07494602 -0.07450858]
 [ 0.075165   -0.

In [201]:
list(map(lambda x: x.grad, net.parameters()))

[tensor([[-0.0004, -0.0036, -0.0033, -0.0024, -0.0005, -0.0045, -0.0012, -0.0030,
          -0.0001],
         [-0.0003, -0.0019, -0.0018, -0.0013, -0.0003, -0.0024, -0.0007, -0.0016,
          -0.0001]]),
 tensor([0.0132, 0.0065]),
 tensor([[-0.0210, -0.0205],
         [ 0.0213,  0.0208]])]

In [6]:
se

tensor([[-0.1552, -0.1552]])

In [34]:
- 2 *  (1. - lin_op(x)) * (lin_op(x)) * (1 - lin_op(x))

tensor([-0.2681], grad_fn=<MulBackward0>)

In [229]:
mylist = []

In [231]:
for i in range(10, 0, -1): 
    if i == 10:
        mylist.append(i)
    else:
        mylist.append(mylist[-1])
    

In [137]:
x_input = np.random.normal(size=(10, 2))

lin_op1 = nn.LinearSigmoid(2, 2)
lin_op2 = nn.LinearSigmoid(2, 3)
lin_op3 = nn.LinearSigmoid(3, 1)

mynet = nn.SequentialNetwork(lin_op1, lin_op2, lin_op3, learning_rate=1, n_iter=10, batch_size=10, convergence_tol=1)

net = torch.nn.Sequential(
    torch.nn.Linear(2, 2, bias=False), 
    torch.nn.Sigmoid(), 
    torch.nn.Linear(2, 3, bias=False), 
    torch.nn.Sigmoid(), 
    torch.nn.Linear(3, 1, bias=False), 
    torch.nn.Sigmoid()
)
net[0].weight = torch.nn.Parameter(torch.from_numpy(lin_op1.weight.T).type(torch.FloatTensor))
net[2].weight = torch.nn.Parameter(torch.from_numpy(lin_op2.weight.T).type(torch.FloatTensor))
net[4].weight = torch.nn.Parameter(torch.from_numpy(lin_op3.weight.T).type(torch.FloatTensor))


loss = torch.nn.MSELoss()(net(torch.tensor(x_input).type(torch.FloatTensor)), torch.tensor(1.))
loss.backward()
mynet(x_input)
delta_list = mynet.get_delta_list(target=1.)
mynet.get_gradient_updates(delta_list=delta_list)

[[-0.1247885 ]
 [-0.12506796]
 [-0.12493251]]
[[ 1.84397052e-05 -1.21614702e-04  6.54764955e-05]
 [ 1.83991091e-05 -1.21346960e-04  6.53323449e-05]]
[[ 1.13283775e-08 -5.34446150e-09]
 [ 2.94264359e-07 -1.38795894e-07]]


[array([[-0.1247885 ],
        [-0.12506796],
        [-0.12493251]]),
 array([[ 1.84397052e-05, -1.21614702e-04,  6.54764955e-05],
        [ 1.83991091e-05, -1.21346960e-04,  6.53323449e-05]]),
 array([[ 1.13283775e-08, -5.34446150e-09],
        [ 2.94264359e-07, -1.38795894e-07]])]

In [329]:
list(map(lambda x: x.grad, net.parameters()))

[tensor([[8.4080e-07, 5.7588e-07],
         [5.4104e-07, 3.7057e-07]]),
 tensor([[ 0.0001,  0.0001],
         [ 0.0003,  0.0003],
         [-0.0002, -0.0002]]),
 tensor([[-0.1255, -0.1255, -0.1252]])]

In [283]:
list(net.parameters())

[Parameter containing:
 tensor([[ 0.0081, -0.0033],
         [-0.0043,  0.0093]], requires_grad=True),
 Parameter containing:
 tensor([[0.1321, 0.1250]], requires_grad=True)]

In [225]:
mynet.modules

(<__main__.LinearSigmoid at 0x120ce6750>,
 <__main__.LinearSigmoid at 0x120ce6610>)

In [217]:
(grad_accum @ lin_op2.weight.T)

array([[0.00039786, 0.0011715 ]])

In [190]:
x_input = np.random.normal(size=(1, 2))

lin_op1 = LinearSigmoid(2, 2)
lin_op2 = LinearSigmoid(2, 1)

prev_output= lin_op2(x_input)

mynet = Sequential(lin_op1, lin_op2)
net = torch.nn.Sequential(
    torch.nn.Linear(2, 1, bias=False), 
    torch.nn.Sigmoid()
)
net[0].weight = torch.nn.Parameter(torch.from_numpy(lin_op2.weight.T).type(torch.FloatTensor))
loss = torch.nn.MSELoss()(net(torch.tensor(x_input).type(torch.FloatTensor)), torch.tensor(1.))
loss.backward()

np.allclose(net[0].weight.grad, lin_op2.last_layer_gradient(output=prev_output, target=1.) @ x_input)

  return F.mse_loss(input, target, reduction=self.reduction)


True

In [296]:
x_input = np.random.normal(size=(1, 2))

lin_op1 = LinearSigmoid(2, 2)
lin_op2 = LinearSigmoid(2, 1)

mynet = Sequential(lin_op1, lin_op2)

net = torch.nn.Sequential(
    torch.nn.Linear(2, 2, bias=False), 
    torch.nn.Sigmoid(), 
    torch.nn.Linear(2, 1, bias=False), 
    torch.nn.Sigmoid()
)
net[0].weight = torch.nn.Parameter(torch.from_numpy(lin_op1.weight.T).type(torch.FloatTensor))
net[2].weight = torch.nn.Parameter(torch.from_numpy(lin_op2.weight.T).type(torch.FloatTensor))

loss = torch.nn.MSELoss()(net(torch.tensor(x_input).type(torch.FloatTensor)), torch.tensor(1.))

loss.backward()


prev_output = mynet(x_input)

assert np.allclose(
    net[2].weight.grad, 
    lin_op2.get_last_layer_gradient(output=prev_output, target=1.) @ lin_op2.prev_input
)

grad_accum = lin_op2.get_last_layer_gradient(output=prev_output, target=1.)

np.allclose(net[0].weight.grad, lin_op1.gradient_update(
    grad_accumulated=grad_accum, prev_weights=lin_op2.weight
)  @ lin_op1.prev_input)

True

In [77]:
grad2 = lin_op2.gradient_update(grad_accumulated=-2 * (1 - mynet(x_input)))
grad2 @ x_input

O: (1, 1)
grad_accum: (1, 1)
weights: (2, 1)
prev_input: (1, 2)
delta: (2, 1)


array([[-0.00074726, -0.00108958],
       [-0.00041937, -0.00061149]])

In [81]:
lin_op1.gradient_update(
    grad_accumulated=grad2
) @ x_input.T

O: (1, 2)
grad_accum: (2, 1)
weights: (2, 2)
prev_input: (1, 2)
delta: (2, 2)


array([[-1.41792810e-06],
       [ 2.79193848e-07]])

In [167]:
X = np.ones((1, 2))
lin_op2(X) @ (1 - lin_op2(X).T)

array([[0.24999933]])

In [123]:
lin_op.weight

array([[0.00982391],
       [0.00421188]])

In [35]:
        


class NeuralNetwork:
    def __init__(self, n_inputs, hidden_layer_size_mapping):
        self.n_inputs = n_inputs
        self.n_layers = n_layers 
        self.hidden_layer_size_mapping = hidden_layer_size_mapping
        
        self.layers = {
            layer: np.random.uniform(low=-.01, high=.01, size=size)
            for layer, size in self.hidden_layer_size_mapping.items()
        }
        
    def get_output_layer_grad(self, X, y):
        self.layers[self.n_layers]
        