In [8]:
import pandas as pd
import numpy as np

## Standardising Data

In [9]:
# Function for standardising and unstandardising columns
def standardise_columns(df, cols):
    subset_df = df[cols]
    subset_df = 0.8 * ((subset_df - subset_df.min()) / (subset_df.max() - subset_df.min())) + 0.1
    return subset_df

def unstandardise_columns(df, cols, max_val, min_val):
    subset_df = df[cols]
    subset_df = ((subset_df - subset_df.min()) / 0.8) * (max_val - min_val) + min_val
    return subset_df

## Reading Data

In [10]:
my_data = pd.read_excel("River-Data-Lagged.xlsx")
my_data.drop(["Unnamed: 0"], axis=1, inplace=True)
my_data.head(10)

Unnamed: 0,Date,Skelton MDF (Cumecs),Crakehill MDF (t-1),Skip Bridge MDF (t-1),Westwick MDF (t-1),Skelton MDF (t-1),Crakehill MDF (t-2),Skip Bridge MDF (t-2),Westwick MDF (t-2),Skelton MDF (t-2),...,Malham Tarn DRT (t-1),Snaizeholme DRT (t-1),Arkengarthdale DRT (t-2),East Cowton DRT (t-2),Malham Tarn DRT (t-2),Snaizeholme DRT (t-2),Arkengarthdale DRT (t-3),East Cowton DRT (t-3),Malham Tarn DRT (t-3),Snaizeholme DRT (t-3)
0,1993-01-04,23.47,9.46,4.124,8.057,23.6,9.95,4.239,8.622,24.86,...,0.8,0.0,0.0,0.0,0.8,0.0,0.0,0.0,0.0,4.0
1,1993-01-05,60.7,9.41,4.363,7.925,23.47,9.46,4.124,8.057,23.6,...,0.8,61.6,0.0,0.0,0.8,0.0,0.0,0.0,0.8,0.0
2,1993-01-06,98.01,26.3,11.962,58.704,60.7,9.41,4.363,7.925,23.47,...,33.6,111.2,2.4,24.8,0.8,61.6,0.0,0.0,0.8,0.0
3,1993-01-07,56.99,32.1,10.237,34.416,98.01,26.3,11.962,58.704,60.7,...,1.6,0.8,11.2,5.6,33.6,111.2,2.4,24.8,0.8,61.6
4,1993-01-08,56.66,19.3,7.254,22.263,56.99,32.1,10.237,34.416,98.01,...,17.6,36.0,0.0,0.0,1.6,0.8,11.2,5.6,33.6,111.2
5,1993-01-09,78.1,22.0,7.266,29.587,56.66,19.3,7.254,22.263,56.99,...,1.6,2.4,5.6,4.0,17.6,36.0,0.0,0.0,1.6,0.8
6,1993-01-10,125.7,35.5,8.153,60.253,78.1,22.0,7.266,29.587,56.66,...,55.2,104.8,1.6,0.0,1.6,2.4,5.6,4.0,17.6,36.0
7,1993-01-11,195.9,51.0,13.276,93.951,125.7,35.5,8.153,60.253,78.1,...,76.0,136.8,14.4,0.8,55.2,104.8,1.6,0.0,1.6,2.4
8,1993-01-12,125.4,65.5,25.561,69.503,195.9,51.0,13.276,93.951,125.7,...,12.0,28.0,20.8,2.4,76.0,136.8,14.4,0.8,55.2,104.8
9,1993-01-13,161.5,32.0,20.715,40.514,125.4,65.5,25.561,69.503,195.9,...,0.8,24.0,10.4,16.0,12.0,28.0,20.8,2.4,76.0,136.8


In [11]:
target_cols = ["Skelton MDF (Cumecs)"]
flow_cols = [col for col in my_data.columns if "MDF (t" in col]
rain_cols = [col for col in my_data.columns if "DRT" in col]

In [12]:
feature_cols = flow_cols[:4] + rain_cols[:4]
feature_cols

['Crakehill MDF (t-1)',
 'Skip Bridge MDF (t-1)',
 'Westwick MDF (t-1)',
 'Skelton MDF (t-1)',
 'Arkengarthdale DRT (t-1)',
 'East Cowton DRT (t-1)',
 'Malham Tarn DRT (t-1)',
 'Snaizeholme DRT (t-1)']

In [13]:
training_df = my_data[target_cols + feature_cols]
training_df.head(10)

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (t-1),Skip Bridge MDF (t-1),Westwick MDF (t-1),Skelton MDF (t-1),Arkengarthdale DRT (t-1),East Cowton DRT (t-1),Malham Tarn DRT (t-1),Snaizeholme DRT (t-1)
0,23.47,9.46,4.124,8.057,23.6,0.0,0.0,0.8,0.0
1,60.7,9.41,4.363,7.925,23.47,2.4,24.8,0.8,61.6
2,98.01,26.3,11.962,58.704,60.7,11.2,5.6,33.6,111.2
3,56.99,32.1,10.237,34.416,98.01,0.0,0.0,1.6,0.8
4,56.66,19.3,7.254,22.263,56.99,5.6,4.0,17.6,36.0
5,78.1,22.0,7.266,29.587,56.66,1.6,0.0,1.6,2.4
6,125.7,35.5,8.153,60.253,78.1,14.4,0.8,55.2,104.8
7,195.9,51.0,13.276,93.951,125.7,20.8,2.4,76.0,136.8
8,125.4,65.5,25.561,69.503,195.9,10.4,16.0,12.0,28.0
9,161.5,32.0,20.715,40.514,125.4,7.2,4.0,0.8,24.0


In [14]:
std_training_df = standardise_columns(training_df, target_cols + feature_cols)
std_training_df.head(10)

Unnamed: 0,Skelton MDF (Cumecs),Crakehill MDF (t-1),Skip Bridge MDF (t-1),Westwick MDF (t-1),Skelton MDF (t-1),Arkengarthdale DRT (t-1),East Cowton DRT (t-1),Malham Tarn DRT (t-1),Snaizeholme DRT (t-1)
0,0.1356,0.127163,0.131519,0.113121,0.135834,0.1,0.1,0.10254,0.1
1,0.20262,0.12698,0.133932,0.112837,0.1356,0.108526,0.219807,0.10254,0.283333
2,0.269783,0.188979,0.210648,0.222008,0.20262,0.139787,0.127053,0.206667,0.430952
3,0.195941,0.210269,0.193233,0.169791,0.269783,0.1,0.1,0.105079,0.102381
4,0.195347,0.163283,0.163118,0.143663,0.195941,0.119893,0.119324,0.155873,0.207143
5,0.233942,0.173194,0.163239,0.159409,0.195347,0.105684,0.1,0.105079,0.107143
6,0.31963,0.222749,0.172194,0.225338,0.233942,0.151155,0.103865,0.275238,0.411905
7,0.446001,0.279646,0.223914,0.297786,0.31963,0.17389,0.111594,0.34127,0.507143
8,0.31909,0.332871,0.347939,0.245225,0.446001,0.136945,0.177295,0.138095,0.183333
9,0.384075,0.209902,0.299016,0.182901,0.31909,0.125577,0.119324,0.10254,0.171429


## Basic ANN Class

In [15]:
from sklearn.metrics import *

In [668]:
class BasicAnn:
    def __init__(self, layers):
        weight_shapes = [(layers[i],layers[i-1]) for i in range(1, len(layers))]
        self.weights = [np.random.standard_normal(s)/s[1]**0.5 for s in weight_shapes]
        self.biases = [np.random.randn(y,1) for y in layers[1:]]
    
    def train(self, features, targets):
        activations = [self.forward_pass(r) for r in features]
        self.backward_pass(activations, targets)
    
    def forward_pass(self, data):
        activation = data
        activations = []
        
        for w, b in zip(self.weights, self.biases):
            activation = self.activation(np.dot(w, activation) + b.flatten())
            activations.append(activation)

        return activations
    
    def backward_pass(self, activations, true_vals):
        for act, y_val in zip(activations, true_vals):
            output_err = y_val - act[-1]
            output_delta = output_err * self.activation_deriv(act[-1])
            new_delta = np.array([output_delta])
            self.weights[-1] += 0.1 * np.dot(new_delta, act[-1])
            self.biases[-1] += 0.1 * np.dot(new_delta, np.ones(act[-1].shape))
            print("\n\n\nNEW ACTIVATION\n")
            for i in range(len(act) - 2, -1, -1):
                w = self.weights[i+1]
                a = act[i]
                grd = np.array([self.activation_deriv(a)])
                print(w, grd, np.dot(grd, np.dot(w, new_delta)))
#                 new_delta = w * grd * new_delta
                new_delta = np.dot(new_delta, np.dot(w, grd))
                print(self.weights[i], self.biases[i], new_delta, sep="\n----\n", end="\n\n=====\n\n")
                self.weights[i] += 0.1 * np.dot(new_delta, a)
                self.biases[i] += 0.1 * np.dot(new_delta, np.ones(a.shape))
                print(self.weights[i], self.biases[i], new_delta, sep="\n----\n", end="\n\n=====\n\n")
            
            
    
    def activation(self, x, func_name="sigmoid"):
        if func_name == "sigmoid":
            return 1/(1+np.exp(-x))
    
    def activation_deriv(self, a, func_name="sigmoid"):
        if func_name == "sigmoid":
            return a * (1 - a)
    

In [669]:
# Utility Functions
def format_row(row, row_size):
    return row.to_numpy().reshape(row_size, 1)

In [670]:
myAnn = BasicAnn((8,3,2,1))

In [671]:
features = std_training_df[feature_cols]
targets = std_training_df[target_cols]
preds = myAnn.train(features.to_numpy(), targets.to_numpy())




NEW ACTIVATION



ValueError: shapes (1,2) and (1,1) not aligned: 2 (dim 1) != 1 (dim 0)

In [170]:
np_features = features.to_numpy()

In [572]:
np.array(np.array([[1,2,3]]))

array([[1, 2, 3]])