In [None]:
# Let's start with a simple fitting problem: finding the best-fit line through a set of (x, y) points.
# We will fit the same data with scipy and with machine-learning tools,
# and start noticing some of the differences between the approaches.
m = 2     # true slope of the line
b = -3    # true intercept of the line
x = list(range(1, 11))          # sample positions 1..10
y = [m * xi + b for xi in x]    # exact (noise-free) y = m*x + b at every sample position
x, y

In [None]:
# Now let's add a little noise to the data.
import numpy as np
import numpy.random as random
import math
import time

# Fixed seed so that Restart & Run All reproduces the same noise (change it to draw a different sample).
NOISE_SEED = 20240101
# NOTE: the legacy call random.seed(...) only seeds numpy's global RandomState. A Generator created by
# default_rng() does not read that state, so the seed has to be handed to default_rng() directly.
rng = random.default_rng(NOISE_SEED)

mu, sigma = 0, 0.15

# Noise that is normally distributed around 0.0 with a sigma of 0.15, one value per sample point.
dy = rng.normal(mu, sigma, size=len(x))
# Rebuild y from the noise-free line (y = m*x + b) before adding dy, so that re-running this cell
# does not stack a second layer of noise on top of the first.
y = np.array([m*k + b for k in x], dtype=float) + dy
y, dy

In [None]:
# Lets first look at our data
%matplotlib inline
import matplotlib.pyplot as plt
plt.scatter(x,y)
plt.show()

In [None]:
# Now we'll fit this using the scipy way - effectively it's a least squares fit
from scipy import optimize

def fnc(xx, m, b):
    """Straight-line model used for the fit.

    Args:
        xx: x value(s) at which to evaluate the line (a scalar or an array).
        m: slope of the line.
        b: y-intercept of the line.

    Returns:
        m*xx + b, with the same shape as ``xx``.
    """
    scaled = m * xx
    return scaled + b

# Least-squares fit of fnc to the data, starting the search from slope = 1 and intercept = 1.
# res[0] holds the fitted parameters (used when plotting the fitted line).
start_params = (1, 1)
res = optimize.curve_fit(fnc, x, y, p0=start_params, full_output=False)
res

In [None]:
# The fit tells us that our slope is around 2 and our intercept is around -3, as expected.
# Plotting these results along with the original data gives the figure below.

xout = [0, 5, 11]   # x positions at which the fitted line is drawn
m0, b0 = res[0]     # the slope and the intercept that we calculated (same order as fnc's m, b)
plt.scatter(x, y)

def f(ecks):
    """Evaluate the fitted line at ``ecks`` using the fitted slope ``m0`` and intercept ``b0``."""
    slope, intercept = m0, b0
    return fnc(ecks, slope, intercept)

yout = [f(position) for position in xout]   # fitted y value at each plotting position
plt.plot(xout, yout, color='orange', linestyle='dashed')
plt.show()

In [None]:
# Now let's do this the "ML way".
import tensorflow as tf

# Training data as numpy arrays (one number per sample).
(x_train, y_train) = np.array(x), np.array(y)

# A single Dense unit with one input computes w*x + b: exactly the straight line we want to fit.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=1, input_shape=(1,))
])

# Use mean_squared_error as the loss, for consistency with the scipy way,
# and plain Stochastic Gradient Descent (SGD) as the optimizer.
# `metrics` is passed as a list, which is the form Keras documents; a bare string is not
# accepted by every Keras version.
# 'accuracy' is kept on purpose even though it is meaningless for regression: a later cell
# of this notebook discusses why it stays at 0.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              loss='mean_squared_error',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=50)

In [None]:
# Now let's plot these results.
# Hand Keras a 2-D float array of shape (n_samples, 1) instead of a plain Python list of ints:
# a bare list of scalars may be rejected as model input by newer Keras releases.
y_pred = model.predict(np.asarray(xout, dtype='float32').reshape(-1, 1))
plt.scatter(x,y)
plt.plot(xout, y_pred, linestyle='dashed', color='orange')
plt.show()

# Not quite as good as we'd like to see, but not random. Let's see if we can do better!

In [None]:
# We'll allow it to train with more cycles, say 500.
# fit() continues from the current weights, so these epochs come on top of the earlier training.
model.fit(x_train, y_train, epochs=500)

# Predict from a 2-D float array of shape (n_samples, 1) instead of a plain Python list of ints:
# a bare list of scalars may be rejected as model input by newer Keras releases.
y_pred = model.predict(np.asarray(xout, dtype='float32').reshape(-1, 1))
plt.scatter(x,y)
plt.plot(xout, y_pred, linestyle='dashed', color='orange')
plt.show()

# That should be much better

In [None]:
# This is a one-node neural network.
# In general, a layer multiplies its input by a matrix M and then adds a vector b:
#   y = Mx + b

# Since we have only one number per sample, M is a 1x1 matrix here.
# Let's introspect those weights.
dense_layer = model.layers[0]
first_layer_weights = dense_layer.get_weights()
first_layer_weights

# Looking at these results implies a slope of about 2 and a y-intercept of about -3.

In [None]:
# Note also that the accuracy that is shown is not changing, it always has a value of 0.0000e+00, i.e. 0.
# This is because the 'accuracy' metric only applies to classification problems, and not to continuous variable results.

# For the latter we can define a new metric from TensorFlow ops.
# rmse_accuracy is 100 - RMSE: the root mean squared error of a batch, turned into a "higher is better" score.
def rmse_accuracy(y_true, y_pred):
    """Regression stand-in for accuracy: 100 minus the root mean squared error of the batch.

    Args:
        y_true: tensor of target values for the batch.
        y_pred: tensor of model predictions for the batch.

    Returns:
        Scalar tensor ``100 - sqrt(mean((y_pred - y_true)**2))``.
    """
    import tensorflow as tf  # local import keeps the metric usable on its own

    # Flatten both tensors so that a (batch,) target and a (batch, 1) prediction line up
    # element by element instead of broadcasting against each other.
    predicted = tf.reshape(y_pred, [-1])
    target = tf.reshape(tf.cast(y_true, predicted.dtype), [-1])
    # Average over every element of the batch. Averaging only over a last axis of size 1
    # would reduce to the per-sample absolute error rather than an RMSE.
    rmse = tf.sqrt(tf.reduce_mean(tf.square(predicted - target)))
    # A plain scalar offset is enough; no array sized by the (possibly unknown) batch dimension is needed.
    return 100.0 - rmse
                                                                          
# This will return 100% minus the RMSE error as our regression analogy to accuracy,
# which will make our results look more appealing and give us more confidence that things are working.
                                                                              
# Re-compile the same model with the new metric.
# `metrics` is passed as a list, which is the form Keras documents; a bare callable is not
# accepted by every Keras version.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              loss='mean_squared_error',
              metrics=[rmse_accuracy])

model.fit(x_train, y_train, epochs=500)  # Again, using 500 epochs
# Predict from a 2-D float array of shape (n_samples, 1) instead of a plain Python list of ints:
# a bare list of scalars may be rejected as model input by newer Keras releases.
y_pred = model.predict(np.asarray(xout, dtype='float32').reshape(-1, 1))

plt.scatter(x,y)
plt.plot(xout, y_pred, linestyle='dashed', color='orange')
plt.show()

In [None]:
# We can now see the accuracy increasing as the loss is decreasing.

# Now lets try this with a slightly more complex model, and a few more data points
from tensorflow.keras import layers

m = 2     # slope of the underlying line
b = -3    # intercept of the underlying line
xx = list(range(-15, 16))               # integer sample points from -15 to 15 inclusive
yy = [b + m * value for value in xx]    # noise-free targets y = m*x + b

XX = np.array(xx)

print(f"len(xx) = {len(xx)}")

# Training inputs and targets as numpy arrays.
x_train = XX
y_train = np.array(yy)

model = tf.keras.Sequential()

# Add the first layer (input layer).
# We set the input shape, which will be very useful when we have multidimensional input data like 2D images.
model.add(layers.Dense(units=1, activation='linear', input_shape=(1,)))

# Add another layer (hidden layer)
model.add(layers.Dense(units=3, activation='linear'))

# Add yet another layer (output layer)
model.add(layers.Dense(units=1, activation=None))  # Gives a simple number.

# Optionally, we can choose how the weights are initialized randomly.
# This can be helpful in avoiding a system where all-zero initial values do not have any gradient.
# An initializer only takes effect when it is passed to a layer, for example
#     layers.Dense(units=3, kernel_initializer=weight_init)
# It is created here for illustration only and is NOT wired into the layers above,
# so they keep their default initializers.
weight_init = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)

# Use mean_squared_error as the loss, and Stochastic Gradient Descent (SGD) as the optimizer.
# `metrics` is passed as a list, which is the form Keras documents; a bare callable is not
# accepted by every Keras version.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.0004),
              loss='mean_squared_error',
              metrics=[rmse_accuracy])

# This will allow us to see how many parameters we have per layer, and the total number of trainable parameters.
# summary() prints the table itself and returns None, so it is called directly rather than
# interpolated into a print (which would only append "None").
model.summary()

model.fit(x_train, y_train, epochs=800)

In [None]:
# Taking a look at the weights of each of the three layers...
layer_reports = [
    f"layer_{i}_weights = {model.layers[i].get_weights()}"
    for i in range(3)
]
print("\n".join(layer_reports))

print('\n')
    
# ...We will see that it is no longer obvious how our slope and intercept got embedded into these 12 parameters (see output).
# In addition, the activation functions were either None or linear: this is because if we chose the more traditional ones
# like 'relu' or 'tanh', the results would have difficulty converging.

# It was also necessary in this case to decrease the learning rate to make sure it converged. You can see this by
# resetting the learning rate to 0.1, and see what happens. 

# We'll need to start thinking about normalizing our features and regularizing our training for stable results
# in more general situations. I'll try to cover that in a later 'lesson'.

# Finally we need to think about how many trainable parameters we have relative to our total number of data points.
# In this case we have 31 data points, and Trainable params of 12. 
# It's good to have # data points >> # of trainable params

# To wrap up this training, let's plot these results
plt.scatter(x_train, y_train)

x_out = [-16, -7, 1, 7, 16]
# Predict from a 2-D float array of shape (n_samples, 1) instead of a plain Python list of ints:
# a bare list of scalars may be rejected as model input by newer Keras releases.
y_pred = model.predict(np.asarray(x_out, dtype='float32').reshape(-1, 1))

plt.plot(x_out, y_pred, linestyle='dashed', color='orange')
plt.show()

In [None]:
# Now lets solve the same problem using another Machine Learning framework

# For example, let's try scikit-learn - specifically sklearn.linear_model.LinearRegression
import math
import numpy as np  # note: was already imported
from sklearn.model_selection import train_test_split  # This function is often used with other frameworks 
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Prepare the data, using a capital X as our input data name (as opposed to our original 'x').
# X is a column: one row per sample, one feature per row.
# Using y = mX + b, with m = 2.0, and b = -3.0
X = np.arange(1, 11).reshape(-1, 1)

# Sample target values: the exact line plus dy, the bit of noise from before.
y = (m * X.ravel() + b) + dy

# The next step is not strictly necessary, but it is almost always done.
# If we knew we had a line, we could get the best estimate of m and b by fitting all the points.
# In general, however, we want to compare different models and see what works best, and we seldom know
# for sure that the relationship is perfectly linear, so we set aside a portion of the data for testing.
# Split the data into training and testing sets (20% held out, fixed random_state for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=3428, test_size=.2)

# Create the regression model and fit it to the training data in one step (fit() returns the estimator).
# NOTE: there is no compile step here, unlike the TensorFlow models above.
# TensorFlow often involves complicated architectures, and its compilation step allows for some graph
# optimization to speed up fitting. That is not needed for the simpler model used here.
# Note also that this fit() does not show any per-epoch progress.
model = LinearRegression().fit(X_train, y_train)

# Make predictions for every sample in X (training and test rows alike).
y_predX = model.predict(X)

# Evaluate the fit and print results.
# The error is measured between the targets y and the predictions (not between the inputs X and the predictions).
mse = mean_squared_error(y, y_predX)
# mean_squared_error already averages over the samples, so the RMSE is simply its square root;
# dividing by the number of samples a second time would understate the error.
RMSE = math.sqrt(mse)
r2 = r2_score(y, y_predX)

print("Scikit-learn Regression results:")
print(f"  Root Mean Squared Error: {round(RMSE, 6)}, R-squared: {round(r2, 6)}")

# Output fitting variables: m0 (slope, an array with one entry per feature) and b0 (intercept).
m0 = model.coef_
b0 = model.intercept_
print(f"  slope:{np.round(m0, 3)}, intercept: {np.round(b0, 3)} <<== should be close to expectations")

In [None]:
# Note: During fitting, this model does not produce any output showing its progress toward a satisfactory model.
# There are some discussions on the web & ChatGPT of the existence of a verbose flag, but that did not work in the
# above situation...

# Finally, let's plot the raw data and results
plt.scatter(X.ravel(), y)

# Make predictions using our xout.
# We need to reshape xout to be two-dimensional, as scikit-learn typically uses 2D input data,
# as opposed to TensorFlow, which handles anywhere from one to many dimensions.
# reshape(-1, 1) makes one row per value whatever the length of xout, instead of hard-coding 3 rows.
xout_skl = np.array(xout).reshape(-1, 1)  # 'skl' for scikit-learn.

y_pred = model.predict(xout_skl)
plt.plot(xout, y_pred, linestyle='dashed', color='orange')
plt.show()


In [None]:
# Moving on to lesson_02