In [None]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras import regularizers

## EarthML TensorFlow Exercise Notebook 1: Linear Regression Workflow


Please submit this notebook after completion. You are encouraged to further experiment by yourself. Instructions in this notebook are just to guide you further on your learning.

---

### Exercise 1: Linear Regression

1.1. Generate a dataset with 100 data points for linear regression using the equation $y = x^3 - 5x^2 + 4x - 7$, where `x` is a random number between 1 and 10 (this is an arbitrary choice).

Add some Gaussian noise to the output.

1.2. Split the dataset into training and testing sets (80% train and 20% test).

1.3. Implement a linear regression model in TensorFlow to predict `y` based on `x`.

1.4. Train your model and visualize the loss reduction.

1.5. Test your model on the test set and visualize the predicted line against the real data.

---

In [None]:
def gen_data(n,range_start,range_end,y_func):
    """Draw ``n`` random inputs and evaluate ``y_func`` on each of them.

    Parameters
    ----------
    n : int
        Number of samples to draw; ``0`` yields two empty lists.
    range_start, range_end : float
        Bounds passed to ``np.random.uniform`` for every draw.
    y_func : callable
        Function applied to each sampled input to produce its target.

    Returns
    -------
    tuple[list, list]
        ``(inputs, targets)`` in draw order, both of length ``n``.
    """
    # `xs` replaces the former local `input`, which shadowed the builtin.
    xs = []
    ys = []
    for _ in range(n):
        sample = np.random.uniform(range_start, range_end)
        xs.append(sample)
        ys.append(y_func(sample))
    return xs, ys

In [None]:
def y_1(x):
    """Evaluate the cubic target ``x**3 - 5*x**2 + 4*x - 7`` used in Exercise 1."""
    cubic_term = x**3
    quadratic_term = 5*x**2
    linear_term = 4*x
    # Same left-to-right evaluation order as the one-line polynomial.
    return cubic_term - quadratic_term + linear_term - 7
# Noise-free reference curve on a dense grid; drawn as the red line below.
x_in=np.linspace(0,10,100)
y_in=y_1(x_in)
# 100 random samples with x drawn from [1, 10) and their exact (noise-free) targets.
x,y=gen_data(100,1,10,y_1)
plt.plot(x,y,'o',color='blue')
plt.plot(x_in,y_in, color='red')
plt.show()

In [None]:
# Gaussian noise (std 50) for the targets. The sample count follows len(y) rather
# than a second hard-coded 100, so it cannot drift from the n given to gen_data.
noise=np.random.normal(scale=50,size=len(y))
# NOTE: this rebinds `y`; re-running the cell stacks another layer of noise on top.
y=y+noise
plt.plot(x,y,'o',color='blue')
plt.plot(x_in,y_in, color='red')
plt.show()

In [None]:
# Collect the noisy samples into a two-column frame (feature `x`, target `y`).
df = pd.DataFrame({
    'x': x,
    'y': y})
df

In [None]:
# Random 80/20 split: sample 80% of the rows for training, the remaining rows are the test set.
# No random_state is passed, so the split changes on every run.
train= df.sample(frac=0.8)
test= df.drop(train.index)

In [None]:
# A single Dense unit on one input feature, i.e. y_hat = w*x + b.
model=tf.keras.Sequential([tf.keras.layers.Dense(1,input_shape=(1,))])
model.summary()
# Adam with learning_rate=1, optimising the mean absolute error.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1),
    loss=tf.keras.losses.MeanAbsoluteError())

In [None]:
# Fit for 200 epochs; 30% of the training rows are held out as validation data.
history=model.fit(train['x'],train['y'],epochs=200,verbose=1,validation_split=0.3)

In [None]:
# Per-epoch losses as a table, with the epoch number alongside; show the last rows.
hist=pd.DataFrame(history.history)
hist['epoch']=history.epoch
hist.tail()

In [None]:
# Loss of the fitted model on the held-out test split.
loss=model.evaluate(test['x'],test['y']) 

In [None]:
# Training and validation loss per epoch; the dashed green line marks the test loss.
plt.axhline(loss,color='g',linestyle='--',label='test')
plt.plot(history.history['loss'],label='loss')
plt.plot(history.history['val_loss'],label='val_loss')
plt.legend()
plt.show()

In [None]:
# Predictions for the test inputs, plotted next to the noisy test targets.
y_predict=model.predict(test['x'])
plt.plot(test['x'],test['y'],'o',label='data')
plt.plot(test['x'],y_predict,'o',label='prediction')
plt.legend()
plt.show()

In [None]:
# Evaluate the fitted model on a dense grid over [1, 10] to draw its regression line.
t=np.linspace(1,10,100).reshape(-1,1)
line=model.predict(t)
plt.plot(df['x'],df['y'],'o')
plt.plot(t,line)
# Dashed curve: the noise-free cubic the data was generated from.
plt.plot(x_in,y_in,linestyle='--')
plt.show()

---
### Exercise 2: Overfitting and Underfitting

2.1. Generate a dataset with a quadratic relationship, for instance $y = x^2 + 2x + 3$. Add Gaussian noise to the output.

2.2. Split the dataset into training and testing sets.

2.3. Train a simple linear model on the dataset and visualize the predictions.

2.4. Now, implement a polynomial regression model of degree 2 and train it on the dataset.

2.5. Compare the performance of the linear model and the polynomial model. Which one underfits? Which one is just right?

2.6. Now, try a polynomial regression of degree 10. What do you observe regarding overfitting?

---

2.6'da sorun yaşadım. Başta oluşturduğum datayı normalize edip vermediğim sürece model, x'in yüksek kuvvetlerinden ötürü çok yüksek hatalar hesaplıyor. Farklı loss function'ları ve learning rate'leri denedim, hatta bu kısımda L2 regularization'ı da denedim; ama model yalnızca normalize edilmiş data verdiğim zaman düzgün çalışıyor. O yüzden overfit olmasını da sağlayamadım. Orijinal datayla bunu nasıl sağlayabilirim?

In [None]:
def y_2(x):
    """Evaluate the quadratic target ``3*x**2 - 4*x + 7`` used in Exercise 2."""
    quadratic_term = 3*x**2
    linear_term = 4*x
    return quadratic_term - linear_term + 7
# Noise-free reference curve on a dense grid over [-10, 10].
x_in2=np.linspace(-10,10,100)
y_in2=y_2(x_in2)
# 100 random samples with x drawn from [-10, 10) and their exact targets.
x2,y2=gen_data(100,-10,10,y_2)
plt.plot(x2,y2,'o',color='blue')
plt.plot(x_in2,y_in2, color='red')
plt.show()

In [None]:
# Gaussian noise (std 20) for the quadratic targets. The sample count follows
# len(y2) rather than a second hard-coded 100, so it stays in step with gen_data.
noise=np.random.normal(scale=20,size=len(y2))
# NOTE: this rebinds `y2`; re-running the cell stacks another layer of noise on top.
y2=y2+noise
plt.plot(x2,y2,'o',color='blue')
plt.plot(x_in2,y_in2, color='red')
plt.show()

In [None]:
# Frame of the noisy quadratic samples.
df2 = pd.DataFrame({
    'x': x2,
    'y': y2})
# Bare expression in the middle of a cell: it is evaluated but not displayed.
df2
# 80/20 split. NOTE: rebinds the `train`/`test` names used in Exercise 1.
train= df2.sample(frac=0.8)
test= df2.drop(train.index)
# Same single-unit linear model as in Exercise 1, now fitted to quadratic data.
model2=tf.keras.Sequential([tf.keras.layers.Dense(1,input_shape=(1,))])
model2.summary()
model2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1),
    loss=tf.keras.losses.MeanAbsoluteError())
# Silent training (verbose=0) with 30% of the training rows used for validation.
history2=model2.fit(train['x'],train['y'],epochs=200,verbose=0,validation_split=0.3)


In [None]:
# Straight line learned by model2 over [-10, 10], drawn over the noisy samples.
t=np.linspace(-10,10,100).reshape(-1,1)
line=model2.predict(t)
plt.plot(df2['x'],df2['y'],'o')
plt.plot(t,line)
# Dashed curve: the noise-free quadratic the data was generated from.
plt.plot(x_in2,y_in2,linestyle='--')
plt.show()

POLYNOMIAL REGRESSION

In [None]:
# Add x**2 as a second feature so a single linear layer can represent a degree-2 polynomial.
x_sqr=[i**2 for i in x2]
df2_poly=pd.DataFrame({
    'x1': x2,
    'x2': x_sqr,
    'y' : y2})

In [None]:
# Fresh 80/20 split of the two-feature frame. `train` is rebound, while the test rows
# go to `test2`, so the linear model's `test` split remains available.
train= df2_poly.sample(frac=0.8)
test2= df2_poly.drop(train.index)
# One Dense unit over [x, x**2]: y_hat = w1*x + w2*x**2 + b.
model2_poly=tf.keras.Sequential([tf.keras.layers.Dense(1,input_shape=(2,))])
model2_poly.summary()
model2_poly.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.4),
    loss=tf.keras.losses.MeanAbsoluteError())
# All columns except 'y' are the features; 30% of the training rows validate.
poly_history=model2_poly.fit(train.drop(columns=['y']),train['y'],epochs=200,validation_split=0.3)

In [None]:
# Dense grid of [x, x**2] rows for drawing the fitted quadratic curve.
t=[[x,x**2] for x in np.linspace(-10,10,100)]
line=model2_poly.predict(t)
plt.plot(df2_poly['x1'],df2_poly['y'],'o')
# First entry of each grid row is the plain x value.
plt.plot([i[0] for i in t],line)
plt.plot(x_in2,y_in2,linestyle='--')
plt.show()

In [None]:
# Test losses of both models. Each model is scored on its own held-out rows:
# `test` comes from the df2 split, `test2` from the df2_poly split.
loss_linear=model2.evaluate(test['x'],test['y'])
loss_quadratic=model2_poly.evaluate(test2.drop(columns=['y']),test2['y'])


In [None]:
# Figure 1: loss curves of the linear model, with its test loss as a dashed line.
plt.plot(history2.history['loss'],label='loss')
plt.plot(history2.history['val_loss'],label='val_loss')
plt.title('linear')
plt.axhline(loss_linear,color='g',linestyle='--',label='test')
plt.legend()
plt.show()
# Figure 2: the same view for the quadratic-feature model.
plt.plot(poly_history.history['loss'],label='loss')
plt.plot(poly_history.history['val_loss'],label='val_loss')
plt.title('quadratic')
plt.axhline(loss_quadratic,color='g',linestyle='--',label='test')
plt.legend()
plt.show()

linear model underfits 

2.6'da yaşadığım sorun buradan itibaren başlıyor.

In [None]:
def poly_datagen(n,x,y=None):
    """Build a polynomial feature table whose column ``i`` holds ``x**i``.

    Parameters
    ----------
    n : int
        Highest power to generate; columns are labelled ``1..n``.
    x : iterable of numbers
        Base input values, one per row.
    y : sequence, optional
        Targets aligned with ``x``. When given they are stored in a ``'y'``
        column; when omitted no target column is added, so the result can be
        used directly as a feature matrix.

    Returns
    -------
    pandas.DataFrame
        Power columns ``1..n`` followed by ``'y'`` if it was provided.
    """
    # Materialise once so a one-shot iterable is not exhausted after the first power.
    base = list(x)
    # Build every column up front instead of inserting them one at a time.
    df=pd.DataFrame({power: [value**power for value in base]
                     for power in range(1,n+1)})
    if y is not None:
        # Previously a missing `y` produced a spurious all-None 'y' column.
        df['y']=y
    return df

In [None]:
# Degree of the polynomial feature expansion used for the over-fitting experiment.
n=10
# Columns 1..n hold x2**1 .. x2**n; column 'y' holds the noisy targets.
df3=poly_datagen(n,x2,y2)

In [None]:
# Display the degree-n feature table.
df3

In [None]:
# 80/20 split of the degree-n table. NOTE: rebinds `train`/`test` once more.
train= df3.sample(frac=0.8)
test= df3.drop(train.index)

Bu modelde, 3. kısımda olduğu gibi normalizer vardı ve o şekilde düzgün sonuç alıyordum; ama şu an olduğu haliyle neden çalışmadığını anlamadım.

In [None]:
# Linear layer over the n raw power features, with no normalisation in front.
# NOTE(review): x2 is drawn from [-10, 10), so the feature magnitudes range from
# about 1e1 (x) up to about 1e10 (x**10). With inputs this badly scaled, MSE with
# lr=0.4 most likely produces enormous, ill-conditioned gradients; this is
# presumably why training only behaves once a Normalization layer is added.
model3=tf.keras.Sequential([tf.keras.layers.Dense(1,input_shape=(n,))])
model3.summary()
model3.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.4),
    loss=tf.keras.losses.MeanSquaredError())
# 500 epochs, 20% of the training rows used for validation.
history3=model3.fit(train.drop(columns=['y']),train['y'],
                    epochs=500,verbose=1,validation_split=0.2)

In [None]:
# Training and validation loss per epoch for the unnormalised degree-n model.
plt.plot(history3.history['loss'],label='loss')
plt.plot(history3.history['val_loss'],label='val_loss')
plt.legend()
plt.show()

In [None]:
# Evaluation grid over [-10, 10], expanded to powers 1..n so its width always matches
# the model's input shape (the exponent range used to be hard-coded as range(1,11)).
t=[[i**j for j in range(1,n+1)] for i in np.linspace(-10,10,100)]

In [None]:
# Degree-n prediction on the grid, drawn over the noisy samples
# (df2_poly holds the same x2/y2 data the degree-n table was built from).
line=model3.predict(t)
plt.plot(df2_poly['x1'],df2_poly['y'],'o')
plt.plot([i[0] for i in t],line)
# Restrict the visible y-range to [-50, 250].
plt.ylim(-50,250)
plt.plot(x_in2,y_in2,linestyle='--')
plt.show()

Loss, 2000 epoch sonra dahi buna benzer değerlerde sabitleniyor. Daha fazla azaltmak için ne yapmak gerek?

In [None]:
# Test-set loss (MSE) of the unnormalised degree-n model.
model3.evaluate(test.drop(columns=['y']),test['y'])

---
### Exercise 3: Regularization

3.1. Continuing from the previous exercise, add L2 regularization to the polynomial regression model of degree 10.

3.2. Train the model and compare its performance with the unregularized degree 10 model. What do you observe?

3.3. Experiment with different regularization strengths. How does the strength of regularization affect the model?

---

In [None]:
# New 80/20 split of the degree-n table (rebinds `train`/`test` again).
train= df3.sample(frac=0.8)
test= df3.drop(train.index)
# Short alias for the Keras L2 regulariser class.
L2=regularizers.L2
train_features=train.drop(columns=['y'])
# Normalization layer adapted on the training features only, not on the test rows.
normalizer = tf.keras.layers.Normalization()
normalizer.adapt(np.array(train_features))

In [None]:
# Degree-n model: input -> feature normalisation -> one Dense unit whose kernel
# carries an L2 penalty with factor 1e-2.
modelL2=tf.keras.Sequential([tf.keras.layers.InputLayer(input_shape=(n,)),
                            normalizer,tf.keras.layers.Dense(1,
                            kernel_regularizer=L2(1e-2))])
modelL2.summary()
modelL2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.5),
    loss=tf.keras.losses.MeanSquaredError())
# NOTE: rebinds `history`, which previously held the Exercise 1 training history.
history=modelL2.fit(train.drop(columns=['y']),train['y'],epochs=500,verbose=1,validation_split=0.1)

In [None]:
# Training and validation loss per epoch for the regularised model.
plt.plot(history.history['loss'],label='loss')
plt.plot(history.history['val_loss'],label='val_loss')
plt.legend()
plt.show()

In [None]:
# Evaluation grid over [-15, 15], i.e. wider than the [-10, 10) range the samples
# were drawn from, so the plot also shows extrapolation. Powers run 1..n to match
# the model's input width (the exponent range used to be hard-coded as range(1,11)).
t=[[i**j for j in range(1,n+1)] for i in np.linspace(-15,15,100)]

In [None]:

line=modelL2.predict(t)
# Debug output: first feature row of the grid and the shape of the predictions.
print(t[0])
print(line.shape)
plt.plot(df2_poly['x1'],df2_poly['y'],'o')
plt.plot([i[0] for i in t],line)
# Restrict the visible y-range to [-50, 350].
plt.ylim(-50,350)
plt.plot(x_in2,y_in2,linestyle='--')
plt.show()

In [None]:
# Test-set loss of the regularised model (includes the L2 penalty term, if Keras adds it to the reported loss — TODO confirm).
modelL2.evaluate(test.drop(columns=['y']),test['y'])

---
### Exercise 4: Activation and Cost Functions

4.1. Define and visualize the following activation functions: Sigmoid, ReLU, and Tanh. Describe their main properties and potential use cases.

4.2. Implement a simple feed-forward neural network in TensorFlow for a binary classification problem. Use the sigmoid activation function for the output layer.

4.3. Generate a toy binary classification dataset and train your neural network on it.

4.4. Replace the sigmoid activation function in the hidden layers with ReLU and compare the performances. Which one trains faster?

4.5. Experiment with different cost functions such as Mean Squared Error (MSE) and Cross-Entropy. What are their differences and which one is more suitable for the given problem?

---

In [None]:
import numpy as np 

In [None]:
# Shared input grid over [-10, 10] for plotting the activation functions.
inputs=np.linspace(-10,10,100)

In [None]:
def reLU(x):
    """Rectified linear unit: ``max(0, x)``, applied element-wise.

    Parameters
    ----------
    x : float or array-like
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``x`` where it is positive and ``0.0`` elsewhere, with the shape of ``x``.
    """
    # np.maximum broadcasts, whereas the builtin max() raises on multi-element arrays.
    return np.maximum(0.0, x)

# Evaluate ReLU point by point on the grid and plot the resulting curve.
outputs = [reLU(x) for x in inputs]
plt.plot(inputs, outputs)
plt.show()

In [None]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))`` for a scalar or NumPy array ``x``."""
    denominator = 1.0 + np.exp(-x)
    return 1.0 / denominator

# Evaluate the sigmoid point by point on the grid and plot the resulting curve.
outputs = [sigmoid(x) for x in inputs]
plt.plot(inputs, outputs)
plt.show()

In [None]:
def tanh(x):
    """Hyperbolic tangent, computed in a form that stays finite for large ``|x|``.

    The textbook ratio ``(e^x - e^-x) / (e^x + e^-x)`` overflows to ``inf / inf``
    (``nan``) once ``|x|`` exceeds roughly 710. Here the identity
    ``tanh(x) = sign(x) * (1 - e^{-2|x|}) / (1 + e^{-2|x|})`` is used instead,
    whose exponential can only underflow towards zero.

    Parameters
    ----------
    x : float or numpy.ndarray
        Input value(s).

    Returns
    -------
    numpy.float64 or numpy.ndarray
        ``tanh(x)`` in ``[-1, 1]``, with the shape of ``x``.
    """
    # expm1 gives e^{-2|x|} - 1 accurately, avoiding cancellation near x = 0.
    decay_minus_one = np.expm1(-2.0 * np.abs(x))
    return np.sign(x) * (-decay_minus_one) / (decay_minus_one + 2.0)
 
# Evaluate tanh point by point on the grid and plot the resulting curve.
outputs = [tanh(x) for x in inputs]
plt.plot(inputs, outputs)
plt.show()

Further explanation is in `activation_function.ipynb`.

In [None]:
# Binary classifier made of a single sigmoid unit over two input features
# (no hidden layer), trained with binary cross-entropy and tracked by accuracy.
classification_model=tf.keras.Sequential(
    [tf.keras.layers.Dense(1,input_shape=(2,),activation='sigmoid')])
classification_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'])

Generate toy data with two features, `x1` and `x2`, and a binary label `y` (0 or 1).

In [None]:
# 200 points drawn uniformly from the square [-1, 1) x [-1, 1).
X = np.random.uniform(-1, 1, (200,2))
# Prints the full (200, 2) array.
print(X)

In [None]:
def generate_label2D(X):
    """Label 2-D points by the side of a random line they fall on.

    Two points are drawn uniformly from ``[-1, 1)`` in each coordinate and define
    a directed line from the first to the second. A point gets label ``1`` when
    the 2-D cross product of the line direction with the vector from the first
    line point to it is strictly positive, and ``0`` otherwise (points exactly on
    the line get ``0``).

    Parameters
    ----------
    X : iterable of (x, y) pairs
        Points to classify.

    Returns
    -------
    tuple
        ``(labels, (point1, point2))``: the list of 0/1 labels in input order and
        the two points that define the separating line.
    """
    start, end = np.random.uniform(-1, 1,(2,2))
    x1, y1 = start
    x2, y2 = end
    # The sign of the cross product tells which side of the line start->end a point lies on.
    labels = [
        1 if (x2 - x1) * (y - y1) - (y2 - y1) * (x - x1) > 0 else 0
        for x, y in X
    ]
    return labels, (start, end)


In [None]:
# Label the points; keep the labels in Y and the two line-defining points in p1, p2.
L=generate_label2D(X)
Y=L[0]
p1,p2=L[1]

In [None]:
# Classification frame: two feature columns plus the 0/1 label.
# NOTE: rebinds `df`, which held the Exercise 1 regression data.
df= pd.DataFrame({
    'x1': X[:,0],
    'x2': X[:,1],
    'label': Y
})

In [None]:
# Display the classification frame.
df

visualizing the target function

In [None]:
def get_line(p1,p2):
    """Return ``(slope, intercept)`` of the straight line through ``p1`` and ``p2``.

    Both points are indexable as ``(x, y)``. The slope is rise over run, so two
    points sharing the same x coordinate lead to a division by zero.
    """
    x_a, y_a = p1[0], p1[1]
    x_b, y_b = p2[0], p2[1]
    slope = (y_b - y_a) / (x_b - x_a)
    intercept = y_a - slope * x_a
    return slope, intercept

In [None]:
# Slope/intercept of the target line through the two random points.
m,b= get_line(p1,p2)
# NOTE: rebinds the notebook-global `t`; get_map() below reads this grid.
t=np.linspace(-1,1,100)
plt.plot(t,m*t+b,linestyle='--')
plt.ylim(-1,1)
plt.show()

showing the data with target function

In [None]:
def get_map(X,Y,m,b,lable=None):
    """Scatter the labelled points and draw the line ``m*x + b`` on the current axes.

    Parameters
    ----------
    X : array-like of (x1, x2) pairs
        Point coordinates.
    Y : sequence
        One label per point; label ``1`` is drawn blue, anything else red.
    m, b : float or array broadcastable against a length-100 grid
        Slope and intercept of the line to draw.
    lable : str, optional
        Legend label for the line (spelling kept for existing callers). A legend
        is only drawn when it is given.
    """
    points = np.asarray(X).reshape(-1, 2)
    is_positive = np.asarray(Y) == 1
    # One plot call per colour instead of one call per point.
    plt.plot(points[is_positive, 0], points[is_positive, 1], 'o', color='blue')
    plt.plot(points[~is_positive, 0], points[~is_positive, 1], 'o', color='red')
    # Local x grid: this used to read the notebook-global `t`, which other cells
    # rebind to lists of polynomial features, so the result depended on run order.
    line_x = np.linspace(-1, 1, 100)
    plt.plot(line_x, m*line_x+b, label=lable)
    if lable is not None:
        plt.legend()
    plt.ylim(-1,1)

In [None]:
# Labelled points together with the (unlabelled) target line.
get_map(X,Y,m,b)
plt.show()

In [None]:
# Train on all 200 points, with 20% of them used for validation.
# NOTE: rebinds `history` once more.
history=classification_model.fit(df.drop(columns=['label']),df['label'],epochs=200,verbose=1,
                                 validation_split=0.2)

In [None]:
# Training and validation cross-entropy per epoch.
plt.plot(history.history['loss'],label='loss')
plt.plot(history.history['val_loss'],label='val_loss')
plt.legend()
plt.show()

In [None]:
# Layer/parameter overview of the trained classifier.
classification_model.summary()

In [None]:
# Raw weight arrays of the classifier (indexed as [0] and [1] in the next cell).
classification_model.get_weights()

In [None]:
# Weights of the single Dense layer: entry 0 is bound to `coordinates`
# (one weight per input feature), entry 1 to `bias`.
coordinates=classification_model.get_weights()[0]
bias=classification_model.get_weights()[1]

# Horizontal extent of the data: smallest and largest first-feature value.
x0_1 = np.amin(X[:, 0])
x0_2 = np.amax(X[:, 0])

# Solve coordinates[0]*x0 + coordinates[1]*x1 + bias = 0 for x1 at both ends,
# giving two points on the learned decision boundary.
# NOTE(review): divides by coordinates[1]; this breaks down if that weight is ~0.
x1_1 = (-coordinates[0] * x0_1 - bias) / coordinates[1]
x1_2 = (-coordinates[0] * x0_2 - bias) / coordinates[1]

visualization of data, target function and result of the model 

In [None]:
# Slope/intercept of the learned boundary from its two end points.
m2,b2=get_line((x0_1,x1_1),(x0_2,x1_2))

In [None]:
# Overlay the target line and the learned boundary on the same axes.
# The points are drawn by both calls, i.e. twice.
get_map(X,Y,m,b,'target')
get_map(X,Y,m2,b2,'output')
plt.show()

In [None]:
# Sigmoid baseline for Exercise 4.4: a sigmoid *hidden* layer (8 units) followed by
# the sigmoid output unit. This used to be an exact copy of `classification_model`,
# which has no hidden layer, so there was no hidden activation to compare.
classification_model1=tf.keras.Sequential([
    tf.keras.layers.Dense(8,input_shape=(2,),activation='sigmoid'),
    tf.keras.layers.Dense(1,activation='sigmoid')])
classification_model1.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'])
# Silent training with 20% of the points used for validation.
history1=classification_model1.fit(df.drop(columns=['label']),df['label'],epochs=200,verbose=0,
                                 validation_split=0.2)
plt.plot(history1.history['loss'],label='loss')
plt.plot(history1.history['val_loss'],label='val_loss')
plt.legend()
plt.show()

In [None]:
# ReLU variant for Exercise 4.4: ReLU goes in the *hidden* layer (8 units) while the
# output unit stays sigmoid. The previous version used ReLU as the output activation,
# but BinaryCrossentropy (from_logits=False by default) expects probabilities in
# (0, 1); a ReLU output is unbounded above and exactly 0 for negative pre-activations,
# so its loss curve was not comparable to the sigmoid model's.
classification_model2=tf.keras.Sequential([
    tf.keras.layers.Dense(8,input_shape=(2,),activation='relu'),
    tf.keras.layers.Dense(1,activation='sigmoid')])
classification_model2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'])
# NOTE: rebinds `history2`, which previously held the Exercise 2 linear-model history.
history2=classification_model2.fit(df.drop(columns=['label']),df['label'],epochs=200,verbose=0,
                                 validation_split=0.2)
plt.plot(history2.history['loss'],label='loss')
plt.plot(history2.history['val_loss'],label='val_loss')
plt.legend()
plt.show()

ReLU seems to converge after around 30 epochs, while sigmoid is still decreasing after 200 epochs at the given learning rate (0.01).

bigger data without visualization