# Normalization Demo


In [None]:
import numpy as np
# Outliers are data points that are significantly different from the rest of
# the data; here the last value, 1000, is the outlier.
data = np.array([10, 20, 30, 40, 50, 1000])

# Min-Max Scaling: Scales values between 0 and 1
def min_max_scaling(data):
  """Rescale `data` linearly so its minimum maps to 0 and its maximum to 1.

  Args:
    data: Array-like of numbers (anything `np.asarray` accepts).

  Returns:
    A NumPy array with the same shape as `data` holding
    (x - min) / (max - min). If every value is identical the range is zero,
    so an array of zeros is returned instead of dividing by zero.

  Raises:
    ValueError: If `data` is empty (raised by `np.min`).
  """
  values = np.asarray(data)
  # Local names deliberately avoid shadowing the built-in min() / max().
  lowest = np.min(values)
  value_range = np.max(values) - lowest
  if value_range == 0:
    # Constant input: 0 / 0 would produce NaN plus a RuntimeWarning.
    return np.zeros(values.shape, dtype=float)
  return (values - lowest) / value_range

# Z-Score Standardization: Mean = 0, Standard Deviation = 1
def z_score_standardization(data):
  """Standardize `data` to zero mean and unit standard deviation.

  Args:
    data: Array-like of numbers (anything `np.asarray` accepts).

  Returns:
    A NumPy array with the same shape as `data` holding (x - mean) / std,
    where std is `np.std` with its default settings (population standard
    deviation). If the standard deviation is exactly zero, an array of zeros
    is returned instead of dividing by zero.
  """
  values = np.asarray(data)
  mean = np.mean(values)
  std_deviation = np.std(values)
  if std_deviation == 0:
    # Zero spread: every value equals the mean, so each z-score is 0.
    return np.zeros(values.shape, dtype=float)
  return (values - mean) / std_deviation

# Apply both normalization techniques to the same sample data.
min_max_scaled_data = min_max_scaling(data)
z_score_standardized_data = z_score_standardization(data)

# Show the original values next to both normalized versions, one per line.
print(
    f'Original Data: {data}',
    f'Max-min scaled Data: {min_max_scaled_data}',
    f'Z-score standardized Data: {z_score_standardized_data}',
    sep='\n',
)

Original Data: [  10   20   30   40   50 1000]
Max-min scaled Data: [0.         0.01010101 0.02020202 0.03030303 0.04040404 1.        ]
Z-score standardized Data: [-0.50221883 -0.47457376 -0.44692869 -0.41928361 -0.39163854  2.23464343]


# Example 1 - House Price Prediction (Regression)

## Synthetic dataset

In [None]:
from re import VERBOSE
# Example 1 - House Price Prediction (Regression) with Synthetic dataset
# Predict house prices based on 10 numerical features

import tensorflow as tf
import numpy as np
from tensorflow.keras import layers,models

# Seed Python, NumPy and TensorFlow in one call so that the synthetic data,
# the initial network weights and the batch shuffling are all reproducible.
# (This also seeds NumPy with 42, so the generated data is unchanged.)
tf.keras.utils.set_random_seed(42)

# Generate synthetic dataset (1000 samples, 10 features)
X_train = np.random.rand(1000, 10)
y_train = np.random.rand(1000) * 500000  # House prices between 0-500K

# Generate test dataset (200 samples)
X_test = np.random.rand(200, 10)
y_test = np.random.rand(200) * 500000

# Build model. The input shape is declared with an explicit Input layer, which
# is the form Keras recommends for Sequential models (passing `input_shape=`
# to the first Dense layer triggers a warning in recent Keras versions).
model = models.Sequential([
    layers.Input(shape=(10,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)  # Single output for regression
])

# Compiling the model: mean squared error as the loss, MAE reported as a
# metric because it is in the same unit as the target (dollars).
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Training the model
model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1)

# Evaluate the model on the held-out test set
test_loss, test_mae = model.evaluate(X_test, y_test)

print(f' Test MAE:${test_mae:.2f}')

# Predict a random houseprice
random_sample = np.random.rand(1, 10)
predicted_price = model.predict(random_sample)
print(f'Predicted Price for Random Sample: ${predicted_price[0][0]:.2f}')

# MAE is around 243K - 245K, which means on average the predictions are off by
# nearly half of the $0-500K price range.
# This is very bad because an error of $243K in a price range of $500K means
# the model is almost random.
# The dataset is completely random, meaning there is no real pattern for the
# model to learn: the features carry no information about the price.
# The model is effectively guessing house prices, which is why MAE is so large.
# For reference: targets drawn uniformly from 0-500K have a mean of 250K, and
# always predicting that mean would already give an MAE of about 125K. An MAE
# close to 250K therefore suggests the predictions are still far below the
# scale of the (un-normalized) targets.
# In short, bad model


Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 84982595584.0000 - mae: 252982.5938
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 85918089216.0000 - mae: 257775.4688
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 83887521792.0000 - mae: 250283.1250
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 83676422144.0000 - mae: 253247.4062
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 87043252224.0000 - mae: 258080.7812
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 84063567872.0000 - mae: 251758.5312
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 86645383168.0000 - mae: 258333.1250
Epoch 8/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 84344872960.

## Synthetic dataset and Normalization


In [None]:
import numpy as np
import tensorflow as ty
from tensorflow.keras import layers,models

# Imported here because this cell binds TensorFlow to the alias `ty`, not `tf`.
from tensorflow.keras.utils import set_random_seed

# Seed Python, NumPy and TensorFlow in one call so that the synthetic data,
# the initial network weights and the batch shuffling are all reproducible.
# (This also seeds NumPy with 42, so the generated data is unchanged.)
set_random_seed(42)

# Generate synthetic dataset (1000 samples, 10 features) in [0,1] range
X_train = np.random.rand(1000, 10)
y_train = np.random.rand(1000)  # Target values in range 0-1

X_test = np.random.rand(200, 10)
y_test = np.random.rand(200)

# Create a model. The input shape is declared with an explicit Input layer,
# which is the form Keras recommends for Sequential models (passing
# `input_shape=` to the first Dense layer triggers a warning in recent Keras
# versions).
model = models.Sequential([
    layers.Input(shape=(10,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)  # Output
])

# Compile model
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the model
model.fit(X_train, y_train, epochs=20, batch_size=32, verbose=1)

# Evaluate the model
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f'Test MAE (Normalized): {test_mae:.4f}')

# Predict a sample pricing. Calling the model directly (instead of
# model.predict) returns a tensor for this single-row batch.
sample_input = np.random.rand(1, 10)
sample_prediction = model(sample_input)
print(f'Predicted Housing Price (Normalized) : {sample_prediction[0][0]:.4f}')

# This is again a bad model because we are still training on random values
# (just normalised values).
# Targets drawn uniformly from 0-1 have a mean of 0.5, and always predicting
# that mean gives an MAE of 0.25 - which is where the training MAE settles.
# Normalization lets the optimizer reach that baseline quickly, but it cannot
# create a pattern that is not in the data.

Epoch 1/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 0.3306 - mae: 0.4816
Epoch 2/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0903 - mae: 0.2535
Epoch 3/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0859 - mae: 0.2503
Epoch 4/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0871 - mae: 0.2528
Epoch 5/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0807 - mae: 0.2433
Epoch 6/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0857 - mae: 0.2521
Epoch 7/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0852 - mae: 0.2528
Epoch 8/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.0825 - mae: 0.2465
Epoch 9/20
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.08