In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns


from sklearn.datasets import fetch_california_housing
from sklearn.metrics import classification_report
from sklearn.svm import SVR
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Seed for the row shuffle below, fixed so that a fresh "Restart & Run All"
# reproduces the same row order.
SHUFFLE_SEED = 42

# Load the California housing data; the frame holds only the feature columns
# at this point.
housing_dataset = fetch_california_housing()
housing = pd.DataFrame(housing_dataset.data, columns=housing_dataset.feature_names)

# Per-feature mean / standard deviation, computed before the target column is
# attached so they describe the features only.
global_mean = housing.mean()
global_std = housing.std()

housing["target"] = housing_dataset.target

# Shuffle every row (frac=1 keeps all of them) with a fixed seed.
housing = housing.sample(frac=1, random_state=SHUFFLE_SEED)

# Pairwise correlations between all columns (features and target), rounded to
# two decimals so the heatmap annotations stay readable.
correlation_matrix = housing.corr().round(2)
sns.heatmap(data=correlation_matrix, annot=True)

# Last expression of the cell: show the first rows of the shuffled frame.
housing.head()

In [None]:
# NOTE(review): `train` is not defined in any cell visible in this notebook.
# It must already hold a DataFrame whose last column is the target before this
# cell runs - confirm where the train/test split is supposed to happen.
N_PARTS = 5

# Features are every column except the last one; the last column is the target.
train_x = train[train.columns[:-1]]
train_y = train[train.columns[-1]]

# Split the rows into N_PARTS consecutive, nearly equal chunks. The row
# *positions* are split and then used with .iloc, instead of handing the
# DataFrame itself to np.array_split, which goes through the deprecated
# DataFrame.swapaxes.
split_positions = np.array_split(np.arange(len(train_x)), N_PARTS)
split_train = [train_x.iloc[positions] for positions in split_positions]

# Per-part column-wise minima / maxima (one Series per part) ...
min_dfs = [part.min() for part in split_train]
max_dfs = [part.max() for part in split_train]

# ... stacked into DataFrames with one row per part and one column per feature.
min_df = pd.DataFrame(min_dfs)
max_df = pd.DataFrame(max_dfs)

# Column-wise extremes computed directly on the whole training set.
train_min = train_x.min()
train_max = train_x.max()

# Column-wise extremes obtained by aggregating the per-part results.
min_df_min = min_df.min()
max_df_max = max_df.max()

print(min_df)
print(max_df)

print(" --------------------------------------------------------------------")


print(min_df_min)
print(max_df_max)

print(" --------------------------------------------------------------------")

# Both differences compare the direct extremes with the aggregated ones.
print("\nDifference of the min values:")
print(train_min - min_df_min)

print("\nDifference of the max values:")
print(train_max - max_df_max)

# Per-feature value range according to the aggregated extremes.
minmax_difference = max_df_max - min_df_min

print("\n---------------------------------------------------------------------")


Train model & Add Noise

In [None]:
# Bounds used by the noise experiment: the column-wise extremes aggregated
# from the per-part minima / maxima (min_df_min and max_df_max).
min_values, max_values = min_df_min, max_df_max

def normalize_dataframe(dataframe, min_value, max_value):
    """Min-max scale `dataframe` using the supplied bounds.

    Computes ``(dataframe - min_value) / (max_value - min_value)``. A value
    equal to `min_value` maps to 0 and a value equal to `max_value` maps to 1;
    values outside the bounds fall outside [0, 1].

    Args:
        dataframe: Data to scale.
        min_value: Lower bound(s) subtracted from the data.
        max_value: Upper bound(s); together with `min_value` they define the
            span the data is divided by.

    Returns:
        The scaled data, as produced by the arithmetic above.
    """
    span = max_value - min_value
    shifted = dataframe - min_value
    return shifted / span


def add_noise(row, noise):
    """Return a copy of `row` with every value shifted by a relative amount.

    Each value ``v`` becomes ``v + v * noise``, so ``noise=0.01`` raises every
    value by 1 % and ``noise=-0.01`` lowers it by 1 %.

    The computation is a single vectorised expression rather than a Python
    loop with in-place ``+=`` on each label. This always builds a new object
    (the input is never modified) and lets the result take a float dtype when
    `row` holds integers, instead of writing floats into an integer Series.

    Args:
        row: Values to perturb, e.g. a pandas Series of per-column bounds.
        noise: Relative perturbation as a fraction (not a percentage).

    Returns:
        A new object of the same kind as `row` holding the perturbed values.
    """
    return row + row * noise


# Build one normalised copy of the training features per noise level. The
# min/max bounds are both perturbed by the same relative amount, from -2 % to
# +2 % in 0.2 % steps (21 variants, index 10 is the unperturbed one).
minmax_noised = []
noise_multpilier = 0.002  # relative step per noise level; (sic) spelling kept as is
for noise_value in range(-10, 11):
    noise = noise_value * noise_multpilier
    min_value_noised = add_noise(min_values, noise)
    max_value_noised = add_noise(max_values, noise)
    minmax_noised.append(normalize_dataframe(train_x, min_value_noised, max_value_noised))


# Reference: training features scaled with their exact min/max.
train_x_min = train_x.min()
train_x_max = train_x.max()
global_minmax_normalized = normalize_dataframe(train_x, train_x_min, train_x_max)

# Per-column shift of the mean caused by each noise level, relative to the
# reference scaling. The reference mean does not change inside the loop.
reference_mean = global_minmax_normalized.mean()
for df in minmax_noised:
    print(df.mean() - reference_mean)


# Scale the test features with the *training* bounds. Using the test set's own
# min/max would put train and test on different scales, so the same raw value
# would reach the model as two different inputs.
# NOTE(review): `test_x` is not defined in any cell visible in this notebook -
# confirm where the train/test split is created.
test_x_minmax = normalize_dataframe(test_x, train_x_min, train_x_max)


def create_regression_model(n_features=8, hidden_units=(128, 64), metrics=("accuracy",)):
    """Build and compile a small fully connected regression network.

    The network is an input layer, one ReLU `Dense` layer per entry of
    `hidden_units`, and a single linear output unit. It is compiled with the
    Adam optimizer and mean-squared-error loss. Calling the function without
    arguments yields the same 8 -> 128 -> 64 -> 1 model as before.

    Args:
        n_features: Number of input features per sample.
        hidden_units: Width of each hidden layer, in order.
        metrics: Metric names passed to `compile`.
            NOTE(review): "accuracy" is not an informative metric for a
            continuous regression target. The default is kept only because
            `plot_training_history` reads the 'accuracy' / 'val_accuracy'
            history keys; consider passing e.g. ("mae",) once that is updated.

    Returns:
        The compiled `keras.Sequential` model.
    """
    hidden_layers = [layers.Dense(units, activation='relu') for units in hidden_units]
    model = keras.Sequential(
        [layers.Input(shape=(n_features,))]
        + hidden_layers
        + [layers.Dense(1)]  # single linear unit for the regression output
    )

    model.compile(optimizer='adam', loss='mean_squared_error', metrics=list(metrics))

    return model

# Separate function for plotting
def plot_training_history(history, title, score=None):
    """Print final-epoch metrics and plot the train / validation loss curves.

    Args:
        history: Object with a `.history` dict (such as the value returned by
            `model.fit`) containing per-epoch 'loss' and 'val_loss' lists.
            'accuracy' and 'val_accuracy' are printed as well when present.
        title: Text used in the subplot title and the figure title.
        score: Optional sequence whose first element is the test loss and
            second element the test accuracy. The function used to read a
            global `score` that no visible cell defines, which raised a
            NameError; test metrics are now printed only when a score is
            passed in explicitly.
    """
    metrics = history.history

    if score is not None:
        print("Test loss:", score[0])
    print("Train loss:", metrics['loss'][-1])
    print("Val loss:", metrics['val_loss'][-1])
    if score is not None:
        print("Test accuracy:", score[1])
    # Only report accuracy when the model was compiled with that metric.
    if 'accuracy' in metrics:
        print("Train accuracy:", metrics['accuracy'][-1])
    if 'val_accuracy' in metrics:
        print("Val accuracy:", metrics['val_accuracy'][-1])

    plt.figure(figsize=(12, 6))

    # Single panel for the loss curves. The previous subplot(1, 2, 2) drew
    # into the right half of a two-column grid and left the left half empty.
    plt.subplot(1, 1, 1)
    plt.plot(metrics['loss'])
    plt.plot(metrics['val_loss'])
    plt.title('Model Loss - ' + title)
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

    plt.tight_layout()

    plt.suptitle(title)
    plt.show()

# Train one fresh model per noise level and record its final training loss.
TRAIN_SEED = 0   # same seed for every run, see below
batch_size = 256
epochs = 32
y_train = train_y  # the targets are identical for every noise level

# Position of the unperturbed variant. Assumes `minmax_noised` was built from
# a range symmetric around zero (range(-10, 11) in the cell that creates it).
center_index = len(minmax_noised) // 2

noised_losses = []
for i, df in enumerate(minmax_noised):
    # Re-seed before building each model so that every noise level starts from
    # the same random state; otherwise differences in the recorded loss mix
    # the effect of the noise with random weight initialisation / shuffling.
    keras.utils.set_random_seed(TRAIN_SEED)

    X_train = df.values  # Keras is fed the plain NumPy array

    regression_model = create_regression_model()
    history = regression_model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        verbose=0,
    )

    # Final-epoch training loss, plotted against the noise level later on.
    noised_losses.append(history.history['loss'][-1])

    # Noise level in percent, derived from the step size used to build the
    # variants instead of a second hard-coded constant; rounded to hide float
    # artefacts such as -0.6000000000000001.
    noise_percent = round((i - center_index) * noise_multpilier * 100, 4)

    print("Training and Validation for Noised DataFrame ", i+1)
    print("Noise amount:", noise_percent, "%")
    plot_training_history(history, f"Noised Data {i+1}")

#plot x -> noise values -2% to +2%, y-> accuracies


Loss Values vs Noise rate %

In [None]:
# x-axis: noise level in percent, from -2 % to +2 % in 0.2 % steps (21 points).
# np.linspace states the number of points explicitly; np.arange with a
# fractional step can yield one element more or less than intended because of
# floating-point rounding.
noise_values = np.linspace(-2.0, 2.0, num=21)

# Fail early with a readable message if the training loop produced a different
# number of results than there are noise levels.
assert len(noise_values) == len(noised_losses), (
    f"expected {len(noise_values)} loss values, got {len(noised_losses)}"
)

# Final training loss of each run against its noise level.
plt.plot(noise_values, noised_losses, marker='o', linestyle='-')

plt.xlabel('Noise')
plt.ylabel('Loss')
plt.title('Loss vs. Noise')

plt.grid(True)
plt.show()