# 0.1 Gaussian Noise

In [1]:
import pandas as pd
import numpy as np

def add_gaussian_noise(
    file_path,
    noise_level=0.1,
    exclude_columns=('Vehicle', 'Eligible'),
    random_state=None,
):
    """
    Add zero-mean Gaussian noise to the numeric columns of a CSV dataset.

    Args:
    file_path (str): Path to the CSV file containing the dataset.
    noise_level (float): Fraction of the standard deviation of each column used as the standard deviation of the noise. Must be non-negative.
    exclude_columns (str or iterable of str): Numeric columns that must be left untouched. Defaults to 'Vehicle' and 'Eligible'.
    random_state (int or None): Seed for a dedicated random generator, making the noise reproducible. When None, noise is drawn from numpy's global random state.

    Returns:
    pd.DataFrame: DataFrame with added noise. Non-numeric columns, excluded columns and columns whose standard deviation is undefined are returned unchanged.

    Raises:
    ValueError: If noise_level is negative.
    """
    if noise_level < 0:
        raise ValueError("noise_level must be non-negative")

    # A bare string would otherwise be matched by substring, not by column name.
    if isinstance(exclude_columns, str):
        exclude_columns = (exclude_columns,)
    protected = set(exclude_columns)

    # The numpy.random module and a Generator both expose normal(loc, scale, size),
    # so unseeded callers keep drawing from the global state.
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    data = pd.read_csv(file_path)
    numerical_cols = data.select_dtypes(include=[np.number]).columns.tolist()
    for column in numerical_cols:
        if column in protected:
            continue
        std_dev = data[column].std()
        # The sample standard deviation is NaN for fewer than two valid values;
        # a NaN scale would turn every entry of the column into NaN.
        if pd.isna(std_dev):
            continue
        noise = rng.normal(loc=0, scale=noise_level * std_dev, size=data[column].shape)
        data[column] = data[column] + noise
    return data

# Input CSV locations
vehicle_dataset_path, training_dataset_path = (
    '1000VehicleDataset.csv',
    'VehicleTrainingDataset.csv',
)

# Perturb each dataset in turn (vehicle first, then training); change noise_level as needed
noisy_vehicle_data, noisy_training_data = (
    add_gaussian_noise(_source, noise_level=0.1)
    for _source in (vehicle_dataset_path, training_dataset_path)
)

# Write each noisy frame to its own CSV file, leaving out the index column
for _frame, _destination in (
    (noisy_vehicle_data, '1000VehicleDataset_Noisy_0.1.csv'),
    (noisy_training_data, 'VehicleTrainingDataset_Noisy_0.1.csv'),
):
    _frame.to_csv(_destination, index=False)

print("Noisy datasets have been saved.")


Noisy datasets have been saved.


# 0.01 Gaussian Noise

In [2]:
import pandas as pd
import numpy as np

def add_gaussian_noise(
    file_path,
    noise_level=0.01,
    exclude_columns=('Vehicle', 'Eligible'),
    random_state=None,
):
    """
    Add zero-mean Gaussian noise to the numeric columns of a CSV dataset.

    Args:
    file_path (str): Path to the CSV file containing the dataset.
    noise_level (float): Fraction of the standard deviation of each column used as the standard deviation of the noise. Must be non-negative.
    exclude_columns (str or iterable of str): Numeric columns that must be left untouched. Defaults to 'Vehicle' and 'Eligible'.
    random_state (int or None): Seed for a dedicated random generator, making the noise reproducible. When None, noise is drawn from numpy's global random state.

    Returns:
    pd.DataFrame: DataFrame with added noise. Non-numeric columns, excluded columns and columns whose standard deviation is undefined are returned unchanged.

    Raises:
    ValueError: If noise_level is negative.
    """
    if noise_level < 0:
        raise ValueError("noise_level must be non-negative")

    # A bare string would otherwise be matched by substring, not by column name.
    if isinstance(exclude_columns, str):
        exclude_columns = (exclude_columns,)
    protected = set(exclude_columns)

    # The numpy.random module and a Generator both expose normal(loc, scale, size),
    # so unseeded callers keep drawing from the global state.
    rng = np.random if random_state is None else np.random.default_rng(random_state)

    data = pd.read_csv(file_path)
    numerical_cols = data.select_dtypes(include=[np.number]).columns.tolist()
    for column in numerical_cols:
        if column in protected:
            continue
        std_dev = data[column].std()
        # The sample standard deviation is NaN for fewer than two valid values;
        # a NaN scale would turn every entry of the column into NaN.
        if pd.isna(std_dev):
            continue
        noise = rng.normal(loc=0, scale=noise_level * std_dev, size=data[column].shape)
        data[column] = data[column] + noise
    return data

# Input CSV locations
vehicle_dataset_path, training_dataset_path = (
    '1000VehicleDataset.csv',
    'VehicleTrainingDataset.csv',
)

# Perturb each dataset in turn (vehicle first, then training); change noise_level as needed
noisy_vehicle_data, noisy_training_data = (
    add_gaussian_noise(_source, noise_level=0.01)
    for _source in (vehicle_dataset_path, training_dataset_path)
)

# Write each noisy frame to its own CSV file, leaving out the index column
for _frame, _destination in (
    (noisy_vehicle_data, '1000VehicleDataset_Noisy_0.01.csv'),
    (noisy_training_data, 'VehicleTrainingDataset_Noisy_0.01.csv'),
):
    _frame.to_csv(_destination, index=False)

print("Noisy datasets have been saved.")


Noisy datasets have been saved.
