The preprocessor converts the raw data into a numeric format suitable for the autoencoder.

In [1]:
# PREPROCESSOR 
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Loading Data from CSV
# NOTE: machine-specific location; edit `your_path` to point at the folder
# that contains RT.IRS_CLEAN.csv.
your_path = r'C:\Users\gusta\Documents\KTH\TriOptima\trioptima'
file_path = your_path + '/RT.IRS_CLEAN.csv'
data = pd.read_csv(file_path)

# Handling DateTime Variables
# errors='coerce' turns unparseable dates into NaT, so the derived
# year/month/day values become NaN for those rows and are filled by the
# median imputer in the numeric pipeline below.
data['effectiveDate'] = pd.to_datetime(data['effectiveDate'], errors='coerce')
data['effectiveYear'] = data['effectiveDate'].dt.year
data['effectiveMonth'] = data['effectiveDate'].dt.month
data['effectiveDay'] = data['effectiveDate'].dt.day

# Drop the original 'effectiveDate' column
data = data.drop(columns=['effectiveDate'])

# Identifying variable types
# Only the columns listed here reach the output: ColumnTransformer drops
# every other column by default (remainder='drop').
numerical_vars = [
    'leg1FixedRate',
    'leg1FixedRatePaymentFrequencyMultiplier',
    'effectiveYear',
    'effectiveMonth',
    'effectiveDay',
]
categorical_vars = [
    'action',
    'assetClass',
    'leg2UnderlierTenorPeriod',
    'leg2ResetFrequencyPeriod',
]

# Preprocessing: Numerical Variables
# Fill missing values with the column median, then standardise to zero mean
# and unit variance.
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Preprocessing: Categorical Variables
# Fill missing values with the most frequent category, then one-hot encode.
# handle_unknown='ignore' encodes categories unseen during fit as all zeros
# instead of raising.
categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Column Transformer: Applying transformations to respective columns
# sparse_threshold=0 forces a dense ndarray result. With the default (0.3)
# the one-hot block makes the transformer return a SciPy sparse matrix as
# soon as the overall density drops below the threshold, and pd.DataFrame()
# does not expand a sparse matrix into one numeric column per feature.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical_vars),
        ('cat', categorical_transformer, categorical_vars)
    ],
    sparse_threshold=0
)

# Applying Preprocessing
# The resulting frame has positional integer column labels (0, 1, 2, ...):
# scaled numeric features first, followed by the one-hot columns.
data_preprocessed = pd.DataFrame(preprocessor.fit_transform(data))


Export the processed data to a .csv file.

In [2]:
import os

# Define the path
# The output is written to the same folder the input CSV was loaded from.
path = your_path

# Create the directory if it is missing. exist_ok=True makes this a no-op
# when the directory already exists, which replaces the separate
# "check, then create" steps and the gap between them.
os.makedirs(path, exist_ok=True)

# Now you can save the DataFrame
# index=False keeps the row index out of the CSV so the file holds feature
# columns only.
data_preprocessed.to_csv(f'{path}/processed_data.csv', index=False)

