In [None]:
pip install scikit-image

In [33]:
import pandas as pd
import sys
import time
import re
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import (
    LabelEncoder,
    OneHotEncoder,
    StandardScaler,
    MinMaxScaler,
    RobustScaler,
)
from sklearn.decomposition import PCA, TruncatedSVD
from scipy.stats import zscore, skew
from numpy import log1p
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
import cv2
from skimage import color, feature
import numpy as np
from sklearn.decomposition import PCA

def load_dataset():
    try:
        dataset_path = input("Enter the path of your dataset: ")
        if dataset_path.endswith('.xlsx'):
            df = pd.read_excel(dataset_path)
            print(df.head())
        else:
            df = pd.read_csv(dataset_path)
            print(df.head())
        return df
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None

def handle_missing_data(df):
    while True:
        try:
            numeric_columns = df.select_dtypes(include=['number']).columns
            non_numeric_columns = df.select_dtypes(exclude=['number']).columns
            strategy = input("Choose an imputation strategy for missing data (1. Mean for numeric, 2. Most frequent for non-numeric): ")

            if strategy == "1":
                imputer_numeric = SimpleImputer(strategy="mean")
                df[numeric_columns] = imputer_numeric.fit_transform(df[numeric_columns])
            elif strategy == "2":
                imputer_non_numeric = SimpleImputer(strategy="most_frequent")
                df[non_numeric_columns] = imputer_non_numeric.fit_transform(df[non_numeric_columns])

            if df.isna().any().any():
                print("Warning: NaN values found in the dataset. Please handle them.")
                choice = input("Do you want to remove rows with missing values? (y/n): ").lower()
                if choice == "y":
                    df = df.dropna()
                else:
                    print("Please choose a valid imputation strategy.")
                continue

            return df

        except ValueError as e:
            print(f"Error: {e}")
            print("An error occurred due to missing values.")
            choice = input("Do you want to remove rows with missing values? (y/n): ").lower()
            if choice == "y":
                df = df.dropna()
            else:
                print("Please choose a valid imputation strategy.")
                continue

def handle_categorical_data(df):
    choice = input("Choose a method for handling categorical data (1. Label Encoding, 2. One-Hot Encoding): ")

    if choice == "1":
        label_encoder = LabelEncoder()
        for column in df.select_dtypes(include=["object"]).columns:
            df[column] = label_encoder.fit_transform(df[column])
    elif choice == "2":
        df = pd.get_dummies(df, columns=df.select_dtypes(include=["object"]).columns)

    return df


def scaling_and_normalization(df):
    numeric_columns = df.select_dtypes(include=['number']).columns

    choice = input("Choose a method for scaling and normalization (1. Standard Scaling, 2. Min-Max Scaling, 3. Robust Scaling): ")

    if choice == "1":
        scaler = StandardScaler()
        df[numeric_columns] = scaler.fit_transform(df[numeric_columns])
    elif choice == "2":
        scaler = MinMaxScaler()
        df[numeric_columns] = scaler.fit_transform(df[numeric_columns])
    elif choice == "3":
        scaler = RobustScaler()
        df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

    return df

def handle_outliers(df):
    numeric_columns = df.select_dtypes(include=['number']).columns
    df_numeric = df[numeric_columns]
    z_scores = zscore(df_numeric)
    df_outliers = df[(z_scores < 3).all(axis=1)]

    return df_outliers

def handle_skewed_distributions(df):
    numeric_columns = df.select_dtypes(include=['number']).columns
    df_numeric = df[numeric_columns]
    skewed_features = df_numeric.apply(lambda x: skew(x))
    skewed_features = skewed_features[abs(skewed_features) > 0.5]

    for feature in skewed_features.index:
        df[feature] = log1p(df[feature])

    return df

def exclude_features(df):
    print("All features in the dataset:")
    print(df.columns)
    exclude_columns = []

    while True:
        user_input = input("Enter a column to be excluded or 'n' to stop: ")
        if user_input.lower() == 'n':
            break
        else:
            exclude_columns.append(user_input)
    df = df.drop(columns=exclude_columns, errors='ignore')
    print("\nDataset after excluding features:")
    print(df.head())

    return df

def add_feature(df):
    print("Please implement your logic for adding new fields to the dataset in the add_feature() function.")
    print("Need Termination of the program.")
    time.sleep(3600)
    try:
        raise SystemExit
    except SystemExit:
        pass
    
    try:
        sys.exit()
    except SystemExit:
        pass

def handle_time_series_features(df):
    date_columns = []
    print("Potential columns for date and time conversion:")
    for col in df.columns:
        if pd.api.types.is_datetime64_any_dtype(df[col]):
            print(f"  - {col} (Already in datetime format)")
        else:
            print(f"  - {col}")
    while True:
        user_input = input("Enter the name of a column to convert to date and time (or 'done' to finish): ").lower()
        if user_input == 'done':
            break
        elif user_input in df.columns:
            date_columns.append(user_input)
        else:
            print(f"Column '{user_input}' not found in the dataset. Please enter a valid column name.")

    if not date_columns:
        print("No columns selected for date and time conversion.")
        return df
    date_format_input = input("Enter the date and time format (or 'auto' to use detected format): ").strip()
    for col in date_columns:
        try:
            if date_format_input.lower() == 'auto':
                df[col] = pd.to_datetime(df[col], errors='coerce')
            else:
                df[col] = pd.to_datetime(df[col], format=date_format_input, errors='raise')
            print(f"Column '{col}' converted to datetime.")
        except Exception as e:
            print(f"Error converting column '{col}' to datetime: {e}")

    return df


def handle_nlp_features(df):
    
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS

    # Ask the user if they want to concatenate any columns
    concatenate_columns = input("Do you want to concatenate any columns for NLP processing? (y/n): ").lower()
    if concatenate_columns == 'y':
        # Display available columns to the user
        print("Available columns for concatenation:")
        print(df.columns)

        while True:
            try:
                columns_to_concat = input("Enter the names of columns to concatenate (comma-separated): ").split(',')
                new_column_name = input("Enter the name for the new concatenated column: ")
                if all(col.strip() in df.columns for col in columns_to_concat):
                    df[new_column_name] = df[columns_to_concat].astype(str).agg(' '.join, axis=1)
                    print("\nDataFrame after concatenation:")
                    print(df.head())
                    break
                else:
                    print("Invalid column name(s). Please enter valid column name(s).")

            except Exception as e:
                print(f"Error: {e}")
    text_column = input("Enter the name of the column containing NLP text data (you can use your concatenated column also): ")
    if text_column not in df.columns:
        print(f"Error: The specified text column '{text_column}' does not exist in the dataset.")
        return df

    # Tokenization
    df['tokens'] = df[text_column].apply(lambda x: re.findall(r'\b\w+\b', x.lower()))

    # Display tokenized text
    print("\nTokenized Text:")
    print(df[['tokens']].head())

    # Remove Stop Words
    stop_words = ENGLISH_STOP_WORDS
    df['tokens'] = df['tokens'].apply(lambda x: [word for word in x if word not in stop_words])

    # Display text after stop words removal
    print("\nText after stop words removal:")
    print(df[['tokens']].head())

    # Add tokenized text to the dataset
    df['stopwords_removed_text'] = df['tokens'].apply(lambda x: ' '.join(x))

    # Display DataFrame with tokenized text
    print("\nDataFrame with Tokenized Text:")
    print(df.head())
    if not df[text_column].empty:
        # TF-IDF Vectorization
        tfidf_vectorizer = TfidfVectorizer()
        if df[text_column].apply(lambda x: len(re.findall(r'\b\w+\b', x.lower()))).sum() > 0:
            tfidf_matrix = tfidf_vectorizer.fit_transform(df[text_column])
            tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=tfidf_vectorizer.get_feature_names_out())
            print("\nTF-IDF DataFrame:")
            print(tfidf_df.head())

            print("\nNLP features engineered:")
            print(df.head())
        else:
            print("Error: Text data contains only stop words. TF-IDF vectorization cannot be performed.")
    else:
        print("Error: Text data is empty. TF-IDF vectorization cannot be performed.")

    return df


def load_dataset():
    try:
        dataset_path = input("Enter the path of your dataset: ")
        if dataset_path.endswith('.xlsx'):
            df = pd.read_excel(dataset_path)
            print(df.head())
        else:
            df = pd.read_csv(dataset_path)
            print(df.head())
        return df
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None



def handle_vision_features(df):
    print("Handling vision features...converting numerical, resizing, converting grey, normalizing, applying PCA and saving")

    def preprocess_image(image_path):
        try:
            img = cv2.imread(image_path)
            img = cv2.resize(img, (224, 224))
            gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            normalized_img = gray_img / 255.0
            return normalized_img
        except Exception as e:
            print(f"Error processing image: {str(e)}")
            return None
    process_images = input("Do you want to process images and add them to the dataset? (y/n): ").lower()
    if process_images == 'y':
        image_path = input("Enter the path of the image file: ")
        processed_image = preprocess_image(image_path.strip())
        if processed_image is not None:
            # Assuming you have a function apply_pca for PCA, you can apply it here
            # pca_features = apply_pca(processed_images)
            df['processed_images'] = [processed_image] * len(df)
            print("Image processing and feature engineering completed.")
        else:
            print("No valid image processing performed.")
    else:
        print("No image processing performed.")
    return df


def handle_highly_correlated_features(df):
    numeric_columns = df.select_dtypes(include=['number']).columns
    correlation_matrix = df[numeric_columns].corr()
    high_correlation_threshold = 0.8  
    correlated_features = set()
    for i in range(len(correlation_matrix.columns)):
        for j in range(i):
            if abs(correlation_matrix.iloc[i, j]) > high_correlation_threshold:
                colname = correlation_matrix.columns[i]
                correlated_features.add(colname)

    df = df.drop(columns=correlated_features, errors='ignore')
    print(f"Highly correlated features removed: {correlated_features}")

    return df

def handle_dimensionality_reduction(df):
    while True:
        reduction_method = input("Choose dimensionality reduction method (1. PCA, 2. SVD): ").lower()
        try:
            if reduction_method == "1":
                n_components = int(input("Enter the number of components for PCA: "))
                if n_components > min(df.shape[0], df.shape[1]):
                    raise ValueError(f"Error: n_components={n_components} must be between 0 and min(n_samples, n_features)={min(df.shape[0], df.shape[1])}.")
                pca = PCA(n_components=n_components)
                transformed_data = pca.fit_transform(df)
                print(f"PCA applied with {n_components} components.")
                break  # Exit the loop if input is valid
            elif reduction_method == "2":
                n_components = int(input("Enter the number of components for Truncated SVD: "))
                if n_components > min(df.shape[0], df.shape[1]):
                    raise ValueError(f"Error: n_components={n_components} must be between 0 and min(n_samples, n_features)={min(df.shape[0], df.shape[1])}.")
                svd = TruncatedSVD(n_components=n_components)
                transformed_data = svd.fit_transform(df)
                print(f"Truncated SVD applied with {n_components} components.")
                break  
            else:
                print("Invalid dimensionality reduction method. Please choose either '1' for PCA or '2' for SVD.")
        except ValueError as ve:
            print(f"Error: {ve}")
            retry = input("Do you want to retry entering the number of components? (y/n): ").lower()
            if retry != "y":
                break  
    df_transformed = pd.DataFrame(transformed_data, columns=[f'PC{i}' for i in range(1, n_components + 1)])
    return df_transformed

def main():
    df = load_dataset()

    if df is not None:
        dataset_types = ["NLP", "Vision", "Time Series", "Other"]
        print("Your dataset types:")
        for i, dtype in enumerate(dataset_types, 1):
            print(f"{i}. {dtype}")

        while True:
            dataset_type_input = input("Enter the number corresponding to the dataset type: ")
            if dataset_type_input.isdigit():
                dataset_type_index = int(dataset_type_input)
                if 1 <= dataset_type_index <= len(dataset_types):
                    dataset_type = dataset_types[dataset_type_index - 1].capitalize()
                    break
            print("Invalid input. Please enter a valid number.")

        exclude_option = input("Do you want to exclude any features? (y/n): ").lower()
        if exclude_option == "y":
            df = exclude_features(df)

        add_feature_option = input("Do you want to add new fields to the dataset? (y/n): ").lower()
        if add_feature_option == "y":
            add_feature(df)

        apply_imputation = input("Do you want to handle missing data? (y/n): ").lower()
        if apply_imputation == "y":
            df = handle_missing_data(df)

        apply_categorical_handling = input("Do you want to handle categorical data? (y/n): ").lower()
        if apply_categorical_handling == "y":
            df = handle_categorical_data(df)

        apply_scaling = input("Do you want to scale and normalize the data? (y/n): ").lower()
        if apply_scaling == "y":
            df = scaling_and_normalization(df)

        apply_outlier_handling = input("Do you want to handle outliers? (y/n): ").lower()
        if apply_outlier_handling == "y":
            df = handle_outliers(df)

        apply_skew_handling = input("Do you want to handle skewed distributions? (y/n): ").lower()
        if apply_skew_handling == "y":
            df = handle_skewed_distributions(df)

        if dataset_type_input == "3":
            handle_time_series_features(df)
        elif dataset_type_input == "1":
            handle_nlp_features(df)
        elif dataset_type_input == "2":
            handle_vision_features(df)

        
        apply_highly_correlated_features = input("Do you want to remove highly correlated features? (y/n): ").lower()
        if apply_highly_correlated_features == "y":
            df = handle_highly_correlated_features(df)

        apply_dimensionality_reduction = input("Do you want to apply dimensionality reduction? (y/n): ").lower()
        if apply_dimensionality_reduction == "y":
            df = handle_dimensionality_reduction(df)

        
        print("Feature-engineered dataset:")
        print(df.head())

        save_option = input("Do you want to save the feature-engineered dataset? (y/n): ").lower()
        if save_option == "y":
            output_path = "feature_engineered_dataset"
            df.to_excel(f"{output_path}.xlsx", index=False)
            df.to_csv(f"{output_path}.csv", index=False)
            print(f"Feature-engineered dataset saved as {output_path}.xlsx and {output_path}.csv")

            
if __name__ == "__main__":
    main()

Enter the path of your dataset:  book1.xlsx


         Feature  Value 1  Value 2  Value 3  Value 4       time
0  Feature 1 is         1        2        3        4      21345
1  Feature 2 am         2        3        4        5  324234243
2      Feature 3        3        4        5        6  324324243
3      Feature 4        4        5        6        7  234243243
Your dataset types:
1. NLP
2. Vision
3. Time Series
4. Other


Enter the number corresponding to the dataset type:  1
Do you want to exclude any features? (y/n):  y


All features in the dataset:
Index(['Feature', 'Value 1', 'Value 2', 'Value 3', 'Value 4', 'time'], dtype='object')


Enter a column to be excluded or 'n' to stop:  time
Enter a column to be excluded or 'n' to stop:  n



Dataset after excluding features:
         Feature  Value 1  Value 2  Value 3  Value 4
0  Feature 1 is         1        2        3        4
1  Feature 2 am         2        3        4        5
2      Feature 3        3        4        5        6
3      Feature 4        4        5        6        7


Do you want to add new fields to the dataset? (y/n):  n
Do you want to handle missing data? (y/n):  n
Do you want to handle categorical data? (y/n):  n
Do you want to scale and normalize the data? (y/n):  n
Do you want to handle outliers? (y/n):  n
Do you want to handle skewed distributions? (y/n):  n
Do you want to concatenate any columns for NLP processing? (y/n):  y


Available columns for concatenation:
Index(['Feature', 'Value 1', 'Value 2', 'Value 3', 'Value 4'], dtype='object')


Enter the names of columns to concatenate (comma-separated):  Feature,Value 1
Enter the name for the new concatenated column:  col



DataFrame after concatenation:
         Feature  Value 1  Value 2  Value 3  Value 4              col
0  Feature 1 is         1        2        3        4  Feature 1 is  1
1  Feature 2 am         2        3        4        5  Feature 2 am  2
2      Feature 3        3        4        5        6      Feature 3 3
3      Feature 4        4        5        6        7      Feature 4 4


Enter the name of the column containing NLP text data (you can use your concatenated column also):  col



Tokenized Text:
                tokens
0  [feature, 1, is, 1]
1  [feature, 2, am, 2]
2      [feature, 3, 3]
3      [feature, 4, 4]

Text after stop words removal:
            tokens
0  [feature, 1, 1]
1  [feature, 2, 2]
2  [feature, 3, 3]
3  [feature, 4, 4]

DataFrame with Tokenized Text:
         Feature  Value 1  Value 2  Value 3  Value 4              col   
0  Feature 1 is         1        2        3        4  Feature 1 is  1  \
1  Feature 2 am         2        3        4        5  Feature 2 am  2   
2      Feature 3        3        4        5        6      Feature 3 3   
3      Feature 4        4        5        6        7      Feature 4 4   

            tokens stopwords_removed_text  
0  [feature, 1, 1]            feature 1 1  
1  [feature, 2, 2]            feature 2 2  
2  [feature, 3, 3]            feature 3 3  
3  [feature, 4, 4]            feature 4 4  

TF-IDF DataFrame:
         am   feature        is
0  0.000000  0.462637  0.886548
1  0.886548  0.462637  0.000000
2  0.000

Do you want to remove highly correlated features? (y/n):  y


Highly correlated features removed: {'Value 4', 'Value 2', 'Value 3'}


Do you want to apply dimensionality reduction? (y/n):  n


Feature-engineered dataset:
         Feature  Value 1              col           tokens   
0  Feature 1 is         1  Feature 1 is  1  [feature, 1, 1]  \
1  Feature 2 am         2  Feature 2 am  2  [feature, 2, 2]   
2      Feature 3        3      Feature 3 3  [feature, 3, 3]   
3      Feature 4        4      Feature 4 4  [feature, 4, 4]   

  stopwords_removed_text  
0            feature 1 1  
1            feature 2 2  
2            feature 3 3  
3            feature 4 4  


Do you want to save the feature-engineered dataset? (y/n):  y


Feature-engineered dataset saved as feature_engineered_dataset.xlsx and feature_engineered_dataset.csv


In [37]:
# for Vision

Enter the path of your dataset:  book1.xlsx


         Feature  Value 1  Value 2  Value 3  Value 4       time
0  Feature 1 is         1        2        3        4      21345
1  Feature 2 am         2        3        4        5  324234243
2      Feature 3        3        4        5        6  324324243
3      Feature 4        4        5        6        7  234243243
Your dataset types:
1. NLP
2. Vision
3. Time Series
4. Other


Enter the number corresponding to the dataset type:  2
Do you want to exclude any features? (y/n):  n
Do you want to add new fields to the dataset? (y/n):  n
Do you want to handle missing data? (y/n):  n
Do you want to handle categorical data? (y/n):  n
Do you want to scale and normalize the data? (y/n):  n
Do you want to handle outliers? (y/n):  n
Do you want to handle skewed distributions? (y/n):  n


Handling vision features...converting numerical, resizing, converting grey, normalizing, applying PCA and saving


Do you want to process images and add them to the dataset? (y/n):  y
Enter the path of the image file:  Capture.PNG


Image processing and feature engineering completed.


Do you want to remove highly correlated features? (y/n):  n
Do you want to apply dimensionality reduction? (y/n):  n


Feature-engineered dataset:
         Feature  Value 1  Value 2  Value 3  Value 4       time   
0  Feature 1 is         1        2        3        4      21345  \
1  Feature 2 am         2        3        4        5  324234243   
2      Feature 3        3        4        5        6  324324243   
3      Feature 4        4        5        6        7  234243243   

                                    processed_images  
0  [[0.9411764705882353, 0.9411764705882353, 0.94...  
1  [[0.9411764705882353, 0.9411764705882353, 0.94...  
2  [[0.9411764705882353, 0.9411764705882353, 0.94...  
3  [[0.9411764705882353, 0.9411764705882353, 0.94...  


Do you want to save the feature-engineered dataset? (y/n):  y


Feature-engineered dataset saved as feature_engineered_dataset.xlsx and feature_engineered_dataset.csv


In [38]:
#Time Series

Enter the path of your dataset:  book1.xlsx


         Feature  Value 1  Value 2  Value 3  Value 4       time
0  Feature 1 is         1        2        3        4      21345
1  Feature 2 am         2        3        4        5  324234243
2      Feature 3        3        4        5        6  324324243
3      Feature 4        4        5        6        7  234243243
Your dataset types:
1. NLP
2. Vision
3. Time Series
4. Other


Enter the number corresponding to the dataset type:  3
Do you want to exclude any features? (y/n):  n
Do you want to add new fields to the dataset? (y/n):  n
Do you want to handle missing data? (y/n):  n
Do you want to handle categorical data? (y/n):  n
Do you want to scale and normalize the data? (y/n):  n
Do you want to handle outliers? (y/n):  n
Do you want to handle skewed distributions? (y/n):  n


Potential columns for date and time conversion:
  - Feature
  - Value 1
  - Value 2
  - Value 3
  - Value 4
  - time


Enter the name of a column to convert to date and time (or 'done' to finish):  time
Enter the name of a column to convert to date and time (or 'done' to finish):  done
Enter the date and time format (or 'auto' to use detected format):  auto


Column 'time' converted to datetime.


Do you want to remove highly correlated features? (y/n):  y


Highly correlated features removed: {'Value 4', 'Value 2', 'Value 3'}


Do you want to apply dimensionality reduction? (y/n):  n


Feature-engineered dataset:
         Feature  Value 1                          time
0  Feature 1 is         1 1970-01-01 00:00:00.000021345
1  Feature 2 am         2 1970-01-01 00:00:00.324234243
2      Feature 3        3 1970-01-01 00:00:00.324324243
3      Feature 4        4 1970-01-01 00:00:00.234243243


Do you want to save the feature-engineered dataset? (y/n):  y


Feature-engineered dataset saved as feature_engineered_dataset.xlsx and feature_engineered_dataset.csv


In [40]:
# others

Enter the path of your dataset:  book1.xlsx


         Feature  Value 1  Value 2  Value 3  Value 4       time
0  Feature 1 is         1        2        3        4      21345
1  Feature 2 am         2        3        4        5  324234243
2      Feature 3        3        4        5        6  324324243
3      Feature 4        4        5        6        7  234243243
Your dataset types:
1. NLP
2. Vision
3. Time Series
4. Other


Enter the number corresponding to the dataset type:  4
Do you want to exclude any features? (y/n):  y


All features in the dataset:
Index(['Feature', 'Value 1', 'Value 2', 'Value 3', 'Value 4', 'time'], dtype='object')


Enter a column to be excluded or 'n' to stop:  Value 3
Enter a column to be excluded or 'n' to stop:  n



Dataset after excluding features:
         Feature  Value 1  Value 2  Value 4       time
0  Feature 1 is         1        2        4      21345
1  Feature 2 am         2        3        5  324234243
2      Feature 3        3        4        6  324324243
3      Feature 4        4        5        7  234243243


Do you want to add new fields to the dataset? (y/n):  n
Do you want to handle missing data? (y/n):  y
Choose an imputation strategy for missing data (1. Mean for numeric, 2. Most frequent for non-numeric):  1
Do you want to handle categorical data? (y/n):  y
Choose a method for handling categorical data (1. Label Encoding, 2. One-Hot Encoding):  1
Do you want to scale and normalize the data? (y/n):  y
Choose a method for scaling and normalization (1. Standard Scaling, 2. Min-Max Scaling, 3. Robust Scaling):  1
Do you want to handle outliers? (y/n):  y
Do you want to handle skewed distributions? (y/n):  y


  result = getattr(ufunc, method)(*inputs, **kwargs)


Do you want to remove highly correlated features? (y/n):  y


Highly correlated features removed: {'Value 4', 'Value 2', 'time', 'Value 1'}


Do you want to apply dimensionality reduction? (y/n):  y
Choose dimensionality reduction method (1. PCA, 2. SVD):  2
Enter the number of components for Truncated SVD:  2


Error: Error: n_components=2 must be between 0 and min(n_samples, n_features)=1.


Do you want to retry entering the number of components? (y/n):  y
Choose dimensionality reduction method (1. PCA, 2. SVD):  1
Enter the number of components for PCA:  1


PCA applied with 1 components.
Feature-engineered dataset:
        PC1
0 -1.341641
1 -0.447214
2  0.447214
3  1.341641


Do you want to save the feature-engineered dataset? (y/n):  y


Feature-engineered dataset saved as feature_engineered_dataset.xlsx and feature_engineered_dataset.csv
