In [5]:
import numpy as np
import pandas as pd
from typing import Optional, List
from sklearn.preprocessing import OrdinalEncoder

class Preprocess():
    """Load a CSV file and apply simple column-level preprocessing.

    The pipeline run by :meth:`preprocess` is: read the CSV at ``data_path``
    into ``self.data``, optionally ordinal-encode some columns, optionally
    shift some temperature columns from Kelvin to Celsius, and finally
    return a selection of columns.
    """

    # Offset subtracted from a Kelvin reading to obtain degrees Celsius.
    KELVIN_OFFSET = 273.15

    def __init__(self, data_path: str, *, verbose: bool = False):
        """Store the configuration; no data is read until ``preprocess``.

        Args:
            data_path: Location of the CSV file, passed to ``pd.read_csv``.
            verbose: When true, print column info and basic statistics
                each time the data is loaded. Defaults to ``False``.
        """
        self.data_path = data_path
        self.verbose = verbose
        # Separator prefixed to the section headers of the printed summary.
        self.line = '--'*25

    def __print_data_info(self, df: pd.DataFrame) -> None:
        """Print the column overview and descriptive statistics of ``df``."""
        print(self.line + "Columns: \n")
        # DataFrame.info() writes its report itself and returns None, so
        # wrapping it in print() would emit a stray "None" line.
        df.info()
        print(self.line + "Basic statistics: \n")
        print(df.describe())

    def _load_data(self) -> None:
        """Read the CSV at ``self.data_path`` into ``self.data``."""
        self.data = pd.read_csv(self.data_path)
        if self.verbose:
            self.__print_data_info(self.data)

    def _encode(self, column_name: str) -> None:
        """Ordinal-encode ``column_name`` into a new ``enc_<column_name>`` column.

        The source column is left untouched; a fresh ``OrdinalEncoder`` is
        fitted on it for every call.
        """
        enc = OrdinalEncoder()
        # The encoder expects a 2-D input, hence the single-column reshape.
        column_2d = self.data[column_name].values.reshape(-1, 1)
        self.data[f"enc_{column_name}"] = enc.fit_transform(column_2d)

    def _kelvin_to_celsius(self, column_name: str) -> None:
        """Convert ``column_name`` in place from Kelvin to degrees Celsius."""
        # Vectorized subtraction: same values as an element-wise apply,
        # without a Python-level call per row.
        self.data[column_name] = self.data[column_name] - self.KELVIN_OFFSET

    def _slice_data(self, columns) -> pd.DataFrame:
        """Return the given ``columns`` of ``self.data``."""
        return self.data[columns]

    def preprocess(
        self,
        columns: Optional[List[str]] = None,
        enc_columns: Optional[List[str]] = None,
        kelvin_columns: Optional[List[str]] = None,
    ) -> pd.DataFrame:
        """Load the data, transform the requested columns and return a slice.

        Args:
            columns: Columns to return. ``None`` returns every column,
                including any ``enc_<name>`` columns created by this call.
                When given, encoded columns are only returned if their
                ``enc_<name>`` label is listed here.
            enc_columns: Columns to ordinal-encode; each one adds an
                ``enc_<name>`` column next to the original.
            kelvin_columns: Columns whose values are converted in place
                from Kelvin to degrees Celsius.

        Returns:
            A DataFrame holding the selected columns of the processed data.
        """
        # The file is re-read on every call, so transformations never stack
        # across repeated invocations.
        self._load_data()

        for enc_column in enc_columns or ():
            self._encode(enc_column)

        for kelvin_column in kelvin_columns or ():
            self._kelvin_to_celsius(kelvin_column)

        if columns is None:
            columns = list(self.data.columns)
        return self._slice_data(columns)




Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF,enc_Type,enc_Product ID
0,1,M14860,M,24.95,35.45,1551,42.8,0,0,0,0,0,0,0,2.0,7003.0
1,2,L47181,L,25.05,35.55,1408,46.3,3,0,0,0,0,0,0,1.0,1003.0
2,3,L47182,L,24.95,35.35,1498,49.4,5,0,0,0,0,0,0,1.0,1004.0
3,4,L47183,L,25.05,35.45,1433,39.5,7,0,0,0,0,0,0,1.0,1005.0
4,5,L47184,L,25.05,35.55,1408,40.0,9,0,0,0,0,0,0,1.0,1006.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,M24855,M,25.65,35.25,1604,29.5,14,0,0,0,0,0,0,2.0,9997.0
9996,9997,H39410,H,25.75,35.25,1632,31.8,17,0,0,0,0,0,0,0.0,1001.0
9997,9998,M24857,M,25.85,35.45,1645,33.4,22,0,0,0,0,0,0,2.0,9998.0
9998,9999,H39412,H,25.85,35.55,1408,48.5,25,0,0,0,0,0,0,0.0,1002.0
