In [1]:
import re
import csv
import shutil
import pickle

import numpy as np
import pandas as pd

from pathlib import Path

from sklearn.svm import SVC
from sklearn.impute import KNNImputer
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier, DMatrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, recall_score

  from pandas import MultiIndex, Int64Index
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)
  _numeric_index_types = (pd.Int64Index, pd.Float64Index, pd.UInt64Index)


In [2]:
np.random.seed(42)

In [3]:
TRAIN_DIRECTORY = True
TEST_DIRECTORY = False

In [4]:
class NoFileException(Exception):
    '''
        This exception is raised when no csv files are found in the directory.
    '''
    def __init__(self, dirName: str):
        self.message = f'No csv files found in "{dirName}" directory.'
        super().__init__(self.message)

In [5]:
class NoGoodFileException(Exception):
    '''
        This exception is raised when no good files are found in the directory.
    '''
    def __init__(self, dirName: str):
        self.message = f'No good files found in "{dirName}" directory.'
        super().__init__(self.message)

In [6]:
def validateFileName(filepath: str) -> bool:
    '''
        Validate Filename to be of correct format - Wafer_[8 digit date (ddmmyyyy)]_[6 digit time (hhmmss)].csv
        
        Inputs:
            filepath: str => Path of the file with filename at the end.
        
        Outputs:
            validName: bool => True if validation is successful, else False.
        
        Exceptions:
            TypeError => This exception is raised if the filepath is not of correct data type.
    '''
    
    if type(filepath) != str:
        raise TypeError('Invalid datatype of "filepath" parameter.')
    
    validName = False
    filename = filepath.split('\\')[-1]
    pattern = re.compile('wafer_[0-3][\d][01][\d][12][\d]{3}_[0-2][\d][0-5][\d][0-5][\d].csv')
    
    if pattern.match(filename.lower()):
        validName = True
    
    return validName

In [7]:
def validateColumns(filepath: str, trainDir: bool) -> bool:
    '''
        Validate if all the columns are proper in the given file based on directory the file is stored in.
        
        Inputs:
            filepath: str => Path of the file with filename at the end.
            trainDir: bool => True if the directory is train directory, False if it is test directory.
        
        Outputs:
            validColumnStructure: bool => True if file follows the desired column structure, else False.
            
        Exceptions:
            TypeError => This exception is raised if the filepath is not of correct data type.
    '''
    
    if type(filepath) != str:
        raise TypeError('Invalid datatype of "filepath" parameter.')
        
    validColumnStructure = False
    
    expectedColumnStructure = list()
    expectedColumnStructure.append('Unnamed: 0')
    
    for i in range(1, 591):
        expectedColumnStructure.append(f'Sensor-{i}')
        
    if trainDir:
        expectedColumnStructure[0] = ''
        expectedColumnStructure.append('Good/Bad')
    
    with open(filepath, newline='') as csvfile:
        reader = csv.reader(csvfile)
        headers = next(reader)
        if len(list(set(expectedColumnStructure).difference(set(headers)))) == 0:
            validColumnStructure = True
            
    return validColumnStructure

In [8]:
def identifyGoodFilesAndGetDir(dirName: str, trainDir: bool) -> str:
    '''
        Identify all the good csv files for the Wafer dataset based on filename and column structure as specified by client,\
        and stores the valid files in a new directory called goodTrainFiles or goodTestFiles (based on the trainDir parameter).
        
        Filename Validation:
            Wafer_[8 digit date (ddmmyyyy)]_[6 digit time (hhmmss)].csv
            
        Column Structure Validation:
            Train: Wafer Name, Sensor - 1, Sensor - 2, ..., Sensor - 590, Output
            Test: Wafer Name, Sensor - 1, Sensor - 2, ..., Sensor - 590
        
        Inputs:
            dirName: str => Name of the directory where the files are located which are to be identified
            trainDir: bool => Does the directory contain files used for training or testing
        
        Outputs:
            goodFilesDir: str => Relative Path of the folder containing the files identified as good files to folder of this file.
        
        Exceptions:
            NoFileException => This error is raised when no csv files are found in the directory
            NoGoodFileException => This error is raised when no good files are found in the directory
    '''
    
    goodFilesByName = list()
    all_csv_files = list([str(filename) for filename in Path(dirName).glob('*.csv')])
    
    if len(all_csv_files) == 0:
        raise NoFileException(dirName)
    
    for file in all_csv_files:
        if validateFileName(file):
            goodFilesByName.append(file)
    
    if len(goodFilesByName) == 0:
        raise NoGoodFileException(dirName)
    
    goodFiles = list()
    
    for file in goodFilesByName:
        if validateColumns(file, trainDir):
            goodFiles.append(file)
    
    if len(goodFiles) == 0:
        raise NoGoodFileException(dirName)
    
    goodFilesDir = 'Dataset/goodTrainFiles' if trainDir else 'Dataset/goodTestFiles'
    
    path = Path(goodFilesDir)
    if path.exists() and path.is_dir():
        shutil.rmtree(path)
    path.mkdir(parents=True)
    
    for filepath in goodFiles:
        shutil.copy(filepath, path)
        
    return goodFilesDir

In [9]:
goodTrainFilesPath = identifyGoodFilesAndGetDir('Dataset/Training_Batch_Files', TRAIN_DIRECTORY)
goodTestFilesPath = identifyGoodFilesAndGetDir('Dataset/Prediction_Batch_files', TEST_DIRECTORY)

In [10]:
column_names = list()
column_names.append('Wafer')
for i in range(1, 591):
    column_names.append(f'Sensor-{i}')
column_names.append('Output')

In [11]:
train_data = pd.DataFrame(columns=column_names)
for train_file in Path(goodTrainFilesPath).glob('*.csv'):
    df_new = pd.read_csv(str(train_file))
    df_new.rename(columns={'Unnamed: 0': 'Wafer', 'Good/Bad': 'Output'}, inplace=True)
    train_data = pd.concat([train_data, df_new], axis=0, ignore_index=True)

In [12]:
for col in train_data.columns:
    if 'sensor' in col.lower():
        train_data[col] = train_data[col].astype(np.float64)

In [13]:
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1537 entries, 0 to 1536
Columns: 592 entries, Wafer to Output
dtypes: float64(590), object(2)
memory usage: 6.9+ MB


In [14]:
train_data.describe()

Unnamed: 0,Sensor-1,Sensor-2,Sensor-3,Sensor-4,Sensor-5,Sensor-6,Sensor-7,Sensor-8,Sensor-9,Sensor-10,...,Sensor-581,Sensor-582,Sensor-583,Sensor-584,Sensor-585,Sensor-586,Sensor-587,Sensor-588,Sensor-589,Sensor-590
count,1531.0,1530.0,1523.0,1523.0,1523.0,1523.0,1523.0,1528.0,1535.0,1535.0,...,595.0,595.0,1536.0,1536.0,1536.0,1536.0,1537.0,1537.0,1537.0,1537.0
mean,3015.024193,2495.483771,2200.370187,1400.184797,4.255665,100.0,101.062428,0.121817,1.461905,-0.000798,...,0.005352,97.796315,0.500105,0.015029,0.003782,3.007635,0.021432,0.016414,0.005268,99.777881
std,73.908774,80.495576,29.46146,443.937101,56.906664,0.0,6.267841,0.009045,0.074078,0.01502,...,0.00311,88.689937,0.003357,0.0127,0.002733,2.641657,0.012398,0.008768,0.002865,94.560344
min,2743.24,2158.75,2060.66,0.0,0.6815,100.0,82.1311,0.0,1.191,-0.0534,...,0.001,0.0,0.4778,0.006,0.0017,1.1975,-0.0169,0.0032,0.001,0.0
25%,2967.465,2452.1825,2180.9666,1084.3779,1.0177,100.0,97.84,0.1211,1.4103,-0.01065,...,0.0033,45.833,0.4979,0.0116,0.0031,2.309525,0.0134,0.0106,0.0033,44.2355
50%,3012.09,2498.84,2200.9889,1287.3538,1.3168,100.0,101.4922,0.1224,1.4607,-0.0013,...,0.0046,71.5333,0.50015,0.0138,0.0036,2.75895,0.0207,0.0148,0.0046,71.5753
75%,3057.39,2538.5,2217.8667,1593.122,1.5291,100.0,104.52555,0.1238,1.5164,0.0083,...,0.0064,116.88855,0.502325,0.0165,0.0041,3.295575,0.0276,0.0203,0.0064,115.1005
max,3356.35,2846.44,2315.2667,3715.0417,1114.5366,100.0,129.2522,0.1286,1.6564,0.0749,...,0.0286,737.3048,0.5098,0.4714,0.1039,98.6628,0.1028,0.0799,0.0286,737.3048


In [15]:
train_data.dtypes

Wafer          object
Sensor-1      float64
Sensor-2      float64
Sensor-3      float64
Sensor-4      float64
               ...   
Sensor-587    float64
Sensor-588    float64
Sensor-589    float64
Sensor-590    float64
Output         object
Length: 592, dtype: object

In [16]:
train_data.head()

Unnamed: 0,Wafer,Sensor-1,Sensor-2,Sensor-3,Sensor-4,Sensor-5,Sensor-6,Sensor-7,Sensor-8,Sensor-9,...,Sensor-582,Sensor-583,Sensor-584,Sensor-585,Sensor-586,Sensor-587,Sensor-588,Sensor-589,Sensor-590,Output
0,Wafer-501,3076.81,2158.75,2208.2334,1517.0152,1.098,100.0,110.19,0.1247,1.4357,...,64.2405,0.5016,0.0152,0.004,3.0319,0.0465,0.0299,0.009,64.2405,-1
1,Wafer-502,2951.62,2511.92,2253.5111,1397.506,0.966,100.0,109.7611,0.121,1.5527,...,0.0,0.4953,0.0105,0.0037,2.1266,-0.0012,0.0252,0.0081,0.0,-1
2,Wafer-503,2930.42,2505.17,2235.0556,1302.6607,1.6347,100.0,109.9856,0.123,1.4588,...,,0.4958,0.0111,0.0033,2.2296,-0.0012,0.0252,0.0081,0.0,-1
3,Wafer-504,2997.28,2357.99,2141.0667,1236.5212,0.9698,100.0,98.3344,0.1238,1.5973,...,,0.4962,0.0086,0.0024,1.7297,-0.0012,0.0252,0.0081,0.0,-1
4,Wafer-505,3025.1,2475.18,2235.0556,1302.6607,1.6347,100.0,109.9856,0.123,1.5525,...,,0.4983,0.0159,0.0041,3.1927,-0.0012,0.0252,0.0081,0.0,-1


In [17]:
train_data.drop_duplicates(inplace=True)

In [18]:
train_data.drop(columns=['Wafer'], inplace=True)

In [19]:
train_data['Output'].replace({-1: 0}, inplace=True)

In [20]:
train_data.head()

Unnamed: 0,Sensor-1,Sensor-2,Sensor-3,Sensor-4,Sensor-5,Sensor-6,Sensor-7,Sensor-8,Sensor-9,Sensor-10,...,Sensor-582,Sensor-583,Sensor-584,Sensor-585,Sensor-586,Sensor-587,Sensor-588,Sensor-589,Sensor-590,Output
0,3076.81,2158.75,2208.2334,1517.0152,1.098,100.0,110.19,0.1247,1.4357,0.0089,...,64.2405,0.5016,0.0152,0.004,3.0319,0.0465,0.0299,0.009,64.2405,0
1,2951.62,2511.92,2253.5111,1397.506,0.966,100.0,109.7611,0.121,1.5527,0.0119,...,0.0,0.4953,0.0105,0.0037,2.1266,-0.0012,0.0252,0.0081,0.0,0
2,2930.42,2505.17,2235.0556,1302.6607,1.6347,100.0,109.9856,0.123,1.4588,-0.0143,...,,0.4958,0.0111,0.0033,2.2296,-0.0012,0.0252,0.0081,0.0,0
3,2997.28,2357.99,2141.0667,1236.5212,0.9698,100.0,98.3344,0.1238,1.5973,-0.0534,...,,0.4962,0.0086,0.0024,1.7297,-0.0012,0.0252,0.0081,0.0,0
4,3025.1,2475.18,2235.0556,1302.6607,1.6347,100.0,109.9856,0.123,1.5525,-0.0078,...,,0.4983,0.0159,0.0041,3.1927,-0.0012,0.0252,0.0081,0.0,0


In [21]:
X, y = train_data.drop(columns=['Output']), train_data['Output']

In [22]:
imputer = KNNImputer(n_neighbors=3)
X = imputer.fit_transform(X)

In [23]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [24]:
X

array([[ 0.83928822, -4.19351661,  0.26440929, ...,  1.53856474,
         1.30324967, -0.37593928],
       [-0.85637125,  0.20439486,  1.80659926, ...,  1.00234775,
         0.98898396, -1.05552023],
       [-1.14351863,  0.1203393 ,  1.17799209, ...,  1.00234775,
         0.98898396, -1.05552023],
       ...,
       [ 0.95793969, -1.62789184, -0.74189694, ..., -0.092904  ,
        -0.30299731,  0.10390101],
       [ 0.5234261 , -0.47850988, -0.95156095, ..., -0.092904  ,
        -0.30299731,  0.10390101],
       [ 1.43349368, -1.23413824, -0.7986659 , ..., -0.092904  ,
        -0.30299731,  0.10390101]])

In [25]:
pca = PCA(n_components=0.95, svd_solver='full')
pca.fit(X)

PCA(n_components=0.95, svd_solver='full')

In [26]:
X_train_data = pca.transform(X)
pd.DataFrame(X_train_data, columns=[f'PC-{i+1}' for i in range(pca.n_components_)]).head()

Unnamed: 0,PC-1,PC-2,PC-3,PC-4,PC-5,PC-6,PC-7,PC-8,PC-9,PC-10,...,PC-161,PC-162,PC-163,PC-164,PC-165,PC-166,PC-167,PC-168,PC-169,PC-170
0,-0.481504,1.915886,4.25694,-3.690994,1.874274,0.115925,0.494321,2.077702,-3.558232,-3.511659,...,-0.130654,0.105103,-0.596525,0.194341,-1.082136,1.073974,-1.001467,-0.369822,1.090352,1.63172
1,0.320273,2.695612,4.916052,-4.068747,0.63819,0.277909,-0.021831,0.848662,0.447673,-2.278988,...,-0.737617,0.396714,-0.96048,0.757489,1.456428,0.174036,0.175157,-0.046467,1.447847,0.195213
2,0.475491,-0.539261,1.739185,-1.546155,0.32793,0.35428,0.354387,-1.060128,-1.629293,-6.134755,...,0.041494,0.392431,0.253402,0.258477,0.395288,0.472977,1.216162,-0.374405,0.31112,-0.619863
3,1.365005,2.299948,6.871609,-4.206412,2.253421,2.58744,-1.753602,3.233079,-4.263065,-3.643338,...,-1.770405,0.155535,0.17549,-0.264693,-0.38243,0.001612,-0.909024,-0.111799,0.047485,2.087767
4,0.786307,0.570597,2.717501,-2.443015,0.893007,1.906021,1.049044,0.84891,-2.083136,-3.01712,...,0.567324,0.753729,-0.542781,0.973731,-1.346377,-1.01479,-0.410657,0.189903,-0.155378,-0.102774


In [27]:
list(y).count(0), list(y).count(1)

(1448, 89)

In [28]:
sm = SMOTE()
X_data, y_data = sm.fit_resample(X_train_data, y)

In [29]:
classifiers = {
    'lr': LogisticRegression(max_iter=250),
    'nb': GaussianNB(),
    'knn': KNeighborsClassifier(),
    'rfc': RandomForestClassifier(),
    'dtc': DecisionTreeClassifier(),
    'svc': SVC(),
}

xgbc_params = {"objective":"binary:logistic",'colsample_bytree': 0.3,'learning_rate': 0.1,
                'max_depth': 5, 'alpha': 10}
xgc = XGBClassifier(**xgbc_params)

In [30]:
kfold = StratifiedKFold(n_splits=5, shuffle=True)
clf_reports = {}
for clf_name in classifiers:
    model = classifiers[clf_name]
    clf_reports[clf_name] = list()
    for train_index, test_index in kfold.split(X_data, y_data):
        X_train, X_test, y_train, y_test = X_data[train_index], X_data[test_index], y_data[train_index], y_data[test_index]
        model.fit(X_train, y_train)
        y_preds = model.predict(X_test)
        clf_reports[clf_name] = classification_report(y_test, y_preds, target_names=['Bad Wafer', 'Good Wafer'], zero_division=0)

In [31]:
clf_reports['xgc'] = list()
for train_index, test_index in kfold.split(X_data, y_data):
    X_train, X_test, y_train, y_test = X_data[train_index], X_data[test_index], y_data[train_index], y_data[test_index]
    xgc.fit(X_train, y_train)
    y_preds = model.predict(X_test)
    clf_reports['xgc'] = classification_report(y_test, y_preds, target_names=['Bad Wafer', 'Good Wafer'], zero_division=0)

In [32]:
for clf_name in clf_reports:
    print('\nModel Name:', clf_name)
    print('Classification Report:')
    print(clf_reports[clf_name])


Model Name: lr
Classification Report:
              precision    recall  f1-score   support

   Bad Wafer       0.96      0.85      0.90       289
  Good Wafer       0.87      0.96      0.91       290

    accuracy                           0.91       579
   macro avg       0.91      0.91      0.91       579
weighted avg       0.91      0.91      0.91       579


Model Name: nb
Classification Report:
              precision    recall  f1-score   support

   Bad Wafer       0.81      0.74      0.78       289
  Good Wafer       0.76      0.83      0.80       290

    accuracy                           0.79       579
   macro avg       0.79      0.79      0.79       579
weighted avg       0.79      0.79      0.79       579


Model Name: knn
Classification Report:
              precision    recall  f1-score   support

   Bad Wafer       1.00      0.30      0.47       289
  Good Wafer       0.59      1.00      0.74       290

    accuracy                           0.65       579
   macro a

In [33]:
final_model = xgc
final_model.fit(X_data, y_data)
pickle.dump(imputer, open('imputer.pkl', 'wb'))
pickle.dump(scaler, open('scaler.pkl', 'wb'))
pickle.dump(pca, open('pca.pkl', 'wb'))
pickle.dump(final_model, open('classifier.pkl', 'wb'))

In [34]:
def predict(X: list) -> int:
    imputer = pickle.load(open('imputer.pkl', 'rb'))
    scaler = pickle.load(open('scaler.pkl', 'rb'))
    pca = pickle.load(open('pca.pkl', 'rb'))
    classifier = pickle.load(open('classifier.pkl', 'rb'))
    X = imputer.transform(X)
    X = scaler.transform(X)
    X = pca.transform(X)
    y_pred = classifier.predict(X)
    return y_pred

In [35]:
X=[2940.65,None,2214.0556,1150.7775,1.3772,100,102.9389,0.1205,1.4978,0.0221,-0.0055,0.9709,201.9724,0,7.7634,410.5165,10.3643,0.9803,191.6082,12.4578,1.3924,-5511.75,2740.25,-5078.5,783.75,1.3348,1.9835,7.3065,61.5778,3.1556,0.2334,3.407,86.3416,9.219,50.5347,64.0969,49.4653,65.8265,86.336,117.4196,77.9,2.163,70,364.1273,9.8895,140.2436,747.4044,1.239,141.7336,1,646.1045,201.7344,0,4.561,4.767,2897,0.9266,0.9567,4.6391,0.7582,363.3782,10.2557,121.0018,17.7409,19.2418,28.0488,719.3555,1.146,143.9836,1,628.3636,100.6331,162.7636,465.6001,0,0.0169,-0.0411,-0.0219,-0.0747,0.0147,-0.0985,-0.0169,-0.0357,7.1093,0.1288,None,2.4099,0.9904,1844.4508,0.1604,8340.3199,-0.0438,0.0014,0.0006,-0.0001,0,0.1544,0,-0.2459,0.0288,-0.0001,-0.0001,0.0291,-0.0091,-0.0004,-0.0014,-0.0006,-0.0232,-0.1153,None,None,None,0.4686,0.9423,0,728.1562,0.9909,58.4142,0.5969,0.9781,6.2949,15.77,2.632,15.83,15.83,0.9744,2.758,0.4541,3.341,0.7569,0.823,0.9962,2.3834,1011.6333,36.6674,148,105.7,None,48.5999,378.13,0.1789,0,5.45,0.0032,0.1373,0.0501,0.0516,0.0087,7.0638,0,5.004,13.084,0.966,0.0044,6.0979,0.3,0.0329,None,None,958,267,4048,716,0.241,0.172,0.473,1.3,1.2,0.146,0.341,0.7865,0.1347,0.3375,0.5479,0.3375,0.7062,0.1764,0.224,0,0,18.65,0.57,12.53,16.048,0.2052,7.02,0,7.74,57.628,0,0,0,0,0,0,0.104,7.26,21.91,0.611,8.08,13.27,7.26,10.354,20.864,0.0864,10.2,0,13.27,91.444,0,0.0917,0.0356,0.0364,0.0758,0.0749,0.1135,0.0862,0.062,2.4393,0.0037,None,0.073,0.0004,91.7754,0.0294,1358.2998,0,0.0112,0.0203,0,0,0,0,0,0,0,0,0,0.0056,0.0044,0,0,0,0,None,None,None,0.0413,0.0305,0,108.0401,0.0006,2.4242,0.0448,0.0042,0.6271,0,0,0,0,0,0,0,0,0,0,0,0.0795,12.64,2.7454,26.2466,33.0717,None,18.3432,112.2522,0.0568,0,1.8472,0.0008,0.0414,0.0144,0.0146,0.0029,2.3786,0,1.5159,3.8755,0.2832,0.0016,2.1196,0.0755,0.0106,None,None,397.4455,118.317,1907.1047,336.6307,0.1146,0.0755,0.1975,0.5286,0.4065,0.0506,0.1327,0.2922,0.049,0.1403,0.2009,0.1403,0.2828,0.0687,0.1006,0,0,0,5.6836,0.1712,3.7945,4.5966,0.0586,1.7857,0,2.3346,15.2324,0,0,0,0,0,0,0.0246,2.2152,6.9346,0.2056,2.4493,8.307,2.2152,3.3317,6.6205,0.0308,3.077,0,4.1144,29.0455,3.3662,2.42,0,0.0273,0.0168,0.0153,0.0312,0.0236,0.0586,0.0423,0.0289,0.8269,0.001,None,0.0251,0.0001,30.787,0.0086,450.0402,0,0.0028,0.005,0.0031,0.0024,0,0,0,0,0,0,0,0.0018,0.0016,0,0,0,0,None,None,None,0.0078,0.0106,0,39.8244,0.0002,0.6498,0.0135,0.0014,0.2015,0,0,0,0,0,0,0,0,0,0,0,0.0286,4.5224,1.1,6.6371,3.5944,None,2.1951,32.8587,12.9901,0,5.2944,2.655,9.1669,226.6968,941.3911,0.894,3.4974,0,64.4566,3.1872,9.3205,0.451,3.1825,2.4081,2.3657,17.381,9.7436,79.7086,91.3557,18.0558,8.6715,6.4737,2.1112,38.0282,62.5416,10.0088,0.9109,1.4607,0.6679,0.8548,0.6823,1.0728,0.2043,0.1908,0,0,0,5.1218,5.7637,8.9345,2.1472,16.5587,4.953,0,1.1979,28.5663,0,0,0,0,0,0,2.2418,957.554,6.0295,5.9576,6.6776,74.799,37.7303,36.9142,2.9004,7.5369,7.0841,0,2.1118,90.8687,0,542.2025,86.6707,166.0205,101.4047,509.0909,115.2284,510.0592,173.5479,34.3113,2.8719,None,3.0291,0.0409,4.9758,18.3326,16.2859,0,814.5455,0,0,0,0,0,0,0,0,0,0,61.3699,0,0,0,0,0,None,None,None,8.8137,3.2315,0,14.8375,0.0575,4.1501,7.513,0.4293,9.9618,0,0,0,0,0,0,0,0,0,0,0,3.3361,1.2495,7.4873,0.1096,0.0078,0.0026,7.116,1.6092,404.422,75.108,0.4014,22.96,0.38,0.1503,10.6759,0.1564,24.9458,5.6772,0.5059,0.9403,0.2835,0.0534,30.1487,249.378,0.858,9.8,0.119,3.473,0.0527,3.9298,13.865,529.0654,2.4848,6.89,0.5791,2.3555,0.1673,1.3023,23.3076,0.0118,0.0098,0.0031,83.1192,0.5038,0.0188,0.004,3.7356,0.0118,0.0098,0.0031,83.1192]
predict([X])

array([1], dtype=int64)