**MIT License**

Copyright (c) 2021 **Kunal Verma**

Permission is hereby granted, free of charge, to any person obtaining a copy
of this model and associated documentation files , to deal
in the development of classification models without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the files.

THE FILE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


# Importing Necessary Libraries and Frameworks

In [None]:
# Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Loading the dataset (Training Dataset)

In [None]:
# Loading dataset: read the training CSV (path is relative to the working directory)
# into a pandas DataFrame
data = pd.read_csv('Training Data.csv')

# Visualizing the dataset

In [None]:
# Bare expression: the notebook renders the freshly loaded frame as the cell output
data

Unnamed: 0,clientid_cr,clmbuserid_cr,conversiontime_cr,imprid_cr,adslotdimid_cr,algo_cr,audiences_cr,clickbid_cr,geodimid_cr,ip_cr,...,cityGrpDimId_cr,siteClusterIds_cr,refClusterId_cr,paid_cr,spend_cr,attributionType_cr,conversionid_cr,optimize_on_cr,bundleId_cr,conversion_fraud
0,75694,31629cd0-0b34-460f-aa61-80c3234e1225-1sjfw,1.620000e+12,01649076-c77f-4d70-afc8-213535427147-1sjd4,321300,38.0,"8gw,o3b",2.869316,144,XXX.XXX.XXX.XXX,...,348007.0,,,,,,,,,True
1,75694,4c98f114-c34c-48f4-84d6-50fc8e9b6d65-10wgg,1.620000e+12,f84d83b9-b757-4f97-9813-34f8dcc57cc5-1sjfw,129239,38.0,"8kv,2vk,8gw,2vl",2.790000,144,XXX.XXX.XXX.XXX,...,348010.0,,,,,,,,,True
2,75694,cd92087a-54b0-4911-bc3d-1144a3c2babd-1sjlg,1.620000e+12,061edef6-3d9d-408b-884a-ad840e6930ce-10o78,321554,21.0,"8gw,o3b",2.790000,144,XXX.XXX.XXX.XXX,...,,,,,,,,,,True
3,75694,f79f3de7-5edb-4319-9549-117f47996066-10wrk,1.620000e+12,0a937239-9091-4bb7-b26b-37d4bb29e50b-10oa0,321300,38.0,"8gw,o3b",2.790000,144,XXX.XXX.XXX.XXX,...,348007.0,,,,,,,,,True
4,30431,01fd1a8a-c3f8-4b57-94ff-66318d0f63c7-1sjts,1.620000e+12,0d46d0d4-8fc6-4d95-aaa1-406354a8400c-1sjo8,355768,21.0,,,144,XXX.XXX.XXX.XXX,...,,,,1.0,275.0,1.0,,,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
960,2139,41d7dab3-0ab7-4039-a936-2ceab9dbcbf7,1.620000e+12,a9371035-94c0-49a9-b807-70e472bb31a9-1sk24,321114,18.0,"8go,8gr,8k6,8h4,8gv,2vk,8gw,8iy,3si,2x6,nqv,2v...",2.000000,144,XXX.XXX.XXX.XXX,...,348005.0,128527333734333000,,,,,,,,False
961,70491,39511cf3-0358-4ae9-9fec-fe3a74e98a13,1.620000e+12,a955a5eb-1f0f-4654-8381-9f709bb98a7a-10ni8,334758,6.0,"2x6,8k6,8k8,30r,8gv,2vk,8gw,8j1",,144,XXX.XXX.XXX.XXX,...,,333734128527333000,,,,,,,,False
962,85572,e5d4037f-ec67-4d7b-8d83-9012ee7d64b9-1sjac,1.620000e+12,a97061e8-4863-440a-956c-14c85984c29e-10nw4,348089,1.0,,10.000000,144,XXX.XXX.XXX.XXX,...,,333507333503333000,,,,,,,,False
963,75694,c7491247-f203-48df-b08d-d25d0d873e20-1sjwk,1.620000e+12,a974732d-bfa0-4018-b216-b2d1c97f3337-1sj7k,356952,21.0,,2.790000,144,XXX.XXX.XXX.XXX,...,348008.0,,,,,,,,,False


# Feature Engineering

# 1. Removing redundant columns in our dataset that are not required

In [None]:
# Columns this notebook treats as redundant; they are removed in place before modelling
redundant_columns = [
    'clmbuserid_cr',
    'sdkVersion_cr',
    'imprid_cr',
    'audiences_cr',
    'ip_cr',
    'refurl_cr',
    'allAudiences_cr',
    'conversionid_cr',
    'bundleId_cr',
]
data.drop(columns=redundant_columns, inplace=True)

# Names of the columns that are left (the label column is still among them);
# the cell output is how many remain
features = data.columns
len(features)

47

## Visualizing Updated Dataset

In [None]:
# Bare expression: render the frame after the redundant columns were dropped
data

Unnamed: 0,clientid_cr,conversiontime_cr,adslotdimid_cr,algo_cr,clickbid_cr,geodimid_cr,itemcolumbiaid_cr,itemid_cr,position_cr,pubclientid_cr,...,platformId_cr,usrClusterId_cr,cityGrpDimId_cr,siteClusterIds_cr,refClusterId_cr,paid_cr,spend_cr,attributionType_cr,optimize_on_cr,conversion_fraud
0,75694,1.620000e+12,321300,38.0,2.869316,144,5488088,49504155,1.0,2320,...,,333495,348007.0,,,,,,,True
1,75694,1.620000e+12,129239,38.0,2.790000,144,5488773,49504157,1.0,15240,...,,,348010.0,,,,,,,True
2,75694,1.620000e+12,321554,21.0,2.790000,144,5488088,49504160,1.0,2320,...,,128491,,,,,,,,True
3,75694,1.620000e+12,321300,38.0,2.790000,144,5488088,49504155,1.0,2320,...,,232809333496,348007.0,,,,,,,True
4,30431,1.620000e+12,355768,21.0,,144,5614054,53607445,1.0,1,...,,,,,,1.0,275.0,1.0,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
960,2139,1.620000e+12,321114,18.0,2.000000,144,5610343,53184745,1.0,16153,...,,,348005.0,128527333734333000,,,,,,False
961,70491,1.620000e+12,334758,6.0,,144,5584022,53924142,1.0,76969,...,,257716333501333000,,333734128527333000,,,,,,False
962,85572,1.620000e+12,348089,1.0,10.000000,144,5599296,52820331,1.0,15336,...,,,,333507333503333000,,,,,,False
963,75694,1.620000e+12,356952,21.0,2.790000,144,5488773,49504154,1.0,85030,...,,,348008.0,,,,,,,False


## 2. Dealing with redundant characters like ',', '.', 'abs2342', etc

In [None]:
# Dealing with redundant characters like ',': remove every "," from string-valued
# columns and cast the result to float.
# Two failures are expected and mean "leave this column alone":
#   * AttributeError - the column is not string-valued, so it has no `.str` accessor
#   * ValueError / TypeError - the text is still not numeric after removing the commas
# Only those are caught, so unrelated errors are no longer silently swallowed.
for col in features:
    try:
        # regex=False: "," is a literal character, not a pattern
        data[col] = data[col].str.replace(",", "", regex=False).astype(float)
    except (AttributeError, TypeError, ValueError):
        # Column is not a comma-formatted numeric string column; keep it unchanged
        continue

## Visualizing updated dataset

In [None]:
# Bare expression: render the frame after the comma clean-up / float conversion
data

Unnamed: 0,clientid_cr,conversiontime_cr,adslotdimid_cr,algo_cr,clickbid_cr,geodimid_cr,itemcolumbiaid_cr,itemid_cr,position_cr,pubclientid_cr,...,platformId_cr,usrClusterId_cr,cityGrpDimId_cr,siteClusterIds_cr,refClusterId_cr,paid_cr,spend_cr,attributionType_cr,optimize_on_cr,conversion_fraud
0,75694,1.620000e+12,321300,38.0,2.869316,144,5488088,49504155,1.0,2320,...,,3.334950e+05,348007.0,,,,,,,True
1,75694,1.620000e+12,129239,38.0,2.790000,144,5488773,49504157,1.0,15240,...,,,348010.0,,,,,,,True
2,75694,1.620000e+12,321554,21.0,2.790000,144,5488088,49504160,1.0,2320,...,,1.284910e+05,,,,,,,,True
3,75694,1.620000e+12,321300,38.0,2.790000,144,5488088,49504155,1.0,2320,...,,2.328093e+11,348007.0,,,,,,,True
4,30431,1.620000e+12,355768,21.0,,144,5614054,53607445,1.0,1,...,,,,,,1.0,275.0,1.0,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
960,2139,1.620000e+12,321114,18.0,2.000000,144,5610343,53184745,1.0,16153,...,,,348005.0,1.285273e+17,,,,,,False
961,70491,1.620000e+12,334758,6.0,,144,5584022,53924142,1.0,76969,...,,2.577163e+17,,3.337341e+17,,,,,,False
962,85572,1.620000e+12,348089,1.0,10.000000,144,5599296,52820331,1.0,15336,...,,,,3.335073e+17,,,,,,False
963,75694,1.620000e+12,356952,21.0,2.790000,144,5488773,49504154,1.0,85030,...,,,348008.0,,,,,,,False


## 3. Splitting the dataset into Features and Labels

In [None]:
# Separate the frame positionally: the final column is used as the label vector,
# every column before it becomes part of the feature matrix
feature_frame = data.iloc[:, :-1]
label_column = data.iloc[:, -1]
X, y = feature_frame.values, label_column.values

# Report the resulting array shapes
for message, array in (("X shape -> ", X), ("y shape -> ", y)):
    print(message, array.shape)

X shape ->  (965, 46)
y shape ->  (965,)


## 4. Dealing with Missing or invalid values in our dataset

In [None]:
# Dealing with missing data: replace every NaN with the mean of its column.
# `verbose` is not passed: it was deprecated in scikit-learn 1.1 and removed in 1.3
# (where it raises a TypeError), and the value used here (0) was its default.
# NOTE: with strategy='mean', a column that holds only NaN is discarded by the imputer,
# so X can come back with fewer columns than it had.
# NOTE(review): the imputer is fitted before the train/test split further down, so the
# held-out rows contribute to the imputed means.
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
X = imputer.fit_transform(X)

## Visualizing the updated dataset

In [None]:
# Wrap the imputed array in a DataFrame only so the notebook renders it as a table
pd.DataFrame(X)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,35,36,37,38,39,40,41,42,43,44
0,75694.0,1.620000e+12,321300.0,38.0,2.869316,144.0,5488088.0,49504155.0,1.0,2320.0,...,4.690162e+07,1.473214,3.334950e+05,348007.000000,3.128461e+17,282461.924242,1.0,526.572165,1.0,1.0
1,75694.0,1.620000e+12,129239.0,38.0,2.790000,144.0,5488773.0,49504157.0,1.0,15240.0,...,4.690162e+07,1.473214,1.143450e+17,348010.000000,3.128461e+17,282461.924242,1.0,526.572165,1.0,1.0
2,75694.0,1.620000e+12,321554.0,21.0,2.790000,144.0,5488088.0,49504160.0,1.0,2320.0,...,4.690162e+07,1.473214,1.284910e+05,348007.040258,3.128461e+17,282461.924242,1.0,526.572165,1.0,1.0
3,75694.0,1.620000e+12,321300.0,38.0,2.790000,144.0,5488088.0,49504155.0,1.0,2320.0,...,4.690162e+07,1.473214,2.328093e+11,348007.000000,3.128461e+17,282461.924242,1.0,526.572165,1.0,1.0
4,30431.0,1.620000e+12,355768.0,21.0,3.512760,144.0,5614054.0,53607445.0,1.0,1.0,...,4.690162e+07,1.473214,1.143450e+17,348007.040258,3.128461e+17,282461.924242,1.0,275.000000,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
960,2139.0,1.620000e+12,321114.0,18.0,2.000000,144.0,5610343.0,53184745.0,1.0,16153.0,...,4.690162e+07,1.473214,1.143450e+17,348005.000000,1.285273e+17,282461.924242,1.0,526.572165,1.0,1.0
961,70491.0,1.620000e+12,334758.0,6.0,3.512760,144.0,5584022.0,53924142.0,1.0,76969.0,...,4.690162e+07,1.473214,2.577163e+17,348007.040258,3.337341e+17,282461.924242,1.0,526.572165,1.0,1.0
962,85572.0,1.620000e+12,348089.0,1.0,10.000000,144.0,5599296.0,52820331.0,1.0,15336.0,...,4.690162e+07,1.473214,1.143450e+17,348007.040258,3.335073e+17,282461.924242,1.0,526.572165,1.0,1.0
963,75694.0,1.620000e+12,356952.0,21.0,2.790000,144.0,5488773.0,49504154.0,1.0,85030.0,...,4.690162e+07,1.473214,1.143450e+17,348008.000000,3.128461e+17,282461.924242,1.0,526.572165,1.0,1.0


## Splitting the dataset in training and testing sets so as to train the model and then evaluate it on test set

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the shuffle repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Print the shape of each of the four arrays produced by the split
for message, split in (
    ("X train shape -> ", X_train),
    ("X test shape -> ", X_test),
    ("y train shape -> ", y_train),
    ("y test shape -> ", y_test),
):
    print(message, split.shape)

X train shape ->  (772, 45)
X test shape ->  (193, 45)
y train shape ->  (772,)
y test shape ->  (193,)


## 5. Feature Scaling - Standardizing the values in our dataset so every feature has zero mean and unit variance

In [None]:
# Feature Scaling: learn the scaling statistics from the training split only,
# then apply that same fitted scaler to both splits
fs = StandardScaler().fit(X_train)
X_train = fs.transform(X_train)
X_test = fs.transform(X_test)

## 6. Converting our labels to numerical values
1 - True; 0 - False

In [None]:
# Convert the labels in y_train and y_test to integer codes.
# A single encoder is fitted on the training labels and reused for the test labels so
# both arrays share one class -> integer mapping. Fitting a second encoder on y_test
# would derive the mapping from whatever classes happen to be in that split, and the
# codes could then disagree with the ones the classifier is trained on.
# transform() raises ValueError if y_test contains a label never seen in y_train.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

# Making our Model - Random Forest Classifier with 300 Estimators

In [None]:
# Classifier: a random forest of 300 trees. fit() returns the estimator itself, so
# construction and training are chained; the trailing bare name makes the notebook
# show the fitted estimator as the cell output.
classifier = RandomForestClassifier(n_estimators=300).fit(X_train, y_train)
classifier

RandomForestClassifier(n_estimators=300)

## Getting the accuracy score

In [None]:

# Accuracy on the held-out split: predict first, then compare against the true labels
y_test_pred = classifier.predict(X_test)
score = accuracy_score(y_test, y_test_pred)
score

0.9326424870466321

# Preparing the Submission csv file

## Reading the test dataset for predictions

In [None]:
# Read the test CSV (path is relative to the working directory) that predictions
# will be made on
test=pd.read_csv('Test Data.csv')

## Visualizing our dataset

In [None]:
# Bare expression: render the freshly loaded test frame
test

Unnamed: 0,record_id,clientid_cr,clmbuserid_cr,conversiontime_cr,imprid_cr,adslotdimid_cr,algo_cr,audiences_cr,clickbid_cr,geodimid_cr,...,usrClusterId_cr,cityGrpDimId_cr,siteClusterIds_cr,refClusterId_cr,paid_cr,spend_cr,attributionType_cr,conversionid_cr,optimize_on_cr,bundleId_cr
0,1000,82122,1045e8b8-662e-49b3-bc4a-4af5326c8236-62kw,1620000000000,864cdaf0-cca7-4bcc-b210-a33858cf8755-1siz8,233531,38,,,145,...,,,3.337341e+17,,,,,,,
1,1001,75694,53a9a707-967d-4a67-8e9f-e3dd55b464e8-10wm0,1620000000000,6c44c171-9445-4e6b-856e-89bfcd0be22c-1sito,356952,20,,3.10,144,...,,348006.0,,,,,,,,
2,1002,75694,53a9a707-967d-4a67-8e9f-e3dd55b464e8-10wm0,1620000000000,c00eae97-46c5-48b8-89bf-9d45628b9de3-1siz8,356952,18,,3.10,144,...,,348006.0,,,,,,,,
3,1003,30431,5febd7e2-b66a-4af0-91ec-117557684188-10wrk,1620000000000,40aad66c-d2da-4635-8b5e-46aca58d2dc6-10o78,355768,21,,,144,...,,348010.0,,,1.0,275.0,1.0,,,
4,1004,30431,5febd7e2-b66a-4af0-91ec-117557684188-10wrk,1620000000000,4742d072-32ed-45bc-9ca9-5dca4a69f5b1-10onw,355768,21,,,144,...,,348010.0,,,1.0,275.0,1.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,1450,75694,d7b5dfcb-b5ea-4862-8e47-5d775c568b00-6m6g,1620000000000,ff755b7d-00a4-426a-835c-ddf7f56af2b3-10o78,332783,20,"8kt,8j4,8k6,8kv,8j6,8k8,2vk,8gv,2vl,8gw,8iy,3s...",3.10,144,...,3.334963e+17,,3.337341e+17,,,,,,,
451,1451,75694,81b2e0f3-ba16-4f1d-ba54-cbb18ab54dca-1sjac,1620000000000,ff89a128-e0b9-4dcd-9a3f-257dff9e4dd5-10o78,129239,20,"8gq,8k6,8kv,8k8,2vk,8gv,2vl,8gw,2x6,2vv,30r,8g...",,144,...,3.334951e+17,348009.0,,,,,,,,
452,1452,75694,9453538d-3c8a-4e9b-aff3-85a37b722429-1sj4s,1620000000000,ff9b716c-4cd1-44a7-9c21-19f6a73f31a2-10nqk,129237,37,"8kv,2vk,8gw,2vl",3.10,144,...,,348010.0,,,,,,,,
453,1453,8895,8399fad4-a7de-49ec-9e58-d18530086e7d,1620000000000,ffc803da-ac63-4bf3-88fa-bd1d70bcdc39-10ntc,334760,2,"jof,8l5,jog,3si,jo3,nqv,jo8,8ke,8kf,8kg,jo9,7g...",,144,...,3.335000e+05,,3.337341e+17,,1.0,600.0,1.0,,,


# Performing the same Feature Engineering as above

In [None]:
# Keep the record ids aside (needed for the submission file) before the column is dropped
test_col = test['record_id']

# Drop the id column plus the same redundant columns that were removed from the training frame
test.drop(['record_id','clmbuserid_cr','sdkVersion_cr','imprid_cr','audiences_cr','ip_cr','refurl_cr','allAudiences_cr','conversionid_cr','bundleId_cr'],axis=1,inplace=True)

# Take the column list from the test frame itself: `data.columns` additionally holds the
# `conversion_fraud` label, which does not exist in the test set, so it would describe
# the wrong frame here. The cell output is the number of remaining test columns.
features = test.columns
len(features)

47

In [None]:
# Bare expression: render the test frame after the columns were dropped
test

Unnamed: 0,clientid_cr,conversiontime_cr,adslotdimid_cr,algo_cr,clickbid_cr,geodimid_cr,itemcolumbiaid_cr,itemid_cr,position_cr,pubclientid_cr,...,uv_cr,platformId_cr,usrClusterId_cr,cityGrpDimId_cr,siteClusterIds_cr,refClusterId_cr,paid_cr,spend_cr,attributionType_cr,optimize_on_cr
0,82122,1620000000000,233531,38,,145,5526656,53184285,3,2658,...,42860785.0,1.0,,,3.337341e+17,,,,,
1,75694,1620000000000,356952,20,3.10,144,5488773,49504162,1,85030,...,,,,348006.0,,,,,,
2,75694,1620000000000,356952,18,3.10,144,5488088,49504161,1,85030,...,,,,348006.0,,,,,,
3,30431,1620000000000,355768,21,,144,5614054,53607445,1,1,...,,,,348010.0,,,1.0,275.0,1.0,
4,30431,1620000000000,355768,21,,144,5614054,53607445,1,1,...,,,,348010.0,,,1.0,275.0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,75694,1620000000000,332783,20,3.10,144,5488773,49504155,181581299,2658,...,,,3.334963e+17,,3.337341e+17,,,,,
451,75694,1620000000000,129239,20,,144,5488773,49504157,1,15240,...,,1.0,3.334951e+17,348009.0,,,,,,
452,75694,1620000000000,129237,37,3.10,144,5488773,49504154,1,15240,...,,,,348010.0,,,,,,
453,8895,1620000000000,334760,2,,144,5601460,52820721,3,76969,...,,,3.335000e+05,,3.337341e+17,,1.0,600.0,1.0,


In [None]:
# Dealing with redundant characters like ',': remove every "," from string-valued
# columns of the test frame and cast the result to float.
# The loop walks the test frame's own columns, so it never looks up a column that only
# exists in the training data (such as the label).
# Two failures are expected and mean "leave this column alone":
#   * AttributeError - the column is not string-valued, so it has no `.str` accessor
#   * ValueError / TypeError - the text is still not numeric after removing the commas
# Only those are caught, so unrelated errors are no longer silently swallowed.
for col in test.columns:
    try:
        # regex=False: "," is a literal character, not a pattern
        test[col] = test[col].str.replace(",", "", regex=False).astype(float)
    except (AttributeError, TypeError, ValueError):
        # Column is not a comma-formatted numeric string column; keep it unchanged
        continue

## Visualizing the prepared dataset

In [None]:
# Bare expression: render the test frame after the comma clean-up / float conversion
test

Unnamed: 0,clientid_cr,conversiontime_cr,adslotdimid_cr,algo_cr,clickbid_cr,geodimid_cr,itemcolumbiaid_cr,itemid_cr,position_cr,pubclientid_cr,...,uv_cr,platformId_cr,usrClusterId_cr,cityGrpDimId_cr,siteClusterIds_cr,refClusterId_cr,paid_cr,spend_cr,attributionType_cr,optimize_on_cr
0,82122,1620000000000,233531,38,,145,5526656,53184285,3,2658,...,42860785.0,1.0,,,3.337341e+17,,,,,
1,75694,1620000000000,356952,20,3.10,144,5488773,49504162,1,85030,...,,,,348006.0,,,,,,
2,75694,1620000000000,356952,18,3.10,144,5488088,49504161,1,85030,...,,,,348006.0,,,,,,
3,30431,1620000000000,355768,21,,144,5614054,53607445,1,1,...,,,,348010.0,,,1.0,275.0,1.0,
4,30431,1620000000000,355768,21,,144,5614054,53607445,1,1,...,,,,348010.0,,,1.0,275.0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,75694,1620000000000,332783,20,3.10,144,5488773,49504155,181581299,2658,...,,,3.334963e+17,,3.337341e+17,,,,,
451,75694,1620000000000,129239,20,,144,5488773,49504157,1,15240,...,,1.0,3.334951e+17,348009.0,,,,,,
452,75694,1620000000000,129237,37,3.10,144,5488773,49504154,1,15240,...,,,,348010.0,,,,,,
453,8895,1620000000000,334760,2,,144,5601460,52820721,3,76969,...,,,3.335000e+05,,3.337341e+17,,1.0,600.0,1.0,


## Preparing the dataset as array to feed the model

In [None]:
# Preparing the test data: every remaining row and column of the test frame is a
# model input, so take the whole frame as one array
X_final_test = test.values

## Visualizing the test array

In [None]:
# Bare expression: render the raw test array (still containing NaN at this point)
X_final_test

array([[82122, 1620000000000, 233531, ..., nan, nan, nan],
       [75694, 1620000000000, 356952, ..., nan, nan, nan],
       [75694, 1620000000000, 356952, ..., nan, nan, nan],
       ...,
       [75694, 1620000000000, 129237, ..., nan, nan, nan],
       [8895, 1620000000000, 334760, ..., 600.0, 1.0, nan],
       [75694, 1620000000000, 239188, ..., nan, nan, nan]], dtype=object)

In [None]:
# Dealing with missing values: reuse the imputer that was fitted on the training
# features and only transform() here. That fills the test set with the same column means
# the classifier's training data was filled with, and discards the same all-NaN columns,
# so the feature columns stay aligned with the training matrix. Calling fit_transform()
# again would overwrite the training statistics with test-set ones and could drop a
# different set of columns.
imputer_test = imputer  # same fitted object; name kept for any cell that refers to it
X_final_test = imputer_test.transform(X_final_test)
print("X_final_test ->",X_final_test.shape)

In [None]:
# Wrap the imputed test array in a DataFrame only so the notebook renders it as a table
pd.DataFrame(X_final_test)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,35,36,37,38,39,40,41,42,43,44
0,82122.0,1.620000e+12,233531.0,38.0,3.657184,145.0,5526656.0,53184285.0,3.0,2658.0,...,42860785.0,1.000000,1.067699e+17,348007.062914,3.337341e+17,283094.857143,1.0,576.479481,1.0,1.0
1,75694.0,1.620000e+12,356952.0,20.0,3.100000,144.0,5488773.0,49504162.0,1.0,85030.0,...,46336182.0,1.486842,1.067699e+17,348006.000000,3.102418e+17,283094.857143,1.0,576.479481,1.0,1.0
2,75694.0,1.620000e+12,356952.0,18.0,3.100000,144.0,5488088.0,49504161.0,1.0,85030.0,...,46336182.0,1.486842,1.067699e+17,348006.000000,3.102418e+17,283094.857143,1.0,576.479481,1.0,1.0
3,30431.0,1.620000e+12,355768.0,21.0,3.657184,144.0,5614054.0,53607445.0,1.0,1.0,...,46336182.0,1.486842,1.067699e+17,348010.000000,3.102418e+17,283094.857143,1.0,275.000000,1.0,1.0
4,30431.0,1.620000e+12,355768.0,21.0,3.657184,144.0,5614054.0,53607445.0,1.0,1.0,...,46336182.0,1.486842,1.067699e+17,348010.000000,3.102418e+17,283094.857143,1.0,275.000000,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,75694.0,1.620000e+12,332783.0,20.0,3.100000,144.0,5488773.0,49504155.0,181581299.0,2658.0,...,46336182.0,1.486842,3.334963e+17,348007.062914,3.337341e+17,283094.857143,1.0,576.479481,1.0,1.0
451,75694.0,1.620000e+12,129239.0,20.0,3.657184,144.0,5488773.0,49504157.0,1.0,15240.0,...,46336182.0,1.000000,3.334951e+17,348009.000000,3.102418e+17,283094.857143,1.0,576.479481,1.0,1.0
452,75694.0,1.620000e+12,129237.0,37.0,3.100000,144.0,5488773.0,49504154.0,1.0,15240.0,...,46336182.0,1.486842,1.067699e+17,348010.000000,3.102418e+17,283094.857143,1.0,576.479481,1.0,1.0
453,8895.0,1.620000e+12,334760.0,2.0,3.657184,144.0,5601460.0,52820721.0,3.0,76969.0,...,46336182.0,1.486842,3.335000e+05,348007.062914,3.337341e+17,283094.857143,1.0,600.000000,1.0,1.0


In [None]:
# Feature Scaling: apply the scaler that was fitted on X_train instead of fitting a new
# one on the test set. The classifier was trained on features standardized with the
# training statistics, so the rows it predicts on must be standardized with those same
# statistics; a scaler refitted on the test data would map the same raw value to a
# different scaled value.
fst = fs  # same fitted object; name kept for any cell that refers to it
X_final_test = fst.transform(X_final_test)

# Making the predictions on test set

In [None]:
# Predict a class for every row of the prepared test matrix with the fitted forest
y_final_pred = classifier.predict(X_final_test)

# Visualizing the predictions as true and false

In [None]:
# Cast the predicted integer codes to booleans (non-zero -> True, 0 -> False)
y_final_pred=y_final_pred.astype(bool)
# Wrapped in a DataFrame only so the notebook renders the predictions as a table
pd.DataFrame(y_final_pred)

Unnamed: 0,0
0,False
1,False
2,False
3,False
4,False
...,...
450,False
451,False
452,True
453,False


## Preparing the columns in dataset as required by the problem statement

In [None]:
# Build the two-column submission table in one step: the record ids saved from the
# test file next to the boolean prediction for each of them
datafr = pd.DataFrame(
    {
        'record_id': test_col,
        'conversion_fraud': y_final_pred,
    }
)

# Visualizing the prepared csv file

In [None]:
# Bare expression: render the submission table before it is written to disk
datafr

Unnamed: 0,record_id,conversion_fraud
0,1000,False
1,1001,False
2,1002,False
3,1003,False
4,1004,False
...,...,...
450,1450,False
451,1451,False
452,1452,True
453,1453,False


## saving submission.csv file at current path

In [None]:
# Write the submission table to the working directory; index=False leaves the
# DataFrame's row index out of the file
datafr.to_csv("submission.csv",index=False)