In [38]:
import os
import json
import pickle
import warnings
import datetime
import pandas as pd
import numpy as np
from tqdm import tqdm
from itertools import combinations, product

warnings.filterwarnings("ignore", category=FutureWarning)

In [39]:
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

In [40]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.base import BaseEstimator, ClassifierMixin

In [41]:
import tensorflow as tf
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Conv1D, LSTM, Dense, Flatten
from keras.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau,
    TensorBoard,
)

In [42]:
# Report the TensorFlow build and the GPUs it can see.
print("tensorflow version:", tf.__version__)
physicalDevices = tf.config.list_physical_devices("GPU")
print(physicalDevices)

# Enable on-demand memory allocation on every visible GPU rather than only the
# first one: TensorFlow requires the memory-growth setting to be the same
# across GPUs, so configuring device 0 alone breaks on multi-GPU hosts.
# With no GPU present the loop simply does nothing.
for physicalDevice in physicalDevices:
    tf.config.experimental.set_memory_growth(physicalDevice, True)

tensorflow version: 2.10.1
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [43]:
# Hour-stamped output folder for this run, e.g. "./Models/20240131-17/".
ModelDir = "./Models/" + datetime.datetime.now().strftime("%Y%m%d-%H") + "/"
# makedirs creates the parent "./Models/" when needed, and exist_ok=True makes
# re-running the cell within the same hour a no-op instead of racing between
# an existence check and the directory creation.
os.makedirs(ModelDir, exist_ok=True)

In [44]:
%reload_ext tensorboard
# Artifact locations for this run: the checkpoint path is built under ModelDir
# and the TensorBoard logs go to an hour-stamped folder under logs/fit/.
modelPath = ModelDir + "model1.sav"
logsDir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H")
tensorboardCBK = TensorBoard(log_dir=logsDir, histogram_freq=1)
# Stop training after 10 epochs without a val_loss improvement.
earlyStoppingCBK = EarlyStopping(
    monitor='val_loss', patience=10, verbose=0, mode='min')
# Checkpoint only the model with the lowest val_loss seen so far.
modelCBK = ModelCheckpoint(
    modelPath+'.mcp.hdf5', save_best_only=True, monitor='val_loss', mode='min')
# Multiply the learning rate by 0.1 after 7 epochs without a val_loss improvement.
reduceLRPlateauCBK = ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=7, verbose=1, mode='min')
# NOTE(review): modelCBK is not part of this list, and none of the fit() calls
# visible in this notebook pass `callbacks` (or validation data, which the
# val_loss monitors rely on) - confirm whether these are meant to be wired in.
callbacks = [earlyStoppingCBK, 
             reduceLRPlateauCBK, tensorboardCBK]


In [45]:
# from metadata import BICYCLEMETADATA

In [46]:
class BicycleDataset:
    """Hourly bicycle counts per travel direction, built from per-direction CSV files.

    The metadata JSON must hold a ``"BICYCLEMETADATA"`` list whose entries
    provide ``filename``, ``renameColumns`` and ``Zipcode``. Every listed CSV
    is resampled to 1-hour sums, reduced to ``columnsRetain``, renamed, tagged
    with its zip code and outer-merged on ``["day", "Zipcode"]``. The merged
    frame is exposed as ``bicycleDataFrame`` and carries a ``BestDirections``
    column listing the code(s) of the busiest direction(s) for each hour.

    Args:
        bicycleFolderPath: Folder containing the CSV files (a trailing slash
            is optional).
        metaDataFilepath: Path of the metadata JSON file.
        columnsRetain: Columns kept from each resampled CSV; defaults to
            ``["day", "Total"]``.
    """

    def __init__(
        self,
        bicycleFolderPath: str,
        metaDataFilepath: str,
        columnsRetain: list = None,
    ):
        self.folderPath = bicycleFolderPath
        # Build the default per instance so no list object is shared between instances.
        self.columnsRetain = (
            ["day", "Total"] if columnsRetain is None else columnsRetain
        )
        # Context manager so the metadata file handle is always closed.
        with open(metaDataFilepath, "r") as metaDataFile:
            self.metaData = json.load(metaDataFile)["BICYCLEMETADATA"]
        # Integer code used for each direction inside "BestDirections".
        self.directionsMapping = {
            "NorthBound": 1,
            "SouthBound": 2,
            "WestBound": 3,
            "EastBound": 4,
        }
        self.bicycleDataFrame = self.LoadDataSet()

    def DropColumns(
        self,
        dataFrame: pd.DataFrame,
        renameColumns: dict,
    ):
        """Keep only ``self.columnsRetain`` and rename them with ``renameColumns``."""
        dataFrame = dataFrame[self.columnsRetain]
        dataFrame = dataFrame.rename(columns=renameColumns)
        return dataFrame

    def ConvertDaytoDateTime(self, dataFrame: pd.DataFrame):
        """Return a copy with ``day`` parsed as datetime, sorted chronologically."""
        # assign() builds a new frame, so the caller's frame is left untouched.
        return dataFrame.assign(day=pd.to_datetime(dataFrame["day"])).sort_values(
            by="day"
        )

    def Get1HrIntervals(self, dataFrame: pd.DataFrame, columnName: str):
        """Sum all columns into 1-hour bins taken from the ``columnName`` timestamps."""
        # A Timedelta rule is equivalent to the "1H" alias but does not depend
        # on which spelling of the hourly alias the installed pandas accepts.
        oneHour = pd.Timedelta(hours=1)
        return dataFrame.resample(oneHour, on=columnName).sum().reset_index()

    def ConcatDataFrames(self, dataFrames: list):
        """Concatenate frames side by side, keeping the first of any duplicated column."""
        dataFrame = pd.concat(dataFrames, axis=1)
        retainColumns = ~dataFrame.columns.duplicated()
        dataFrame = dataFrame.loc[:, retainColumns]
        return dataFrame

    def FindBestDirections(self, row: pd.Series):
        """Return the codes of every direction whose count equals the row maximum.

        ``row`` is indexed by direction name; ties yield several codes.
        """
        maxValue = row.max()
        return [
            self.directionsMapping[direction]
            for direction in row.index
            if row[direction] == maxValue
        ]

    def LoadDataSet(self):
        """Read, resample and merge every CSV listed in the metadata.

        Returns:
            DataFrame with ``day``, ``Zipcode``, one column per direction and
            ``BestDirections``; rows with any missing value are dropped.

        Raises:
            ValueError: If the metadata lists no files.
            KeyError: If no known direction column exists after merging.
        """
        if not self.metaData:
            raise ValueError("BICYCLEMETADATA lists no files to load")

        bicycleDataFrame = None
        for data in self.metaData:
            print("Reading DataSet from", data["filename"])
            # os.path.join works with or without a trailing slash on folderPath.
            dataFrame = pd.read_csv(
                os.path.join(self.folderPath, data["filename"]),
                index_col=None,
                header=0,
            )
            dataFrame = self.ConvertDaytoDateTime(dataFrame)
            dataFrame = self.Get1HrIntervals(dataFrame, "day")
            dataFrame = self.DropColumns(
                dataFrame, renameColumns=data["renameColumns"]
            )
            dataFrame["Zipcode"] = data["Zipcode"]
            # Move the freshly appended Zipcode column right after the first column.
            DFColumns = list(dataFrame.columns)
            columnsRearrange = [DFColumns[0], DFColumns[-1]] + DFColumns[1:-1]
            dataFrame = dataFrame[columnsRearrange]
            if bicycleDataFrame is None:
                bicycleDataFrame = dataFrame
            else:
                bicycleDataFrame = pd.merge(
                    bicycleDataFrame, dataFrame, on=["day", "Zipcode"], how="outer"
                )

        bicycleDataFrame = bicycleDataFrame.dropna()
        bicycleDataFrame["day"] = pd.to_datetime(bicycleDataFrame["day"])
        # Rank whichever known directions were actually loaded, in mapping order,
        # instead of assuming exactly North/South/West are present.
        directionColumns = [
            direction
            for direction in self.directionsMapping
            if direction in bicycleDataFrame.columns
        ]
        if not directionColumns:
            raise KeyError(
                "none of the direction columns "
                + str(list(self.directionsMapping))
                + " are present after merging"
            )
        bicycleDataFrame["BestDirections"] = bicycleDataFrame[directionColumns].apply(
            self.FindBestDirections, axis=1
        )
        return bicycleDataFrame

In [47]:
# Load the per-direction bicycle counts described by the metadata file and keep
# the merged frame for the later join with the weather data.
bicycleDatasetFolderPath = "Dataset/Bicycle Dataset/"
bicycleMetaDataFilepath = "Dataset/Bicycle Dataset/metadata/metadata.json"
bicycleData = BicycleDataset(bicycleDatasetFolderPath, bicycleMetaDataFilepath)
bicycleDataFrame = bicycleData.bicycleDataFrame
print("bicycleDataFrame Shape", bicycleDataFrame.shape)
# Preview the first rows as the cell output.
bicycleDataFrame.head()

Reading DataSet from Colorado_and_30th_Northbound.csv
Reading DataSet from Colorado_and_30th_Southbound.csv
Reading DataSet from Colorado_and_30th_Westbound.csv
bicycleDataFrame Shape (55032, 6)


Unnamed: 0,day,Zipcode,NorthBound,SouthBound,WestBound,BestDirections
0,2017-08-01 00:00:00,80309,1,2.0,2.0,"[2, 3]"
1,2017-08-01 01:00:00,80309,0,1.0,0.0,[2]
2,2017-08-01 02:00:00,80309,1,0.0,0.0,[1]
3,2017-08-01 03:00:00,80309,0,0.0,0.0,"[1, 2, 3]"
4,2017-08-01 04:00:00,80309,0,0.0,0.0,"[1, 2, 3]"


In [48]:
class WeatherDataset:
    """Weather observations loaded from a folder of JSON files.

    Each JSON file maps a timestamp string to a dict of weather fields. All
    files are flattened into one DataFrame (``weatherDataFrame``) with a
    ``day`` datetime column and a constant ``Zipcode`` column. Columns whose
    name contains ``"windDir"`` have three-letter compass points collapsed
    via ``replaceDirection`` and are then label-encoded; columns whose name
    contains ``"weather"`` are label-encoded as well. The fitted encoders are
    kept on ``windDirectionEncoder`` and ``climateEncoder``.

    Args:
        weatherDatasetFolderPath: Folder holding the JSON files (a trailing
            slash is optional).
        zipcode: Value written to the ``Zipcode`` column; defaults to 80309.
    """

    def __init__(self, weatherDatasetFolderPath, zipcode: int = 80309):
        self.folderPath = weatherDatasetFolderPath
        self.zipcode = zipcode
        # Collapse the eight three-letter compass points onto a cardinal one.
        self.replaceDirection = {
            "NNE": "N",
            "ENE": "E",
            "ESE": "E",
            "SSE": "S",
            "SSW": "S",
            "WSW": "W",
            "WNW": "W",
            "NNW": "N",
        }
        self.windDirectionEncoder = None
        self.climateEncoder = None
        self.weatherDataFrame = self.LoadDataSet()

    def LoadDataSet(self):
        """Read every file in the folder and return the preprocessed frame.

        Raises:
            ValueError: If the folder yields no records at all.
        """
        records = []
        # Sorted so the row order does not depend on the OS directory listing order.
        for fileName in tqdm(sorted(os.listdir(self.folderPath))):
            with open(os.path.join(self.folderPath, fileName), "r") as jsonFile:
                fileData = json.load(jsonFile)
            for date, weather in fileData.items():
                # One flat record per timestamp: "day" plus every weather field.
                record = {"day": date}
                record.update(weather)
                records.append(record)

        if not records:
            raise ValueError("no weather records found in " + str(self.folderPath))

        weatherDataFrame = pd.DataFrame(records)
        weatherDataFrame["Zipcode"] = self.zipcode
        weatherDataFrame["day"] = pd.to_datetime(weatherDataFrame["day"])
        return self.PreprocessDataset(weatherDataFrame)

    def _FitEncoder(self, weatherDataFrame: pd.DataFrame, columns: list):
        """Fit one LabelEncoder on the distinct values found across ``columns``."""
        distinctValues = []
        for column in columns:
            distinctValues += list(weatherDataFrame[column].unique())
        return LabelEncoder().fit(distinctValues)

    def PreprocessDataset(self, weatherDataFrame: pd.DataFrame):
        """Normalise wind directions and label-encode the categorical columns.

        The frame is modified column by column and returned. Encoders are
        fitted on the first call only; later calls reuse them.
        """
        windDirectionColumns = [
            columnName
            for columnName in weatherDataFrame.columns
            if "windDir" in columnName
        ]
        # Restrict the compass-point replacement to wind-direction columns so
        # that equal strings in unrelated columns are never rewritten.
        for column in windDirectionColumns:
            weatherDataFrame[column] = weatherDataFrame[column].replace(
                self.replaceDirection
            )

        if self.windDirectionEncoder is None:
            self.windDirectionEncoder = self._FitEncoder(
                weatherDataFrame, windDirectionColumns
            )
        for column in windDirectionColumns:
            weatherDataFrame[column] = self.windDirectionEncoder.transform(
                weatherDataFrame[column]
            )

        climateColumns = [
            columnName
            for columnName in weatherDataFrame.columns
            if "weather" in columnName
        ]
        if self.climateEncoder is None:
            self.climateEncoder = self._FitEncoder(weatherDataFrame, climateColumns)
        for column in climateColumns:
            weatherDataFrame[column] = self.climateEncoder.transform(
                weatherDataFrame[column]
            )

        return weatherDataFrame

In [49]:
# Load and encode every weather JSON file found in the folder.
weatherDatasetFolderPath = "Dataset/Weather Dataset/JsonFiles/"
weatherData = WeatherDataset(weatherDatasetFolderPath)
weatherDataFrame = weatherData.weatherDataFrame
print("weatherDataFrame Shape", weatherDataFrame.shape)
# Preview the first rows as the cell output.
weatherDataFrame.head()

100%|██████████| 2293/2293 [00:10<00:00, 217.79it/s]


weatherDataFrame Shape (55025, 27)


Unnamed: 0,day,tempF_North,dewpointF_North,windSpeedMPH_North,windDir_North,weather_North,tempF_East,dewpointF_East,windSpeedMPH_East,windDir_East,...,dewpointF_West,windSpeedMPH_West,windDir_West,weather_West,tempF_currentLocation,dewpointF_currentLocation,windSpeedMPH_currentLocation,windDir_currentLocation,weather_currentLocation,Zipcode
0,2017-08-01 00:00:00,62.88,50.56,7.67,0,3,63.39,53.28,3.22,0,...,47.78,10.16,0,3,70.38,51.6,4.13,0,3,80309
1,2017-08-01 01:00:00,63.18,50.47,3.67,4,3,63.19,53.91,4.99,7,...,46.34,4.19,5,3,69.64,50.92,5.61,4,3,80309
2,2017-08-01 02:00:00,63.78,50.73,4.47,7,3,61.2,52.92,8.28,7,...,46.02,2.72,6,3,68.89,51.03,4.83,7,3,80309
3,2017-08-01 03:00:00,62.93,50.1,5.41,7,3,60.33,53.55,9.31,7,...,46.13,4.13,7,3,67.74,50.2,6.22,7,3,80309
4,2017-08-01 04:00:00,62.8,49.56,5.08,7,3,59.31,53.39,8.46,7,...,44.44,4.26,7,20,67.51,49.98,4.8,7,3,80309


In [50]:
# Join bicycle counts and weather on timestamp and zip code. The outer join
# keeps keys present in only one source; those rows carry NaNs and are removed
# by the dropna() in the following cell.
finalDataFrame = pd.merge(
    bicycleDataFrame, weatherDataFrame, on=["day", "Zipcode"], how="outer"
)

In [51]:
# Discard rows with any missing value, then remove the two join keys so that
# only count, target and weather columns remain.
finalDataFrame = finalDataFrame.dropna()
finalDataFrame = finalDataFrame.drop(columns=["day", "Zipcode"])
# Show the remaining column names as the cell output.
finalDataFrame.columns

Index(['NorthBound', 'SouthBound', 'WestBound', 'BestDirections',
       'tempF_North', 'dewpointF_North', 'windSpeedMPH_North', 'windDir_North',
       'weather_North', 'tempF_East', 'dewpointF_East', 'windSpeedMPH_East',
       'windDir_East', 'weather_East', 'tempF_South', 'dewpointF_South',
       'windSpeedMPH_South', 'windDir_South', 'weather_South', 'tempF_West',
       'dewpointF_West', 'windSpeedMPH_West', 'windDir_West', 'weather_West',
       'tempF_currentLocation', 'dewpointF_currentLocation',
       'windSpeedMPH_currentLocation', 'windDir_currentLocation',
       'weather_currentLocation'],
      dtype='object')

In [52]:
# Preview the merged frame before the target is split off.
finalDataFrame.head()

Unnamed: 0,NorthBound,SouthBound,WestBound,BestDirections,tempF_North,dewpointF_North,windSpeedMPH_North,windDir_North,weather_North,tempF_East,...,tempF_West,dewpointF_West,windSpeedMPH_West,windDir_West,weather_West,tempF_currentLocation,dewpointF_currentLocation,windSpeedMPH_currentLocation,windDir_currentLocation,weather_currentLocation
0,1,2.0,2.0,"[2, 3]",62.88,50.56,7.67,0.0,3.0,63.39,...,57.04,47.78,10.16,0.0,3.0,70.38,51.6,4.13,0.0,3.0
1,0,1.0,0.0,[2],63.18,50.47,3.67,4.0,3.0,63.19,...,54.81,46.34,4.19,5.0,3.0,69.64,50.92,5.61,4.0,3.0
2,1,0.0,0.0,[1],63.78,50.73,4.47,7.0,3.0,61.2,...,53.95,46.02,2.72,6.0,3.0,68.89,51.03,4.83,7.0,3.0
3,0,0.0,0.0,"[1, 2, 3]",62.93,50.1,5.41,7.0,3.0,60.33,...,54.05,46.13,4.13,7.0,3.0,67.74,50.2,6.22,7.0,3.0
4,0,0.0,0.0,"[1, 2, 3]",62.8,49.56,5.08,7.0,3.0,59.31,...,53.18,44.44,4.26,7.0,20.0,67.51,49.98,4.8,7.0,3.0


In [53]:
# Split off the multi-label target. The raw per-direction counts are dropped
# together with it, since BestDirections was computed from those columns.
# NOTE(review): this cell overwrites finalDataFrame, so running it a second
# time fails because "BestDirections" is no longer present.
y = finalDataFrame["BestDirections"]
finalDataFrame = finalDataFrame.drop(
    columns=["NorthBound", "SouthBound", "WestBound", "BestDirections"]
)

In [54]:
# Encode each list of direction codes as a binary indicator row. The fitted
# binarizer is kept so predictions can be mapped back with inverse_transform.
MlBinarizer = MultiLabelBinarizer()
MlBinarizer = MlBinarizer.fit(y)
y = MlBinarizer.transform(y)

In [55]:
# Hold out 20% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    finalDataFrame, y, test_size=0.2, random_state=42
)

In [56]:
# Sanity check: feature and label arrays must have matching row counts per split.
print("X_train shape", X_train.shape)
print("X_test shape", X_test.shape)
print("y_train shape", y_train.shape)
print("y_test shape", y_test.shape)

X_train shape (44020, 25)
X_test shape (11005, 25)
y_train shape (44020, 3)
y_test shape (11005, 3)


In [57]:
# Convert the feature frames to NumPy arrays; the CNN/LSTM wrappers defined
# below call X.reshape(...) on their input.
# NOTE(review): this cell overwrites X_train/X_test, so it cannot be re-run
# without first re-running the train/test split cell.
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()

In [58]:
def CreateXGBClassifier(
    parameters: dict = {"tree_method": "hist", "device": "cuda", "verbosity": 1}
):
    """Build an unfitted XGBClassifier.

    Args:
        parameters: Keyword arguments forwarded verbatim to ``XGBClassifier``.
            A supplied dict replaces the defaults entirely; nothing is merged.

    Returns:
        The newly constructed ``XGBClassifier``.
    """
    return XGBClassifier(**parameters)

In [59]:
def CreateLGBClassifier(parameters: dict = {"device": "gpu", "verbosity": 1}):
    """Build an unfitted LGBMClassifier.

    Args:
        parameters: Keyword arguments forwarded verbatim to ``LGBMClassifier``.
            A supplied dict replaces the defaults entirely; nothing is merged.

    Returns:
        The newly constructed ``LGBMClassifier``.
    """
    return LGBMClassifier(**parameters)

In [60]:
def CreateCBClassifier(
    parameters: dict = {
        "task_type": "GPU",
        "devices": "0:1",
        "verbose": 1,
        "iterations": 100,
    }
):
    """Build an unfitted CatBoostClassifier.

    Args:
        parameters: Keyword arguments forwarded verbatim to
            ``CatBoostClassifier``. A supplied dict replaces the defaults
            entirely; nothing is merged.

    Returns:
        The newly constructed ``CatBoostClassifier``.
    """
    return CatBoostClassifier(**parameters)

In [61]:
def CreateLRClassifier(
    parameters: dict = {
        "n_jobs": -1,
    }
):
    """Build an unfitted LogisticRegression.

    The default requests ``n_jobs=-1``. The key was previously misspelled
    ``"n_jops"``, which ``LogisticRegression`` rejects as an unexpected
    keyword argument, so calling this factory without arguments failed.

    Args:
        parameters: Keyword arguments forwarded verbatim to
            ``LogisticRegression``. A supplied dict replaces the defaults
            entirely; nothing is merged.

    Returns:
        The newly constructed ``LogisticRegression``.
    """
    return LogisticRegression(**parameters)

In [62]:
class CNNClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn style wrapper around a small Keras Conv1D network.

    The network is Conv1D(64 filters, kernel 3, relu) -> Flatten ->
    Dense(50, relu) -> Dense(numClasses, sigmoid); it is built and compiled
    when the wrapper is constructed.

    Args:
        inputShape: Two-element shape of one sample as fed to the network;
            2-D inputs are reshaped to ``(rows, inputShape[0], inputShape[1])``.
        numClasses: Number of sigmoid output units.
        epochs: Number of epochs used by ``fit``.
        batchSize: Batch size used by ``fit``.
        lossFunction: Loss passed to ``model.compile``.
        optimizer: Optimizer passed to ``model.compile``.
        metrics: Metrics passed to ``model.compile``.
        verbose: Verbosity passed to ``model.fit``.
    """

    def __init__(
        self,
        inputShape,
        numClasses,
        epochs,
        batchSize,
        lossFunction,
        optimizer,
        metrics,
        verbose,
    ):
        self.verbose = verbose
        self.lossFunction = lossFunction
        self.optimizer = optimizer
        self.metrics = metrics
        self.inputShape = inputShape
        self.numClasses = numClasses
        self.epochs = epochs
        self.batchSize = batchSize
        # NOTE(review): classes_ is assigned at construction time rather than
        # in fit(); presumably so meta-estimators can read it - confirm.
        self.classes_ = np.arange(self.numClasses)
        self.model = self.CreateCNNModel()

    def _AsModelInput(self, X):
        """Reshape a ``(rows, features)`` input to ``(rows, *inputShape)``."""
        # np.asarray lets DataFrames and lists through as well as ndarrays.
        X = np.asarray(X)
        return X.reshape((X.shape[0], self.inputShape[0], self.inputShape[1]))

    def fit(self, X, y):
        """Train the network on ``X``/``y`` and return ``self``."""
        self.model.fit(
            self._AsModelInput(X),
            y,
            epochs=self.epochs,
            batch_size=self.batchSize,
            verbose=self.verbose,
        )
        return self

    def predict(self, X):
        """Return 0/1 labels by thresholding the sigmoid outputs at 0.5."""
        predictions = self.model.predict(self._AsModelInput(X))
        return (predictions > 0.5).astype("int32")

    def predict_proba(self, X):
        """Return the raw sigmoid outputs for ``X``.

        The input is reshaped exactly as in ``fit`` and ``predict`` so all
        three entry points feed the network identically shaped arrays.
        """
        return self.model.predict(self._AsModelInput(X))

    def CreateCNNModel(self):
        """Build and compile the Conv1D network described in the class docstring."""
        model = Sequential()
        model.add(
            Conv1D(
                filters=64,
                kernel_size=3,
                activation="relu",
                input_shape=self.inputShape,
            )
        )
        model.add(Flatten())
        model.add(Dense(50, activation="relu"))
        # One independent sigmoid unit per label.
        model.add(Dense(self.numClasses, activation="sigmoid"))
        model.compile(
            loss=self.lossFunction,
            optimizer=self.optimizer,
            metrics=self.metrics,
        )
        return model

In [63]:
class LSTMClassifier(BaseEstimator, ClassifierMixin):
    """scikit-learn style wrapper around a small stacked-LSTM Keras network.

    The network is LSTM(50, return_sequences=True) -> LSTM(50) ->
    Dense(50, relu) -> Dense(numClasses, sigmoid); it is built and compiled
    when the wrapper is constructed.

    Args:
        inputShape: Two-element shape of one sample as fed to the network;
            2-D inputs are reshaped to ``(rows, inputShape[0], inputShape[1])``.
        numClasses: Number of sigmoid output units.
        epochs: Number of epochs used by ``fit``.
        batchSize: Batch size used by ``fit``.
        lossFunction: Loss passed to ``model.compile``.
        optimizer: Optimizer passed to ``model.compile``.
        metrics: Metrics passed to ``model.compile``.
        verbose: Verbosity passed to ``model.fit``.
    """

    def __init__(
        self,
        inputShape,
        numClasses,
        epochs,
        batchSize,
        lossFunction,
        optimizer,
        metrics,
        verbose,
    ):
        self.verbose = verbose
        self.lossFunction = lossFunction
        self.optimizer = optimizer
        self.metrics = metrics
        self.inputShape = inputShape
        self.numClasses = numClasses
        self.epochs = epochs
        self.batchSize = batchSize
        # NOTE(review): classes_ is assigned at construction time rather than
        # in fit(); presumably so meta-estimators can read it - confirm.
        self.classes_ = np.arange(self.numClasses)
        self.model = self.CreateLSTMModel()

    def _AsModelInput(self, X):
        """Reshape a ``(rows, features)`` input to ``(rows, *inputShape)``."""
        # np.asarray lets DataFrames and lists through as well as ndarrays.
        X = np.asarray(X)
        return X.reshape((X.shape[0], self.inputShape[0], self.inputShape[1]))

    def fit(self, X, y):
        """Train the network on ``X``/``y`` and return ``self``."""
        self.model.fit(
            self._AsModelInput(X),
            y,
            epochs=self.epochs,
            batch_size=self.batchSize,
            verbose=self.verbose,
        )
        return self

    def predict(self, X):
        """Return 0/1 labels by thresholding the sigmoid outputs at 0.5."""
        predictions = self.model.predict(self._AsModelInput(X))
        return (predictions > 0.5).astype("int32")

    def predict_proba(self, X):
        """Return the raw sigmoid outputs for ``X``.

        The input is reshaped exactly as in ``fit`` and ``predict`` so all
        three entry points feed the network identically shaped arrays.
        """
        return self.model.predict(self._AsModelInput(X))

    def CreateLSTMModel(self):
        """Build and compile the stacked-LSTM network described in the class docstring."""
        model = Sequential()
        # The first LSTM returns the full sequence so the second can consume it.
        model.add(LSTM(50, return_sequences=True, input_shape=self.inputShape))
        model.add(LSTM(50))
        model.add(Dense(50, activation="relu"))
        # One independent sigmoid unit per label.
        model.add(Dense(self.numClasses, activation="sigmoid"))
        model.compile(
            loss=self.lossFunction,
            optimizer=self.optimizer,
            metrics=self.metrics,
        )
        return model

In [64]:
def CreateEnsembleCombinations(MLModelsNames: list, minimumModels: int):
    """List every subset of model names holding at least ``minimumModels`` names.

    Subsets are ordered by increasing size and, within one size, in the order
    produced by ``itertools.combinations``.

    Args:
        MLModelsNames: Candidate model names.
        minimumModels: Smallest subset size to include.

    Returns:
        A list of lists of names; empty when ``minimumModels`` exceeds the
        number of names.
    """
    subsetSizes = range(minimumModels, len(MLModelsNames) + 1)
    return [
        list(subset)
        for size in subsetSizes
        for subset in combinations(MLModelsNames, size)
    ]

In [65]:
# Compile settings shared by the CNN and LSTM wrappers.
lossFunction = "binary_crossentropy"
optimizer = "adam"
metrics = ["accuracy"]
# Each sample is presented to the networks as (number of features, 1).
inputShape = (X_train.shape[1], 1)
# One output unit per column of the binarized target.
numClasses = y_train.shape[1]
print("inputShape:", inputShape)
print("numClasses:", numClasses)

inputShape: (25, 1)
numClasses: 3


In [66]:
# Gradient-boosted models, each wrapped in MultiOutputClassifier for the
# multi-label target.
XGBModel = MultiOutputClassifier(CreateXGBClassifier())
LGBModel = MultiOutputClassifier(CreateLGBClassifier())
CBModel = MultiOutputClassifier(CreateCBClassifier())

# The two neural wrappers are constructed with identical arguments.
neuralNetSettings = dict(
    inputShape=inputShape,
    numClasses=numClasses,
    epochs=100,
    batchSize=32,
    lossFunction=lossFunction,
    optimizer=optimizer,
    metrics=metrics,
    verbose=1,
)
CNNModel = CNNClassifier(**neuralNetSettings)
LSTMModel = LSTMClassifier(**neuralNetSettings)

# Candidate base estimators for stacking; the boosted models are currently
# switched off.
MLModels = {
    # "XGBModel": XGBModel,
    # "LGBModel": LGBModel,
    # "CBModel": CBModel,
    "CNNModel": CNNModel,
    "LSTMModel": LSTMModel,
}

In [67]:
# Enumerate every ensemble of at least two base models; reverse() puts the
# largest combinations first.
EnsembleCombinations = CreateEnsembleCombinations(list(MLModels.keys()), 2)
EnsembleCombinations.reverse()
# finalEstimator = MultiOutputClassifier(LogisticRegression(n_jobs=-1))
# The MultiOutputClassifier-wrapped XGBoost model serves as the stacking
# final estimator.
finalEstimator = XGBModel
for EnsembleCombination in EnsembleCombinations:
    print(EnsembleCombination)

['CNNModel', 'LSTMModel']


In [68]:
for EnsembleCombination in EnsembleCombinations:
    # Pair each selected model name with its estimator instance.
    estimators = [
        (modelName, MLModels[modelName]) for modelName in EnsembleCombination
    ]
    print("EnsembleClassifer combination:", EnsembleCombination)

    EnsembleClassifer = StackingClassifier(
        estimators=estimators, final_estimator=finalEstimator, verbose=1
    )
    EnsembleClassifer.fit(X_train, y_train)
    y_pred = EnsembleClassifer.predict(X_test)

    # Score of the predicted indicator matrix against the true one.
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Ensemble model accuracy: {accuracy}")

    # Relaxed score: a sample is a hit when at least one predicted label is
    # among its true labels.
    predictedLabels = MlBinarizer.inverse_transform(y_pred)
    trueLabels = MlBinarizer.inverse_transform(y_test)
    c = sum(
        1
        for ypred, yacc in zip(predictedLabels, trueLabels)
        if any(label in yacc for label in ypred)
    )
    customAcc = c / len(y_test)
    print(customAcc)

    # Persist the fitted ensemble in the working directory, named after its members.
    pklFileName = "-".join(EnsembleCombination)
    with open(pklFileName + ".pkl", "wb") as pklFile:
        pickle.dump(EnsembleClassifer, pklFile)
    # Only the first combination in the list is trained.
    break

EnsembleClassifer combination: ['CNNModel', 'LSTMModel']
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epo



INFO:tensorflow:Assets written to: ram://ddb1186d-2dc1-4564-884f-2acc03c1c3d1/assets


INFO:tensorflow:Assets written to: ram://ddb1186d-2dc1-4564-884f-2acc03c1c3d1/assets


INFO:tensorflow:Assets written to: ram://57bd3dd4-1405-483f-9fe9-fec030abb5aa/assets


INFO:tensorflow:Assets written to: ram://57bd3dd4-1405-483f-9fe9-fec030abb5aa/assets


INFO:tensorflow:Assets written to: ram://8dca2ebb-953b-4608-b143-b8a848907e05/assets


INFO:tensorflow:Assets written to: ram://8dca2ebb-953b-4608-b143-b8a848907e05/assets


INFO:tensorflow:Assets written to: ram://df52fd1c-f43d-4028-b9f8-d64b6e366544/assets


INFO:tensorflow:Assets written to: ram://df52fd1c-f43d-4028-b9f8-d64b6e366544/assets


In [69]:
# Best Params:
# XGBModel params: {'device': 'cuda', 'tree_method': 'hist', 'verbosity': 0, 'max_depth': 3, 'learning_rate': 0.1, 'gamma': 0}
# LGBModel params: {'device': 'gpu', 'verbosity': 0, 'num_leaves': 31, 'max_depth': -1, 'learning_rate': 0.01, 'n_estimators': 100, 'reg_lambda': 0, 'reg_alpha': 0}
# CBModel params: {'task_type': 'GPU', 'devices': '0:1', 'verbose': 0, 'depth': 5, 'learning_rate': 0.1, 'iterations': 1000}
# LRModel params: {'max_iter': 100, 'n_jobs': -1}

In [70]:
# hyperParametersRanges = {
#     "XGB:device": ["cuda"],
#     # "XGB:objective": ["multi:softmax"],
#     "XGB:tree_method": ["hist"],
#     "XGB:verbosity": [0],
#     "XGB:max_depth": [3, 4, 5],
#     "XGB:learning_rate": [0.1, 0.2, 0.3, 0.4, 0.5],
#     "XGB:gamma": [0, 0.1],
#     "LGB:device": ["gpu"],
#     "LGB:verbosity": [0],
#     "LGB:num_leaves": [31, 50, 100, 150],
#     "LGB:max_depth": [-1, 5, 15, 20],
#     "LGB:learning_rate": [0.01, 0.05, 0.1, 0.2],
#     "LGB:n_estimators": [100, 500, 1000],
#     "LGB:reg_lambda": [0, 0.01, 0.1],
#     "LGB:reg_alpha": [0, 0.01, 0.1],
#     "CB:task_type": ["GPU"],
#     "CB:devices": ["0:1"],
#     "CB:verbose": [0],
#     "CB:depth": [5, 10],
#     "CB:learning_rate": [0.01, 0.1],
#     "CB:iterations": [100, 500, 1000],
#     # "LR:penalty": ["l1", "l2", "elasticnet", "none"],
#     # "LR:C": [0.001, 0.01, 0.1, 1],
#     # "LR:solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"],
#     "LR:max_iter": [100, 500],
#     # "LR:l1_ratio": [0, 0.5, 1],
#     "LR:n_jobs": [-1],
# }

In [71]:
# paramGrids = [
#     dict(zip(hyperParametersRanges.keys(), values))
#     for values in product(*hyperParametersRanges.values())
# ]

In [72]:
# XGBParams = {key.split(':')[1]: value for key, value in paramGrid.items() if key.startswith('XGB:')}
# LGBParams = {key.split(':')[1]: value for key, value in paramGrid.items() if key.startswith('LGB:')}
# CBParams = {key.split(':')[1]: value for key, value in paramGrid.items() if key.startswith('CB:')}

In [73]:
# for paramGrid in paramGrids:
#     XGBParams = {
#         key.split(":")[1]: value
#         for key, value in paramGrid.items()
#         if key.startswith("XGB:")
#     }
#     LGBParams = {
#         key.split(":")[1]: value
#         for key, value in paramGrid.items()
#         if key.startswith("LGB:")
#     }
#     CBParams = {
#         key.split(":")[1]: value
#         for key, value in paramGrid.items()
#         if key.startswith("CB:")
#     }
#     LRParams = {
#         key.split(":")[1]: value
#         for key, value in paramGrid.items()
#         if key.startswith("LR:")
#     }
#     XGBModel = MultiOutputClassifier(CreateXGBClassifier(XGBParams))
#     LGBModel = MultiOutputClassifier(CreateLGBClassifier(LGBParams))
#     CBModel = MultiOutputClassifier(CreateCBClassifier(CBParams))
#     LRModel = MultiOutputClassifier(CreateLRClassifier(LRParams))
#     MLModels = {
#         "XGBModel": XGBModel,
#         "LGBModel": LGBModel,
#         "CBModel": CBModel,
#     }
#     # EnsembleCombinations = CreateEnsembleCombinations(list(MLModels.keys()), 2)
#     # for EnsembleCombination in EnsembleCombinations:
#     # print(EnsembleCombination)
#     # for EnsembleCombination in EnsembleCombinations:
#     print("-"*30)
#     print("XGBModel params:", XGBParams)
#     print("LGBModel params:", LGBParams)
#     print("CBModel params:", CBParams)
#     print("LRModel params:", LRParams)
#     estimators = []

#     for key in MLModels.keys():
#         estimators.append((key, MLModels[key]))

#     EnsembleClassifer = StackingClassifier(
#         estimators=estimators, verbose=1, final_estimator=LRModel
#     )
#     EnsembleClassifer.fit(X_train, y_train)

#     y_pred = EnsembleClassifer.predict(X_test)

#     accuracy = accuracy_score(y_test, y_pred)
#     print(f"Ensemble model accuracy: {accuracy}")
#     c = 0
#     for ypred, yacc in zip(
#         MlBinarizer.inverse_transform(y_pred), MlBinarizer.inverse_transform(y_test)
#     ):
#         if any(label in yacc for label in ypred):
#             c += 1
#     print(f"Ensemble model custom accuracy: {c / len(y_test)}")

In [74]:
import requests

# Best-effort completion notice carrying both accuracy figures in the subject.
emailConfig = {
    "sendersEmailId": "99kalitkar@gmail.com",
    "sendersMessage": "BigData Project executed",
    "sendersSubject": f"Accuracy: {accuracy}, {customAcc}",
}
try:
    response = requests.post(
        url="https://www.restapi.99kalitkar.in/email",
        json=emailConfig,
        headers={"Content-Type": "application/json"},
        # Without a timeout an unresponsive endpoint would hang the notebook.
        timeout=30,
    )
    # Parse the body once and reuse it for both the print and the check.
    responseBody = response.json()
except (requests.RequestException, ValueError) as error:
    # Network failures and non-JSON replies must not fail a finished run.
    print("Email notification failed:", error)
else:
    print(responseBody)
    if responseBody.get("success", False):
        print("Thanks for your Email. I will respond as soon as possible!")

{'success': True}
Thanks for your Email. I will respond as soon as possible!
