In [None]:
import pandas as pd
import numpy as np

In [None]:
def train_fn(X_train, y_train, cv):
    """Fit a soft-voting ensemble of LightGBM, logistic regression, SVM and random forest.

    Parameters
    ----------
    X_train, y_train
        Training features and labels; passed unchanged to every ``fit`` call.
    cv : bool
        If truthy, the hyper-parameters of each base model are first tuned
        with ``GridSearchCV`` on the training data. Otherwise a fixed,
        hard-coded set of hyper-parameters is used.

    Returns
    -------
    tuple
        ``(elapsed, vote)`` where ``elapsed`` is the number of seconds spent
        fitting the final ``VotingClassifier`` (the grid searches are not
        included in the measurement) and ``vote`` is the fitted ensemble.
    """
    import time

    from lightgbm import LGBMClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.ensemble import VotingClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC

    # Settings shared by every LightGBM model built here. The seed is kept in
    # the final parameters so the ensemble member matches what was tuned.
    lgb_params = {'objective': 'multiclass', 'boosting_type': 'dart', 'max_bin': 510,
                  'colsample_bytree': 0.7, 'subsample': 0.7, 'random_state': 0}
    # Same reasoning for the random forest: tune and train with one seed.
    rf_params = {'random_state': 0}

    if cv:
        from sklearn.model_selection import GridSearchCV

        def best_params(estimator, grid, scoring='accuracy', verbose=0):
            """Run a grid search on the training data and return its best parameters."""
            search = GridSearchCV(estimator, grid, scoring=scoring, verbose=verbose, n_jobs=-1)
            search.fit(X_train, y_train)
            return search.best_params_

        # LightGBM is tuned in two stages: tree count first, then learning
        # rate and depth with the chosen tree count held fixed.
        lgb_params.update(best_params(LGBMClassifier(**lgb_params),
                                      {'n_estimators': [100, 200, 500, 1000]},
                                      verbose=2))
        lgb_params.update(best_params(LGBMClassifier(**lgb_params),
                                      {'learning_rate': [0.1, 0.05, 0.01], 'max_depth': [3, 5, 7]},
                                      verbose=2))

        # NOTE(review): the string 'none' is deprecated in favour of None in
        # newer scikit-learn releases - confirm against the installed version.
        lr_params = best_params(LogisticRegression(),
                                {'penalty': ['l2', 'none'], 'C': [1, 4, 10],
                                 'solver': ['newton-cg', 'lbfgs']},
                                verbose=2)

        svm_params = best_params(SVC(),
                                 {'kernel': ['linear', 'rbf', 'poly'],
                                  'C': [0.001, 0.01, 0.1, 1, 10],
                                  'gamma': [0.001, 0.01, 0.1, 1]},
                                 verbose=2)
        # Soft voting calls predict_proba, which SVC only provides when
        # probability estimates are enabled.
        svm_params['probability'] = True

        # The random forest is tuned in two stages as well.
        # NOTE(review): this first stage scores with 'f1_weighted' while every
        # other search uses 'accuracy' - kept as-is, confirm it is intended.
        rf_params.update(best_params(RandomForestClassifier(**rf_params),
                                     {'n_estimators': [100, 200, 500, 1000]},
                                     scoring='f1_weighted'))
        # 'auto' was dropped from the grid: for RandomForestClassifier it is an
        # alias of 'sqrt' (a duplicate candidate) and newer scikit-learn
        # releases no longer accept it.
        rf_params.update(best_params(RandomForestClassifier(**rf_params),
                                     {'max_depth': [1, 5, 10, 20, 50],
                                      'max_features': ['sqrt', 'log2'],
                                      'criterion': ['gini', 'entropy']}))
    else:
        lgb_params.update({'n_estimators': 1000, 'learning_rate': 0.01, 'max_depth': 7})
        # NOTE(review): see the remark on 'none' above - newer scikit-learn
        # releases expect None here.
        lr_params = {'penalty': 'none', 'C': 1, 'solver': 'lbfgs'}
        svm_params = {'kernel': 'linear', 'C': 0.001, 'gamma': 0.001, 'probability': True}
        # 'sqrt' is what the former 'auto' value meant for a classifier.
        rf_params.update({'max_depth': 50, 'n_estimators': 1000, 'criterion': 'gini',
                          'max_features': 'sqrt'})

    models = [
        ('model1', LGBMClassifier(**lgb_params)),
        ('model2', LogisticRegression(**lr_params)),
        ('model3', SVC(**svm_params)),
        ('model4', RandomForestClassifier(**rf_params)),
    ]

    # perf_counter is monotonic, so the measured interval cannot be distorted
    # by system clock adjustments. The result is stored under its own name
    # instead of rebinding the `time` module.
    start = time.perf_counter()
    vote = VotingClassifier(models, voting='soft', weights=None, flatten_transform=True, n_jobs=-1)
    vote.fit(X_train, y_train)
    elapsed = time.perf_counter() - start

    return elapsed, vote