In [34]:
def run_exp(sf_pass, algo, dataset, target):
    """Train and evaluate a preprocessing + estimator pipeline on a Snowflake table.

    The table is split 90/10 (seed 0) into train and test parts, text/boolean
    columns are ordinal-encoded, the remaining feature columns are min-max
    scaled, and the estimator named by ``algo`` is fitted on the result.
    MSE, MAE and R2 on the test split are printed.

    Args:
        sf_pass: Password used for the Snowflake connection.
        algo: Fully qualified dotted path of the estimator class, e.g.
            ``'snowflake.ml.modeling.ensemble.AdaBoostRegressor'``.
        dataset: Name of the table to read.
        target: Name of the label column.

    Returns:
        The object returned by the fitted pipeline's ``predict`` on the test
        split; predictions are written to the ``PREDICTION`` column.

    Raises:
        ValueError: If ``algo`` is not a dotted ``module.ClassName`` path or
            ``target`` is not a column of the table.
    """
    import importlib
    import inspect

    from snowflake.snowpark import Session
    from snowflake.ml.modeling.pipeline import Pipeline
    from snowflake.ml.modeling.preprocessing import MinMaxScaler, OrdinalEncoder
    from snowflake.ml.modeling.metrics import mean_squared_error, mean_absolute_error, r2_score

    # Resolve the estimator class from its dotted path so the `algo` argument
    # actually selects the model that gets trained.
    module_path, _, class_name = algo.rpartition('.')
    if not module_path or not class_name:
        raise ValueError(f"algo must be a dotted 'module.ClassName' path, got {algo!r}")
    estimator_cls = getattr(importlib.import_module(module_path), class_name)

    connection_parameters = {
        "account": "ug94937.us-east4.gcp",
        "user": "ADITYASINGH",
        "password": sf_pass,
        "role": "ADITYASINGH",  # optional
        "warehouse": "FOSFOR_INSIGHT_WH",  # optional
        "database": "FIRST_DB",  # optional
        "schema": "PUBLIC",  # optional
    }

    # NOTE(review): the session is left open on purpose — the returned
    # predictions presumably stay bound to it; confirm before adding a close().
    session = Session.builder.configs(connection_parameters).create()
    session.sql_simplifier_enabled = True

    # Read dataset
    df_train, df_test = session.table(dataset).drop('ROW').random_split(weights=[0.9, 0.1], seed=0)
    columns = list(df_train.columns)
    if target not in columns:
        raise ValueError(f"target column {target!r} not found in table {dataset!r}")
    features = [name for name in columns if name != target]

    # Classify features by declared column type. DESCRIBE TABLE still lists the
    # dropped 'ROW' column (and the target), so only names that are real
    # features of the training frame are considered.
    data_schema = session.sql(f"DESCRIBE TABLE {dataset}").collect()
    categorical_types = ['VARCHAR', 'CHAR', 'STRING', 'TEXT', 'BOOL']
    feature_names = set(features)
    categorical_features = [
        row['name']
        for row in data_schema
        if row['name'] in feature_names
        and any(typ in row['type'] for typ in categorical_types)
    ]
    categorical_names = set(categorical_features)
    # Keep the table's column order instead of the arbitrary order of a set difference.
    numerical_features = [name for name in features if name not in categorical_names]
    categorical_features_oe = [name + '_OE' for name in categorical_features]

    estimator_kwargs = {
        "input_cols": categorical_features_oe + numerical_features,
        "label_cols": [target],
        "output_cols": ['PREDICTION'],
    }
    # Not every estimator takes n_jobs; pass it only when the constructor declares it.
    if 'n_jobs' in inspect.signature(estimator_cls).parameters:
        estimator_kwargs["n_jobs"] = -1

    # Define a pipeline that does the preprocessing and training of
    # the requested estimator
    pipe = Pipeline(steps=[
        ("ord", OrdinalEncoder(input_cols=categorical_features, output_cols=categorical_features_oe)),
        ("scaler", MinMaxScaler(input_cols=numerical_features, output_cols=numerical_features)),
        ("algorithm", estimator_cls(**estimator_kwargs)),
    ])

    # Fit the pipeline
    fitted_pipeline = pipe.fit(df_train)

    # Test the model
    df_test_pred = fitted_pipeline.predict(df_test)

    # Metrics
    mse = mean_squared_error(df=df_test_pred, y_true_col_names=target, y_pred_col_names="PREDICTION")
    mae = mean_absolute_error(df=df_test_pred, y_true_col_names=target, y_pred_col_names="PREDICTION")
    r2 = r2_score(df=df_test_pred, y_true_col_name=target, y_pred_col_name="PREDICTION")
    print(f'MSE: {mse}')
    print(f'MAE: {mae}')
    print(f'R2: {r2}')
    return df_test_pred

In [None]:
# `os` must be importable at cell scope for the environment lookup below;
# an import made inside a function body is not visible here.
import os

# The Snowflake password is read from the SF_Password environment variable.
test = run_exp(os.environ.get('SF_Password'),'snowflake.ml.modeling.ensemble.AdaBoostRegressor', 'EMPLOYEE', 'LEAVEORNOT')

In [35]:
# Fully qualified dotted path of an estimator class, kept as a single string.
algo = "snowflake.ml.modeling.ensemble.AdaBoostRegressor"

In [36]:
# Split the dotted path into its components. The isinstance guard keeps the
# cell re-runnable: once `algo` is already a list, a second run is a no-op
# instead of raising AttributeError.
algo = algo.split('.') if isinstance(algo, str) else algo

In [37]:
# Display the current value of `algo` as the cell output.
algo

['snowflake', 'ml', 'modeling', 'ensemble', 'AdaBoostRegressor']