In [46]:
def run_exp(sf_pass, algos, dataset, target):
    """Train and evaluate one preprocessing + estimator pipeline per algorithm.

    For each entry in ``algos`` a Snowflake ML ``Pipeline`` is built that
    ordinal-encodes the categorical feature columns, min-max scales the
    remaining feature columns, and fits the requested estimator on a 90%
    split of ``dataset``. MSE, MAE and R2 on the held-out 10% are printed
    for every algorithm.

    Args:
        sf_pass: Password used for the Snowflake connection.
        algos: Iterable of fully qualified estimator class paths, e.g.
            ``['snowflake.ml.modeling.ensemble.AdaBoostRegressor']``. A single
            string is accepted and treated as a one-element list.
        dataset: Name of the Snowflake table to read.
        target: Name of the label column in ``dataset``.

    Returns:
        The Snowpark DataFrame of test-set predictions (column ``PREDICTION``)
        produced by the LAST algorithm in ``algos``.

    Raises:
        ValueError: If ``algos`` is empty, an entry is not of the form
            ``'package.module.ClassName'``, ``target`` is not a column of the
            table, or the table has no feature columns.
        ImportError / AttributeError: If an estimator path cannot be resolved.
    """
    import importlib
    from snowflake.snowpark import Session
    from snowflake.ml.modeling.pipeline import Pipeline
    from snowflake.ml.modeling.preprocessing import MinMaxScaler, OrdinalEncoder
    from snowflake.ml.modeling.metrics import mean_squared_error, mean_absolute_error, r2_score

    # Normalise `algos`: a bare string would otherwise be iterated character by
    # character, and an empty iterable would leave nothing to return.
    if isinstance(algos, str):
        algos = [algos]
    else:
        algos = list(algos)
    if not algos:
        raise ValueError("algos must contain at least one estimator class path")

    # Resolve every estimator class BEFORE connecting, so a typo in a class
    # path fails immediately instead of after a session has been opened.
    estimators = []
    for algo_path in algos:
        module_name, _, class_name = algo_path.rpartition('.')
        if not module_name or not class_name:
            raise ValueError(
                f"algorithm {algo_path!r} must be a dotted path like 'package.module.ClassName'"
            )
        estimator_cls = getattr(importlib.import_module(module_name), class_name)
        estimators.append((algo_path, estimator_cls))

    # NOTE(review): account, user, role, warehouse, database and schema are
    # hard-coded here; consider moving them to configuration.
    connection_parameters = {
        "account": "ug94937.us-east4.gcp",
        "user": "ADITYASINGH",
        "password": sf_pass,
        "role": "ADITYASINGH",  # optional
        "warehouse": "FOSFOR_INSIGHT_WH",  # optional
        "database": "FIRST_DB",  # optional
        "schema": "PUBLIC",  # optional
    }

    session = Session.builder.configs(connection_parameters).create()
    try:
        session.sql_simplifier_enabled = True

        # Read dataset and split it 90/10 with a fixed seed.
        df_train, df_test = session.table(dataset).drop('ROW').random_split(weights=[0.9, 0.1], seed=0)
        columns = list(df_train.columns)
        if target not in columns:
            raise ValueError(f"target column {target!r} not found in {dataset}; columns are {columns}")
        features = [col for col in columns if col != target]
        if not features:
            raise ValueError(f"{dataset} has no feature columns besides {target!r}")

        # Classify features by declared column type. DESCRIBE TABLE reports the
        # raw table (including the dropped 'ROW' column), so only names that are
        # still feature columns of the DataFrame are kept.
        data_schema = session.sql(f"DESCRIBE TABLE {dataset}").collect()
        categorical_types = ['VARCHAR', 'CHAR', 'STRING', 'TEXT', 'BOOL']
        feature_set = set(features)
        categorical_features = [
            row['name']
            for row in data_schema
            if row['name'] in feature_set
            and any(typ in row['type'] for typ in categorical_types)
        ]
        # Preserve DataFrame column order so the feature order handed to the
        # estimator is the same on every run (a set difference is not).
        categorical_set = set(categorical_features)
        numerical_features = [col for col in features if col not in categorical_set]
        categorical_features_oe = [name + '_OE' for name in categorical_features]

        # Preprocessing steps are shared by every algorithm; a step is only
        # added when it has columns to work on, to avoid handing a transformer
        # an empty column list.
        for algo_path, estimator_cls in estimators:
            steps = []
            if categorical_features:
                steps.append(
                    ("ord", OrdinalEncoder(input_cols=categorical_features, output_cols=categorical_features_oe))
                )
            if numerical_features:
                steps.append(
                    ("scaler", MinMaxScaler(input_cols=numerical_features, output_cols=numerical_features))
                )
            steps.append(
                ("algorithm", estimator_cls(input_cols=categorical_features_oe + numerical_features,
                                            label_cols=[target],
                                            output_cols=['PREDICTION']))
            )
            pipe = Pipeline(steps=steps)

            # Fit the pipeline on the training split.
            fitted_pipeline = pipe.fit(df_train)

            # Score the held-out split.
            df_test_pred = fitted_pipeline.predict(df_test)

            # Metrics
            mse = mean_squared_error(df=df_test_pred, y_true_col_names=target, y_pred_col_names="PREDICTION")
            mae = mean_absolute_error(df=df_test_pred, y_true_col_names=target, y_pred_col_names="PREDICTION")
            r2 = r2_score(df=df_test_pred, y_true_col_name=target, y_pred_col_name="PREDICTION")
            print(f'Algorithm: {algo_path}')
            print(f'MSE: {mse}')
            print(f'MAE: {mae}')
            print(f'R2: {r2}')
    except BaseException:
        # Nothing is returned on failure, so release the connection.
        session.close()
        raise

    # The session is intentionally left open on success: the returned
    # DataFrame is created from it and callers go on to use it (e.g. `.show()`).
    return df_test_pred

In [47]:
import os  # run_exp's own imports are function-local, so `os` must be imported here

# Indexing os.environ raises KeyError when SF_Password is unset, instead of
# silently passing None as the password.
test = run_exp(os.environ['SF_Password'], ['snowflake.ml.modeling.ensemble.AdaBoostRegressor'], 'EMPLOYEE', 'LEAVEORNOT')

The version of package 'snowflake-snowpark-python' in the local environment is 1.17.0, which does not fit the criteria for the requirement 'snowflake-snowpark-python'. Your UDF might not work when the package version is different between the server and your local environment.
The version of package 'scikit-learn' in the local environment is 1.3.2, which does not fit the criteria for the requirement 'scikit-learn==1.3.0'. Your UDF might not work when the package version is different between the server and your local environment.
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
The version of package 'scikit-learn' in the local environment is 1.3.2, which does not fit the criteria for the requirement 'scikit-learn==1.3.0'. Your UDF might not work when the package version is different between the server and your local environment.


MSE: 0.1727640136965599
MAE: 0.31845926309526407
R2: 0.26641396693757746


In [39]:
# Dotted "module.ClassName" path of an estimator, in the form run_exp splits
# and imports for each entry of its `algos` argument.
algo = "snowflake.ml.modeling.ensemble.AdaBoostRegressor"

In [55]:
# DataFrame.show() prints the rows itself and returns None, so wrapping it in
# print() only appends a stray "None" line.
test.show()

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"AGE"                |"EXPERIENCEINCURRENTDOMAIN"  |"JOININGYEAR"        |"PAYMENTTIER"  |"EDUCATION_OE"  |"CITY_OE"  |"GENDER_OE"  |"EVERBENCHED_OE"  |"EDUCATION"  |"CITY"     |"GENDER"  |"EVERBENCHED"  |"LEAVEORNOT"  |"PREDICTION"         |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|0.0                  |0.0                          |0.6666666666666856   |0.5            |0.0             |1.0        |0.0          |0.0               |Bachelors    |New Delhi  |Female    |No             |1             |0.7052023121387283   |
|0.26315789473684204  |0

'AdaBoostRegressor'