In [37]:
def run_exp(sf_pass, algos, dataset, target):
    """Fit and evaluate several Snowpark ML estimators on one Snowflake table.

    The table is split 90/10 into train/test (seed 0). For every entry in
    ``algos`` a pipeline of OrdinalEncoder -> MinMaxScaler -> estimator is
    fitted on the train split and used to predict the test split. MSE, MAE
    and R2 of each model are printed.

    Args:
        sf_pass: Password used to open the Snowflake session.
        algos: Iterable of fully qualified estimator class paths, e.g.
            ``'snowflake.ml.modeling.naive_bayes.GaussianNB'``.
        dataset: Name of the Snowflake table to read.
        target: Name of the label column in ``dataset``.

    Returns:
        A Snowpark DataFrame with one ``PREDICTIONS_<CLASSNAME>`` column per
        algorithm, where each row holds every model's prediction for the same
        test row; ``None`` when ``algos`` is empty.

    Raises:
        ValueError: If ``target`` is not a column of the table, or an entry
            of ``algos`` contains no ``.`` separator.
    """
    import importlib
    from snowflake.snowpark import Session
    from snowflake.snowpark.functions import seq8
    from snowflake.snowpark.types import BooleanType, StringType
    from snowflake.ml.modeling.pipeline import Pipeline
    from snowflake.ml.modeling.preprocessing import MinMaxScaler, OrdinalEncoder
    from snowflake.ml.modeling.metrics import mean_squared_error, mean_absolute_error, r2_score

    # NOTE(review): everything except the password is hard-coded here;
    # consider moving these values to configuration.
    connection_parameters = {
        "account": "ug94937.us-east4.gcp",
        "user": "ADITYASINGH",
        "password": sf_pass,
        "role": "ADITYASINGH",  # optional
        "warehouse": "FOSFOR_INSIGHT_WH",  # optional
        "database": "FIRST_DB",  # optional
        "schema": "PUBLIC",  # optional
    }

    # The session is intentionally left open: the returned DataFrame is lazy
    # and still needs it when the caller evaluates it (e.g. ``.show()``).
    session = Session.builder.configs(connection_parameters).create()
    session.sql_simplifier_enabled = True

    # Read dataset
    df_train, df_test = session.table(dataset).drop('ROW').random_split(weights=[0.9, 0.1], seed=0)
    # show() prints by itself and returns None, so it must not be wrapped in print().
    df_train.show()

    if target not in df_train.columns:
        raise ValueError(f"target column {target!r} not found in table {dataset!r}")

    # Derive the feature lists from the schema of the frame that is actually
    # used for training. This keeps the names consistent with df_train.columns,
    # ignores columns that were dropped above, and preserves column order.
    features = [name for name in df_train.columns if name != target]
    categorical_features = [
        field.name
        for field in df_train.schema.fields
        if field.name != target and isinstance(field.datatype, (StringType, BooleanType))
    ]
    numerical_features = [name for name in features if name not in categorical_features]
    categorical_features_oe = [name + '_OE' for name in categorical_features]
    print("numerical_features: ", numerical_features)
    print("categorical_features_oe: ", categorical_features_oe)

    # Tag every test row with an id and materialise the result, so the
    # predictions of the different models can be joined row by row.
    # Without cache_result() the id would be recomputed by every query.
    row_id_col = 'RUN_EXP_ROW_ID'
    df_test = df_test.with_column(row_id_col, seq8()).cache_result()

    df_all_pred = None
    for algorithm in algos:
        # Resolve 'package.module.ClassName' to the estimator class.
        module_path, class_name = algorithm.rsplit('.', 1)
        module = importlib.import_module(module_path)
        print(class_name)
        estimator_cls = getattr(module, class_name)
        pred_col = f'PREDICTIONS_{class_name}'.upper()

        pipe = Pipeline(steps=[
            ("ord", OrdinalEncoder(input_cols=categorical_features, output_cols=categorical_features_oe)),
            ("scaler", MinMaxScaler(input_cols=numerical_features, output_cols=numerical_features)),
            ("algorithm", estimator_cls(input_cols=categorical_features_oe + numerical_features,
                                        label_cols=[target],
                                        output_cols=[pred_col])),
        ])

        # Fit the pipeline
        fitted_pipeline = pipe.fit(df_train)

        # Test the model
        df_test_pred = fitted_pipeline.predict(df_test)

        # Combine predictions on the row id. A join without `on` would be a
        # Cartesian product and would pair predictions of unrelated rows.
        model_pred = df_test_pred.select(row_id_col, pred_col)
        if df_all_pred is None:
            df_all_pred = model_pred
        else:
            df_all_pred = df_all_pred.join(model_pred, on=row_id_col)

        # Metrics
        mse = mean_squared_error(df=df_test_pred, y_true_col_names=target, y_pred_col_names=pred_col)
        mae = mean_absolute_error(df=df_test_pred, y_true_col_names=target, y_pred_col_names=pred_col)
        r2 = r2_score(df=df_test_pred, y_true_col_name=target, y_pred_col_name=pred_col)
        print(f'{class_name} MSE: {mse}')
        print(f'{class_name} MAE: {mae}')
        print(f'{class_name} R2: {r2}')

    if df_all_pred is None:
        return None
    # The id is only a join helper; callers get the prediction columns only.
    return df_all_pred.sort(row_id_col).drop(row_id_col)

In [38]:
# Classification experiment: predict LEAVEORNOT on the EMPLOYEE table with
# two classifiers. The Snowflake password is read from the SF_Password
# environment variable.
test = run_exp(
    sf_pass=os.getenv('SF_Password'),
    algos=[
        'snowflake.ml.modeling.naive_bayes.GaussianNB',
        'snowflake.ml.modeling.neighbors.KNeighborsClassifier',
    ],
    dataset='EMPLOYEE',
    target='LEAVEORNOT',
)

-------------------------------------------------------------------------------------------------------------------------------------------
|"EDUCATION"  |"JOININGYEAR"  |"CITY"     |"PAYMENTTIER"  |"AGE"  |"GENDER"  |"EVERBENCHED"  |"EXPERIENCEINCURRENTDOMAIN"  |"LEAVEORNOT"  |
-------------------------------------------------------------------------------------------------------------------------------------------
|Bachelors    |2017           |Bangalore  |3              |34     |Male      |No             |0                            |0             |
|Bachelors    |2013           |Pune       |1              |28     |Female    |No             |3                            |1             |
|Bachelors    |2014           |New Delhi  |3              |38     |Female    |No             |2                            |0             |
|Masters      |2016           |Bangalore  |3              |27     |Male      |No             |5                            |1             |
|Masters      |2017 

The version of package 'snowflake-snowpark-python' in the local environment is 1.17.0, which does not fit the criteria for the requirement 'snowflake-snowpark-python'. Your UDF might not work when the package version is different between the server and your local environment.
The version of package 'scikit-learn' in the local environment is 1.3.2, which does not fit the criteria for the requirement 'scikit-learn==1.3.0'. Your UDF might not work when the package version is different between the server and your local environment.
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
The ve

SnowparkSQLException: (1300) (1304): 01b4a1bd-0000-6581-0000-576d06b9ad12: 100357 (P0000): Python Interpreter Error:
Traceback (most recent call last):
  File "_udf_code.py", line 77, in compute
  File "/packages/Python-3.9-Snowpark/5fc8e328-978d-48c7-b3ec-89dee2264cf3/3.9/snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_trainer.py", line 227, in fit_wrapper_function
  File "/usr/lib/python_udf/e96eabd0c0f78aa7cc879e7b17c87cdcdc0cd1335954fe6881f43cc7d6c3139d/lib/python3.9/site-packages/sklearn/base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/lib/python_udf/e96eabd0c0f78aa7cc879e7b17c87cdcdc0cd1335954fe6881f43cc7d6c3139d/lib/python3.9/site-packages/sklearn/naive_bayes.py", line 263, in fit
    return self._partial_fit(
  File "/usr/lib/python_udf/e96eabd0c0f78aa7cc879e7b17c87cdcdc0cd1335954fe6881f43cc7d6c3139d/lib/python3.9/site-packages/sklearn/naive_bayes.py", line 423, in _partial_fit
    X, y = self._validate_data(X, y, reset=first_call)
  File "/usr/lib/python_udf/e96eabd0c0f78aa7cc879e7b17c87cdcdc0cd1335954fe6881f43cc7d6c3139d/lib/python3.9/site-packages/sklearn/base.py", line 621, in _validate_data
    X, y = check_X_y(X, y, **check_params)
  File "/usr/lib/python_udf/e96eabd0c0f78aa7cc879e7b17c87cdcdc0cd1335954fe6881f43cc7d6c3139d/lib/python3.9/site-packages/sklearn/utils/validation.py", line 1147, in check_X_y
    X = check_array(
  File "/usr/lib/python_udf/e96eabd0c0f78aa7cc879e7b17c87cdcdc0cd1335954fe6881f43cc7d6c3139d/lib/python3.9/site-packages/sklearn/utils/validation.py", line 917, in check_array
    array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)
  File "/usr/lib/python_udf/e96eabd0c0f78aa7cc879e7b17c87cdcdc0cd1335954fe6881f43cc7d6c3139d/lib/python3.9/site-packages/sklearn/utils/_array_api.py", line 380, in _asarray_with_order
    array = numpy.asarray(array, order=order, dtype=dtype)
  File "/usr/lib/python_udf/e96eabd0c0f78aa7cc879e7b17c87cdcdc0cd1335954fe6881f43cc7d6c3139d/lib/python3.9/site-packages/pandas/core/generic.py", line 2150, in __array__
    arr = np.asarray(values, dtype=dtype)
ValueError: could not convert string to float: 'Bachelors'
 in function SNOWPARK_TEMP_PROCEDURE_EZSQPBFXR4 with handler compute

In [16]:
# Print the first rows of the prediction columns returned by run_exp.
# NOTE(review): this cell's execution count is lower than that of the
# run_exp call above, which ended in an exception; the output shown here
# presumably comes from an earlier run. Re-run the notebook top to bottom.
test.show()

-----------------------------------------------------------------
|"PREDICTIONS_GAUSSIANNB"  |"PREDICTIONS_KNEIGHBORSCLASSIFIER"  |
-----------------------------------------------------------------
|1                         |0                                   |
|1                         |0                                   |
|1                         |0                                   |
|1                         |1                                   |
|1                         |1                                   |
|1                         |0                                   |
|1                         |0                                   |
|1                         |0                                   |
|1                         |0                                   |
|1                         |0                                   |
-----------------------------------------------------------------



In [39]:
# Regression experiment: predict QUALITY on the ALCOHOL_QUALITY table with
# two regressors. The Snowflake password is read from the SF_Password
# environment variable.
test = run_exp(
    sf_pass=os.getenv('SF_Password'),
    algos=[
        'snowflake.ml.modeling.linear_model.SGDRegressor',
        'snowflake.ml.modeling.svm.LinearSVR',
    ],
    dataset='ALCOHOL_QUALITY',
    target='QUALITY',
)

---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"FIXED_ACIDITY"  |"VOLATILE_ACIDITY"  |"CITRIC_ACID"  |"RESIDUAL_SUGAR"  |"CHLORIDES"  |"FREE_SULFUR_DIOXIDE"  |"TOTAL_SULFUR_DIOXIDE"  |"DENSITY"  |"PH"  |"SULPHATES"  |"ALCOHOL"  |"QUALITY"  |
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|7.4              |0.7                 |0.0            |1.9               |0.076        |11.0                   |34.0                    |0.9978     |3.51  |0.56         |9.4        |5          |
|7.8              |0.88                |0.0            |2.6               |0.098        |25.0                   |67.0                    |0.9968     |3.2   |0.68         |9.8        |5          |
|7.8              |0

The version of package 'snowflake-snowpark-python' in the local environment is 1.17.0, which does not fit the criteria for the requirement 'snowflake-snowpark-python'. Your UDF might not work when the package version is different between the server and your local environment.
The version of package 'scikit-learn' in the local environment is 1.3.2, which does not fit the criteria for the requirement 'scikit-learn==1.3.0'. Your UDF might not work when the package version is different between the server and your local environment.
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
The ve

SnowparkSQLException: (1300) (1304): 01b4a1be-0000-6584-0000-576d06b9c922: 100357 (P0000): Python Interpreter Error:
Traceback (most recent call last):
  File "/home/udf/1466799529/udf_py_558262474.zip/udf_py_558262474.py", line 78, in compute
    return lock_function_once(func, invoked)(df)
  File "/home/udf/1466799529/udf_py_558262474.zip/udf_py_558262474.py", line 67, in wrapper
    result = f(*args, **kwargs)
  File "/packages/Python-3.9-Snowpark/5fc8e328-978d-48c7-b3ec-89dee2264cf3/3.9/snowflake/ml/modeling/_internal/snowpark_implementations/snowpark_handlers.py", line 137, in vec_batch_infer
  File "/usr/lib/python_udf/975df9fb743811690102dc44dd07a900dddf0cb37dca10c4d62dca2408b92f25/lib/python3.9/site-packages/sklearn/linear_model/_stochastic_gradient.py", line 1644, in predict
    return self._decision_function(X)
  File "/usr/lib/python_udf/975df9fb743811690102dc44dd07a900dddf0cb37dca10c4d62dca2408b92f25/lib/python3.9/site-packages/sklearn/linear_model/_stochastic_gradient.py", line 1628, in _decision_function
    scores = safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_
  File "/usr/lib/python_udf/975df9fb743811690102dc44dd07a900dddf0cb37dca10c4d62dca2408b92f25/lib/python3.9/site-packages/sklearn/utils/extmath.py", line 193, in safe_sparse_dot
    ret = a @ b
TypeError: can't multiply sequence by non-int of type 'float'
 in function SNOWPARK_TEMP_FUNCTION_Z29W4GGKM8 with handler udf_py_558262474.compute

In [12]:
# Print the first rows of the prediction columns held in `test`.
# NOTE(review): the output below lists the GaussianNB / KNeighborsClassifier
# columns, not the regressors requested in the cell above (which raised), and
# the execution count is out of order; `test` is presumably left over from an
# earlier run. Re-run the notebook top to bottom.
test.show()

-----------------------------------------------------------------
|"PREDICTIONS_GAUSSIANNB"  |"PREDICTIONS_KNEIGHBORSCLASSIFIER"  |
-----------------------------------------------------------------
|1                         |0                                   |
|0                         |0                                   |
|0                         |0                                   |
|1                         |0                                   |
|0                         |0                                   |
|0                         |0                                   |
|1                         |0                                   |
|0                         |0                                   |
|0                         |0                                   |
|0                         |0                                   |
-----------------------------------------------------------------

