In [None]:
def run_exp(sf_pass, algos, dataset, target):
    """Train and evaluate several Snowpark ML regressors on one Snowflake table.

    Opens a Snowflake session with the connection settings hard-coded below,
    splits ``dataset`` 90/10 into train and test sets with a fixed seed, and
    for every entry in ``algos`` fits a pipeline made of an ordinal encoder
    (string/boolean feature columns), a min-max scaler (all remaining feature
    columns) and the estimator itself. MSE, MAE and R2 computed on the test
    split are printed for each algorithm.

    Args:
        sf_pass: Password used for the Snowflake connection.
        algos: Iterable of fully qualified estimator class paths, e.g.
            ``'snowflake.ml.modeling.svm.LinearSVR'``.
        dataset: Name of the table to read.
        target: Name of the label column; it is excluded from the features.

    Returns:
        A Snowpark DataFrame holding one ``PREDICTIONS_<ALGORITHM>`` column
        per algorithm, or ``None`` when ``algos`` is empty.

    Raises:
        ValueError: If ``target`` is not a column of ``dataset``.
    """
    import importlib
    from snowflake.snowpark import Session
    from snowflake.snowpark.types import BooleanType, StringType
    from snowflake.ml.modeling.pipeline import Pipeline
    from snowflake.ml.modeling.preprocessing import MinMaxScaler, OrdinalEncoder
    from snowflake.ml.modeling.metrics import mean_squared_error, mean_absolute_error, r2_score

    connection_parameters = {
        "account": "ug94937.us-east4.gcp",
        "user": "ADITYASINGH",
        "password": sf_pass,
        "role": "ADITYASINGH",  # optional
        "warehouse": "FOSFOR_INSIGHT_WH",  # optional
        "database": "FIRST_DB",  # optional
        "schema": "PUBLIC",  # optional
    }

    # NOTE(review): the session is not closed here; the returned DataFrame is
    # presumably still bound to it, so closing would invalidate the result.
    session = Session.builder.configs(connection_parameters).create()
    session.sql_simplifier_enabled = True

    # Read dataset
    df_train, df_test = session.table(dataset).drop('ROW').random_split(weights=[0.9, 0.1], seed=0)
    # show() prints the preview itself; wrapping it in print() only added "None".
    df_train.show()
    features = df_train.columns
    features.remove(target)

    # Classify the feature columns from the DataFrame's own schema so that the
    # names match `features` exactly and the dropped ROW column is never
    # picked up. String and boolean columns are treated as categorical.
    categorical_features = [
        field.name
        for field in df_train.schema.fields
        if field.name != target and isinstance(field.datatype, (StringType, BooleanType))
    ]
    categorical_lookup = set(categorical_features)
    # Keep the table's column order so the model input order is reproducible.
    numerical_features = [name for name in features if name not in categorical_lookup]
    categorical_features_oe = [name + '_OE' for name in categorical_features]
    print("numerical_features: ", numerical_features)
    print("categorical_features_oe: ", categorical_features_oe)

    # Preprocessing steps; a step is only added when it has columns to work on.
    steps = []
    if categorical_features:
        steps.append(
            ('ord', OrdinalEncoder(input_cols=categorical_features, output_cols=categorical_features_oe))
        )
    if numerical_features:
        # The scaler overwrites the numerical columns in place.
        steps.append(
            ('scaler', MinMaxScaler(input_cols=numerical_features, output_cols=numerical_features))
        )

    df_all_pred = None
    for algorithm in algos:
        # Resolve "package.module.ClassName" into the estimator class.
        module_path, class_name = algorithm.rsplit('.', 1)
        module = importlib.import_module(module_path)
        print(class_name)
        estimator_cls = getattr(module, class_name)
        pred_col = f'PREDICTIONS_{class_name}'.upper()

        pipe = Pipeline(
            steps=steps + [
                ("algorithm", estimator_cls(input_cols=categorical_features_oe + numerical_features,
                                            label_cols=[target],
                                            output_cols=[pred_col])),
            ]
        )

        # Fit the pipeline
        fitted_pipe = pipe.fit(df_train)

        # Test the model
        df_test_pred = fitted_pipe.predict(df_test)

        # Combine the prediction columns of all algorithms.
        algo_pred = df_test_pred.select(df_test_pred[pred_col])
        if df_all_pred is None:
            df_all_pred = algo_pred
        else:
            # NOTE(review): no join key is supplied, so this looks like it pairs
            # every row with every row (Cartesian product) instead of aligning
            # predictions row by row - confirm and join on a row identifier.
            df_all_pred = df_all_pred.join(algo_pred)

        # Metrics on the held-out split
        mse = mean_squared_error(df=df_test_pred, y_true_col_names=target, y_pred_col_names=pred_col)
        mae = mean_absolute_error(df=df_test_pred, y_true_col_names=target, y_pred_col_names=pred_col)
        r2 = r2_score(df=df_test_pred, y_true_col_name=target, y_pred_col_name=pred_col)
        print(f'{class_name} MSE: {mse}')
        print(f'{class_name} MAE: {mae}')
        print(f'{class_name} R2: {r2}')

    return df_all_pred

In [None]:
import logging, sys
from snowflake.snowpark.session import Session
from snowflake.ml.registry.registry import Registry

# Settings handed to Session.builder.configs() by get_session().
# "user" and "password" are blank here and must be filled in before connecting.
CONNECTION_PARAMETERS = dict(
    account="ug94937.us-east4.gcp",
    user="",
    password="",
    role="VAIBHAV",
    database="FDC_DEV_VAIBHAV",
    warehouse="FOSFOR_INSIGHT_WH",
    schema="PUBLIC",
)



def create_stage(session, stage_name="demo"):
    """Create (or replace) a Snowflake stage and return its ``@`` reference.

    :param session: object exposing ``sql(query).collect()``, e.g. a Snowpark session.
    :param stage_name: name of the stage; it is interpolated into the SQL text
        as-is, so it must be a trusted identifier.
    :return: the stage location string, ``"@<stage_name>"``.
    :raises Exception: whatever the statement raised, after printing it.
    """
    try:
        session.sql(f"create or replace stage {stage_name}").collect()
        return f"@{stage_name}"
    except Exception as ex:
        print("Error while creating snowflake stage", ex)
        # Bare raise keeps the original traceback intact.
        raise

def get_session():
    """
    Build a Snowflake session from the module-level CONNECTION_PARAMETERS.

    Any failure is printed and then re-raised to the caller.
    :return: the session object produced by ``Session.builder``
    """
    try:
        builder = Session.builder.configs(CONNECTION_PARAMETERS)
        snowflake_session = builder.create()
    except Exception as err:
        print("Error while creating snowflake session", err)
        raise err
    return snowflake_session


# Stored Procedure
def train_ml_models(session: Session) -> list:
    """Train an XGBRegressor on the ``diamonds`` table and log it to the registry.

    The table is split 90/10 (fixed seed) into train and test sets. A pipeline
    of ordinal encoding, min-max scaling and XGBRegressor is fitted to predict
    ``PRICE``; MSE, MAE and R2 on the test split are printed. The fitted
    pipeline is then logged to the Snowflake model registry on a best-effort
    basis: a logging failure is reported but does not abort the run.

    :param session: Snowpark session used to read the table and reach the registry.
    :return: a one-element list with a dict describing the experiment run,
        including the computed metrics.
    """
    # Imports stay local to the function body, which is registered as a
    # stored procedure further down in this file.
    from snowflake.ml.modeling.pipeline import Pipeline
    from snowflake.ml.modeling.preprocessing import MinMaxScaler, OrdinalEncoder
    from snowflake.ml.modeling.metrics import mean_squared_error, mean_absolute_error, r2_score
    from snowflake.ml.modeling.xgboost import XGBRegressor
    from snowflake.ml.registry.registry import Registry

    # Random split
    df_train, df_test = session.table("diamonds").drop('ROW').random_split(weights=[0.9, 0.1], seed=0)
    cat_cols = ["CUT", "COLOR", "CLARITY"]
    cat_cols_oe = ["CUT_OE", "COLOR_OE", "CLARITY_OE"]
    num_cols = ["CARAT", "DEPTH", "TABLE_PCT", "X", "Y", "Z"]

    # Pipeline that does the preprocessing and the training of an XGBRegressor.
    # The scaler overwrites the numerical columns in place.
    pipe = Pipeline(steps=[("ord", OrdinalEncoder(input_cols=cat_cols, output_cols=cat_cols_oe)),
                           ("scaler", MinMaxScaler(input_cols=num_cols, output_cols=num_cols)),
                           ("regressor", XGBRegressor(input_cols=cat_cols_oe + num_cols, label_cols=["PRICE"],
                                                      output_cols=['PREDICTION'], n_jobs=-1))
                           ])
    # Fit the pipeline
    xgb_model = pipe.fit(df_train)

    # Evaluate on the held-out split
    df_test_pred = xgb_model.predict(df_test)
    mse = mean_squared_error(df=df_test_pred, y_true_col_names="PRICE", y_pred_col_names="PREDICTION")
    mae = mean_absolute_error(df=df_test_pred, y_true_col_names="PRICE", y_pred_col_names="PREDICTION")
    r2 = r2_score(df=df_test_pred, y_true_col_name="PRICE", y_pred_col_name="PREDICTION")
    print("Execution Completed")
    print(f'MSE: {mse}')
    print(f'MAE: {mae}')
    print(f'R2: {r2}')

    # Log the model into the Snowflake registry
    reg = Registry(session=session)
    model_name = "diamonds_model_v30"
    try:
        # NOTE(review): the logged "score" is the constant 96, not one of the
        # metrics computed above - confirm whether that is intended.
        reg.log_model(model=xgb_model,
                      model_name=model_name,
                      comment="test",
                      version_name="run1",
                      python_version="3.9.19",
                      conda_dependencies=["scikit-learn==1.3.2"],
                      metrics={"model_metrics": {"score": 96}, "project_id": "0001", "type": "EXP"})
    except Exception as ex:
        # Registration stays best-effort, but the failure must be visible
        # instead of being swallowed silently.
        print(f"Failed to log model {model_name} into the registry:", ex)

    # NOTE(review): RUN_STATUS is reported as successful even when the registry
    # logging above failed; key spellings are kept because callers may rely on them.
    return [{"EXP_NAME":""+model_name,
             "Version":"Run1",
             "matrices":{"model_metrics": {"MSE": mse, "MAE": mae, "r2": r2}, "project_id": "0001", "type": "EXP"},
             "Alogirthm_Type":"Regression",
             "Alogithm": "XGBRegressor",
             "RUN_STATUS":"SUCESS",
             "registry_exp_name":""}]


# Initialization: send log records to stdout and open the Snowflake session.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
print("Creating Snowflake Session object...")
session = get_session()
stage = create_stage(session)
print("Session has been created !")

# Register train_ml_models as a temporary stored procedure.
print("Creating stored procedure...")
session.sproc.register(func=train_ml_models,
                       name="train_ml_models",
                       packages=["snowflake-snowpark-python", "snowflake-ml-python"],
                       # The keyword is `is_permanent`; the misspelled
                       # `isPermanant` is not a parameter of register().
                       is_permanent=False,
                       stage_location=stage,
                       replace=True)
print("Stored procedure has been created successfully!")

# Run the procedure inside Snowflake and show what it returned.
print("Executing Stored Procedure")
procedure_response = session.call("train_ml_models")
print("Stored Procedure Executed Successfully !")
print(procedure_response)

# Log in mlflow
# NOTE(review): nothing is logged to MLflow here yet; the message below is
# printed unconditionally.
print("Logging in mlflow completed !")

In [None]:
# run_exp imports `os` only inside its own body, so the name has to be
# imported here as well; otherwise `os.environ` raises NameError.
import os

# Compare two regressors on the ALCOHOL_QUALITY table, predicting QUALITY.
# The password is read from the SF_Password environment variable (None if unset).
test = run_exp(os.environ.get('SF_Password'),
               ['snowflake.ml.modeling.linear_model.SGDRegressor',
                'snowflake.ml.modeling.svm.LinearSVR',
               ],
               'ALCOHOL_QUALITY',
               'QUALITY')