Connect to Snowflake

In [None]:
from snowflake.snowpark import Session
from snowflake.ml.utils.connection_params import SnowflakeLoginOptions

# Create the Snowpark session from the "test_conn" login options, then print
# the current warehouse, database and schema as a quick connectivity check.
session = (
    Session.builder
    .configs(SnowflakeLoginOptions("test_conn"))
    .create()
)
print(
    session.sql(
        'select current_warehouse(), current_database(), current_schema()'
    ).collect()
)

Read model from stage

In [None]:
# Reset the imports and packages registered on the session before the UDF
# defined later in this notebook declares its own.
session.clear_imports()
session.clear_packages()

import cachetools
@cachetools.cached(cache={})
def read_model(model_file):
    """Load a joblib-serialized model from the Snowflake import directory.

    The directory is read from the ``snowflake_import_directory`` entry of
    ``sys._xoptions``. Results are memoized per ``model_file`` by
    ``cachetools.cached`` using a plain dict as the cache, so repeated calls
    with the same file name do not reopen the file.

    Args:
        model_file: File name of the model, relative to the import directory.

    Returns:
        The object deserialized by ``joblib.load``, or ``None`` when the
        import directory option is missing or empty.
    """
    # Imports are kept local to the function body, as in the original cell.
    import sys
    import os
    import joblib

    stage_dir = sys._xoptions.get("snowflake_import_directory")
    if not stage_dir:
        # No import directory configured: fall through with no model.
        return None

    model_path = os.path.join(stage_dir, model_file)
    with open(model_path, 'rb') as handle:
        return joblib.load(handle)

In [None]:
# Column names, in positional order, that the prediction UDF assigns to its
# input pandas DataFrame.
features = [
    'LONGITUDE',
    'LATITUDE',
    'HOUSING_MEDIAN_AGE',
    'TOTAL_ROOMS',
    'TOTAL_BEDROOMS',
    'POPULATION',
    'HOUSEHOLDS',
    'MEDIAN_INCOME',
    'OCEAN_PROXIMITY',
]

@F.udf(
    name="predict_house_value",
    is_permanent=True,
    stage_location=f'@{stage_name}/udf/',
    replace=True,
    imports=[return_dict['model_path']],
    packages=['scikit-learn', 'pandas', 'joblib', 'cachetools'],
)
def predict_house_value(
    pd_df: T.PandasDataFrame[float, float, float, float, float, float, float, float, str]
) -> T.PandasSeries[float]:
    """Predict house values for a batch of rows with the staged model.

    Registered as the permanent UDF ``predict_house_value``. The incoming
    pandas DataFrame has its columns renamed to ``features`` before it is
    handed to the model's ``predict`` method.

    Args:
        pd_df: Batch of input rows; eight float columns followed by one
            string column, in the order given by ``features``.

    Returns:
        The result of ``predict`` on the renamed frame.
    """
    # The batch arrives with positional column names; give it the names the
    # model is called with.
    pd_df.columns = features
    # Fixed: the loader defined in this notebook is read_model; the previous
    # call to read_file referenced an undefined name and raised NameError.
    # NOTE(review): the file name must match the basename of
    # return_dict['model_path'] -- confirm the 'fores' spelling is intended.
    m = read_model('housing_fores_reg.joblib')
    return m.predict(pd_df)

In [None]:
# Run inference by invoking the registered UDF through call_function.

snowdf_test = session.table("HOUSING_DATA_TEST")

# Everything except MEDIAN_HOUSE_VALUE is passed to the UDF as input.
inputs = snowdf_test.drop("MEDIAN_HOUSE_VALUE")

# Keep the inputs, add the UDF output as PREDICTION, and expose the original
# MEDIAN_HOUSE_VALUE column as ACTUAL_LABEL for side-by-side comparison.
snowdf_results = snowdf_test.select(
    *inputs,
    F.call_function("predict_house_value", *inputs).alias('PREDICTION'),
    F.col('MEDIAN_HOUSE_VALUE').alias('ACTUAL_LABEL'),
)

snowdf_results.show()