Connect to Snowflake

In [1]:
# see https://github.com/Snowflake-Labs/sfguide-snowpark-scikit-learn/blob/main/3_snowpark_end_to_end_ml.ipynb

from snowflake.snowpark import Session
from snowflake.ml.utils.connection_params import SnowflakeLoginOptions
# Build the login options for the "test_conn" connection first, then hand
# them to the Snowpark session builder.
connection_parameters = SnowflakeLoginOptions("test_conn")
session = Session.builder.configs(connection_parameters).create()

SnowflakeLoginOptions() is in private preview since 0.2.0. Do not use it in production. 


Define a cached helper that loads the model from the UDF import directory

In [2]:
# Clear the imports and packages currently registered on the session --
# presumably so that re-running this notebook does not carry over entries
# added by an earlier run before the UDF below is registered.
session.clear_imports()
session.clear_packages()

import cachetools
@cachetools.cached(cache={})
def load_file(filename):
    """Load a joblib-serialised object from the Snowflake UDF import directory.

    The result is memoised per ``filename`` by ``cachetools.cached``, so the
    file is opened and deserialised only on the first call for a given name.

    Args:
        filename: Base name of a file inside the directory reported by the
            ``snowflake_import_directory`` interpreter option.

    Returns:
        The object that ``joblib.load`` deserialises from the file.

    Raises:
        RuntimeError: If ``snowflake_import_directory`` is not set.
        OSError: If the file cannot be opened.
    """
    # Imports stay inside the function so it carries its own dependencies
    # wherever it ends up being executed.
    import joblib
    import os
    import sys

    import_dir = sys._xoptions.get("snowflake_import_directory")
    if not import_dir:
        # Fail loudly instead of falling through: an implicit None would be
        # memoised by the cache and only surface later as an opaque
        # AttributeError when the caller tries to use the "model".
        raise RuntimeError(
            "snowflake_import_directory is not set; "
            f"cannot load {filename!r} outside a Snowflake UDF environment"
        )

    # NOTE(review): joblib.load unpickles arbitrary Python objects -- only
    # load files from a location you trust.
    with open(os.path.join(import_dir, filename), 'rb') as file:
        return joblib.load(file)

Create vectorized UDF to predict house values

In [3]:
from snowflake.snowpark import functions as F
from snowflake.snowpark import types as T

@F.udf(
    name="predict_house_value",
    is_permanent=True,
    replace=True,
    stage_location='int_stage/udf/',
    imports=['@int_stage/models/2024-04-24-131730/housing_fores_reg.joblib'],
    packages=['scikit-learn', 'pandas', 'joblib', 'cachetools'],
)
def predict_house_value(
    df: T.PandasDataFrame[float, float, float, float, float, float, float, float, str],
) -> T.PandasSeries[float]:
    """Score one batch of rows with the model stored in 'housing_fores_reg.joblib'.

    Args:
        df: Batch of input rows with nine positional columns (eight float
            columns followed by one string column). The columns are renamed
            in place to the feature names listed below before prediction.

    Returns:
        The result of calling ``predict`` on the loaded model with the
        renamed frame.
    """
    # The batch arrives with positional columns; give them the names used
    # below before handing the frame to the model.
    feature_names = [
        'LONGITUDE', 'LATITUDE', 'HOUSING_MEDIAN_AGE', 'TOTAL_ROOMS',
        'TOTAL_BEDROOMS', 'POPULATION', 'HOUSEHOLDS', 'MEDIAN_INCOME',
        'OCEAN_PROXIMITY',
    ]
    df.columns = feature_names

    # load_file is decorated with a cache, so repeated batches reuse the
    # already-loaded model.
    model = load_file('housing_fores_reg.joblib')
    return model.predict(df)



Call the vectorized UDF for all test entries

In [4]:
# Every column except the label is passed to the UDF as a feature.
housing_test = session.table("HOUSING_TEST")
inputs = housing_test.drop("MEDIAN_HOUSE_VALUE")

# Derived columns: the UDF's prediction next to the known label.
prediction = F.call_function("predict_house_value", *inputs).alias('PREDICTION')
actual_label = F.col('MEDIAN_HOUSE_VALUE').alias('ACTUAL_LABEL')

df = housing_test.select(*inputs, prediction, actual_label)
df.show()

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"LONGITUDE"  |"LATITUDE"  |"HOUSING_MEDIAN_AGE"  |"TOTAL_ROOMS"  |"TOTAL_BEDROOMS"  |"POPULATION"  |"HOUSEHOLDS"  |"MEDIAN_INCOME"  |"OCEAN_PROXIMITY"  |"PREDICTION"  |"ACTUAL_LABEL"  |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|-122.26      |37.85       |50.0                  |1120.0         |283.0             |697.0         |264.0         |2.1250           |NEAR BAY           |135441.0      |140000.0        |
|-122.27      |37.84       |48.0                  |1922.0         |409.0             |1026.0        |335.0         |1.7969           |NEAR BAY           |118197.0      |110400.0        |
|-122.26      |37.83       |52.0                  |1470.0        