Connect to Snowflake for the IMDB datasets

In [1]:
from snowflake.snowpark.session import Session
from snowflake.ml.utils.connection_params import SnowflakeLoginOptions

# Fetch the login options registered under the "test_conn" connection name
# and direct them at the IMDB database before connecting.
pars = SnowflakeLoginOptions("test_conn")
pars.update(database="IMDB")

# Build the Snowpark session from those options; every query issued through
# it carries the "sentiment-1" query tag.
session = (
    Session.builder
    .configs(pars)
    .create()
)
session.query_tag = "sentiment-1"

SnowflakeLoginOptions() is in private preview since 0.2.0. Do not use it in production. 


Deploy all the code as a Snowflake stored procedure

In [2]:
from snowflake.snowpark.functions import sproc
from snowflake.snowpark.types import Variant
from train_imdb_module import train_imdb

@sproc(
    name='train_imdb_sp',
    is_permanent=True,
    replace=True,
    stage_location='@files',
    imports=["@FILES/train_imdb_module.py"],
    packages=[
        'snowflake-snowpark-python',
        'scikit-learn',
        'pandas',
        'numpy',
        'nltk',
        'joblib',
        'cachetools',
    ],
)
def train_imdb_sp(session: Session, train_dataset_name: str, tmp_folder: str) -> Variant:
    """Stored-procedure entry point that delegates to ``train_imdb``.

    Registered as the permanent procedure ``train_imdb_sp`` (replacing any
    existing one), with ``train_imdb_module.py`` imported from the ``@FILES``
    stage so that ``train_imdb`` is available where the procedure runs.

    Args:
        session: Snowpark session the procedure runs with.
        train_dataset_name: Name of the training dataset, passed through
            unchanged to ``train_imdb``.
        tmp_folder: Temporary folder path, passed through unchanged to
            ``train_imdb``.

    Returns:
        Whatever ``train_imdb`` returns, declared to Snowflake as a Variant.
    """
    return train_imdb(session, train_dataset_name, tmp_folder)



Execute the Snowflake stored procedure and list the files in the @MODELS stage

In [3]:
# Invoke the deployed procedure with "TRAIN_DATASET" as the dataset name and
# "/tmp/" as the temporary folder, then print whatever it returned.
ret = session.call("train_imdb_sp", "TRAIN_DATASET", "/tmp/")
print(ret)

# Show the files currently present in the @MODELS stage.
models_listing = session.sql("LS @MODELS")
models_listing.show()

----------------------------------------------------------------------------------------------------------------
|"name"                          |"size"    |"md5"                             |"last_modified"                |
----------------------------------------------------------------------------------------------------------------
|models/model_review.joblib.gz   |10820048  |1c63425ac807b5048c1e1f7ddc72da23  |Wed, 24 Apr 2024 15:52:14 GMT  |
|models/model_review1.joblib.gz  |10825280  |d26d8c66788af5757c780759f54fd289  |Wed, 24 Apr 2024 19:58:06 GMT  |
|models/model_review2.joblib.gz  |10831088  |f4f7de7178e43463789bf213a44d96f5  |Wed, 24 Apr 2024 19:28:46 GMT  |
|models/model_review3.joblib.gz  |10822480  |43d3eba9ea59d5c444348793f3d9c4c0  |Wed, 24 Apr 2024 19:38:02 GMT  |
|models/vect_review.joblib.gz    |27852416  |92d019222e89c2db309379b0584dc958  |Wed, 24 Apr 2024 15:51:49 GMT  |
|models/vect_review1.joblib.gz   |27852400  |10210349ddd5cae45d3b11017bdd6fa4  |Wed, 24 Apr 2024