Connect to Snowflake with a Snowpark session (using a connection from the SnowSQL config)

In [1]:
from snowflake.snowpark.functions import sproc, udf, udtf, call_udf, col, lit
from snowflake.snowpark.types import IntegerType, StructType, StructField
from snowflake.snowpark import Session
from snowflake.ml.utils.connection_params import SnowflakeLoginOptions

# Open a Snowpark session from the login options stored under "test_conn".
session = (
    Session.builder
    .configs(SnowflakeLoginOptions("test_conn"))
    .create()
)

SnowflakeLoginOptions() is in private preview since 0.2.0. Do not use it in production. 


Anonymous Stored Procedure (with lambda)

In [2]:
# No name is given, so the procedure is registered as an anonymous one.
# The handler receives the server-side session as its first argument.
add_one = sproc(
    lambda session, x: session.sql(f"select {x} + 1").collect()[0][0],
    input_types=[IntegerType()],
    return_type=IntegerType(),
    packages=["snowflake-snowpark-python==1.13.0"],
)

# The returned handle is callable directly from the client.
ret = add_one(1)
print(f"add_one: {ret}")



add_one: 2


Generated temp stored proc:

```
CREATE TEMPORARY PROCEDURE ...(arg1 INT)
  RETURNS INT
  LANGUAGE PYTHON 
  VOLATILE
  RUNTIME_VERSION=3.9
  PACKAGES=('snowflake-snowpark-python','cloudpickle==2.0.0')
  HANDLER='compute'
  EXECUTE AS OWNER
AS $$
import pickle
func = pickle.loads(bytes.fromhex('80059...02e'))
# ...
def compute(session,arg1):
  return func(session,arg1)
$$

CALL ...(1::INT)
```


Named stored procedure (with lambda)

In [None]:
# Same pattern as an anonymous procedure, but registered under an explicit
# name; replace=True overwrites any existing "add_two_proc".
add_two = sproc(
    lambda session, x: session.sql(f"select {x} + 2").collect()[0][0],
    input_types=[IntegerType()],
    return_type=IntegerType(),
    name="add_two_proc",
    replace=True,
    packages=["snowflake-snowpark-python"],
)

# Because it has a name, the procedure can be invoked by name via the session.
ret = session.call("add_two_proc", 1)
print(f"add_two: {ret}")

Generated temp stored proc:

```
CREATE OR REPLACE TEMPORARY PROCEDURE add_two_proc(arg1 INT)
  RETURNS INT
  LANGUAGE PYTHON 
  VOLATILE
  RUNTIME_VERSION=3.9
  PACKAGES=('snowflake-snowpark-python','cloudpickle==2.0.0')
  HANDLER='compute'
  EXECUTE AS OWNER
AS $$
import pickle
func = pickle.loads(bytes.fromhex('800595...948652302e'))
# ...
def compute(session,arg1):
  return func(session,arg1)
$$

CALL add_two_proc(1::INT)
```

Registered stored procedure

In [None]:
# Permanent stored procedure "add_three"; stage_location names the stage
# used for the permanent object. Argument and return types come from the
# Python type hints instead of explicit input_types / return_type.
@sproc(
    name="add_three",
    replace=True,
    is_permanent=True,
    stage_location="@int_stage",
    packages=["snowflake-snowpark-python"],
)
def add_three(session: Session, x: int) -> int:
    # Run the addition as SQL and return the single cell of the single row.
    first_row = session.sql(f"select {x} + 3").collect()[0]
    return first_row[0]


# Invoke the procedure through a plain SQL CALL statement.
ret = (
    session.sql("call add_three(1)")
    .collect()[0][0]
)
print(f"add_three: {ret}")

Generated stored proc:

```
CREATE OR REPLACE PROCEDURE add_three(arg1 BIGINT)
  RETURNS BIGINT
  LANGUAGE PYTHON 
  VOLATILE
  RUNTIME_VERSION=3.9
  PACKAGES=('snowflake-snowpark-python','cloudpickle==2.0.0')
  HANDLER='compute'
  EXECUTE AS OWNER
AS $$
import pickle
func = pickle.loads(bytes.fromhex('8005953d...48652302e'))
# ...
def compute(session,arg1):
  return func(session,arg1)
$$

call add_three(1)
```

Anonymous UDF (with lambda)

In [None]:
# No name is given, so the UDF is registered as an anonymous one.
# Unlike a stored procedure handler, the lambda takes no session argument.
add_five = udf(
    lambda x: x+5,
    input_types=[IntegerType()],
    return_type=IntegerType(),
)

# Build a one-row, one-column DataFrame and apply the UDF to column "a".
df = session.create_dataframe([[1]]).to_df("a")
ret = (
    df.select(add_five(col("a")))
    .collect()[0][0]
)
print(f"add_five: {ret}")

Generated temp UDF:

```
CREATE TEMPORARY FUNCTION ...(arg1 INT)
  RETURNS INT
  LANGUAGE PYTHON 
  VOLATILE
  RUNTIME_VERSION=3.9
  PACKAGES=('cloudpickle==2.0.0')
  HANDLER='compute'
AS $$
import pickle
func = pickle.loads(bytes.fromhex('800595...2302e'))
# ...
from threading import RLock
lock = RLock()

class InvokedFlag:
    def __init__(self):
        self.invoked = False

def lock_function_once(f, flag):
    def wrapper(*args, **kwargs):
        if not flag.invoked:
            with lock:
                if not flag.invoked:
                    result = f(*args, **kwargs)
                    flag.invoked = True
                    return result
                return f(*args, **kwargs)
        return f(*args, **kwargs)
    return wrapper

invoked = InvokedFlag()
def compute(arg1):
  return lock_function_once(func, invoked)(arg1)
$$;

SELECT ...("A")
FROM (SELECT "_1" AS "A"
  FROM (SELECT $1 AS "_1" FROM VALUES (1::INT)))
```

Named UDF (with lambda)

In [None]:
# UDF registered under an explicit name; replace=True overwrites any
# existing "add_six_proc".
add_six = udf(
    lambda x: x+6,
    input_types=[IntegerType()],
    return_type=IntegerType(),
    name="add_six_proc",
    replace=True,
)

# Because it has a name, the UDF can be called from a plain SQL SELECT.
ret = (
    session.sql("select add_six_proc(1)")
    .collect()[0][0]
)
print(f"add_six: {ret}")

Generated temp UDF:

```
CREATE OR REPLACE TEMPORARY FUNCTION add_six_proc(arg1 INT)
  RETURNS INT
  LANGUAGE PYTHON 
  VOLATILE
  RUNTIME_VERSION=3.9
  PACKAGES=('snowflake-snowpark-python','cloudpickle==2.0.0')
  HANDLER='compute'
AS $$
import pickle
func = pickle.loads(bytes.fromhex('800595...8652302e'))
# ...
from threading import RLock
lock = RLock()
class InvokedFlag:
    def __init__(self):
        self.invoked = False

def lock_function_once(f, flag):
    def wrapper(*args, **kwargs):
        if not flag.invoked:
            with lock:
                if not flag.invoked:
                    result = f(*args, **kwargs)
                    flag.invoked = True
                    return result
                return f(*args, **kwargs)
        return f(*args, **kwargs)
    return wrapper

invoked = InvokedFlag()
def compute(arg1):
  return lock_function_once(func, invoked)(arg1)
$$

select add_six_proc(1)
```

Registered UDF

In [None]:
# Permanent UDF "add_seven"; stage_location names the stage used for the
# permanent object. Argument and return types come from the type hints.
@udf(
    name="add_seven",
    replace=True,
    is_permanent=True,
    stage_location="@int_stage",
)
def add_seven(x: int) -> int:
    # Plain Python arithmetic on the incoming value.
    return x + 7


# Look the UDF up by name with call_udf and apply it to column "a".
df = session.create_dataframe([[1]], schema=["a"])
ret = (
    df.select(call_udf("add_seven", col("a")))
    .collect()[0][0]
)
print(f"add_seven: {ret}")

Generated UDF:

```
CREATE OR REPLACE FUNCTION add_seven(arg1 BIGINT)
  RETURNS BIGINT
  LANGUAGE PYTHON 
  VOLATILE
  RUNTIME_VERSION=3.9
  PACKAGES=('snowflake-snowpark-python','cloudpickle==2.0.0')
  HANDLER='compute'
AS $$
import pickle
func = pickle.loads(bytes.fromhex('800595...652302e'))
# ...
from threading import RLock
lock = RLock()
class InvokedFlag:
    def __init__(self):
        self.invoked = False

def lock_function_once(f, flag):
    def wrapper(*args, **kwargs):
        if not flag.invoked:
            with lock:
                if not flag.invoked:
                    result = f(*args, **kwargs)
                    flag.invoked = True
                    return result
                return f(*args, **kwargs)
        return f(*args, **kwargs)
    return wrapper

invoked = InvokedFlag()
def compute(arg1):
  return lock_function_once(func, invoked)(arg1)
$$;

select add_seven(1)
```

Anonymous UDTF (with handler class)

In [None]:
# UDTF handler class
class GetTwo:
    """Handler whose ``process`` yields ``(1,)`` and then ``(n,)``."""

    def process(self, n):
        # Two 1-tuples per call: the constant 1 first, then the input value.
        yield from ((1,), (n,))

# Register GetTwo as a UDTF: one integer argument in, and a table with a
# single integer column called "number" out. No name is supplied.
get_two = udtf(
    GetTwo,
    output_schema=StructType([StructField("number", IntegerType())]),
    input_types=[IntegerType()],
)

# Call the table function with the literal 3 and fetch every produced row.
ret = (
    session.table_function(get_two(lit(3)))
    .collect()
)
print(f"get_two: {ret}")

Generated temp UDTF:

```
CREATE TEMPORARY FUNCTION ...(arg1 INT)
  RETURNS TABLE (NUMBER INT)
  LANGUAGE PYTHON 
  VOLATILE
  RUNTIME_VERSION=3.9
  PACKAGES=('cloudpickle==2.0.0')
  HANDLER='compute'
AS $$
import pickle
func = pickle.loads(bytes.fromhex('80059...02e'))

from threading import RLock
lock = RLock()
class InvokedFlag:
    def __init__(self):
        self.invoked = False

def lock_function_once(f, flag):
    def wrapper(*args, **kwargs):
        if not flag.invoked:
            with lock:
                if not flag.invoked:
                    result = f(*args, **kwargs)
                    flag.invoked = True
                    return result
                return f(*args, **kwargs)
        return f(*args, **kwargs)
    return wrapper

init_invoked = InvokedFlag()
process_invoked = InvokedFlag()
end_partition_invoked = InvokedFlag()

class compute(func):
    def __init__(self):
        lock_function_once(super().__init__, init_invoked)()
    def process(self, arg1):
        return lock_function_once(super().process, process_invoked)(arg1)
$$;

SELECT * FROM TABLE(...(3::INT));
```