In [1]:
import code_library as cl
from dotenv import dotenv_values
from snowflake.snowpark.types import IntegerType, StringType, StructField, StructType, DateType

In [2]:
# Read the key/value pairs from the local .env file; the connection settings below are built from them.
secrets = dotenv_values(dotenv_path=".env")

In [3]:
# Credentials are taken from the loaded .env values; warehouse, database and schema
# are fixed for this demo.
connection_parameters = {
    **{key: secrets[key] for key in ("account", "user", "password", "role")},
    "warehouse": "COMPUTE_WH",
    "database": "DEMO_DB",
    "schema": "PUBLIC",
}

In [4]:
# Open the connection through the code_library helper using the parameters defined earlier.
# The returned object is used later in this notebook as a session (`create_dataframe`, `sql`).
session = cl.snowconnection(connection_parameters)

## Testing the second function, copy_to_table

In [5]:
# Load configuration handed to the copy helper: target and reject tables, the
# columns to populate, the error policy, and the staged source files.
config_file = dict(
    Database_name="DEMO_DB",
    Schema_name="PUBLIC",
    Target_table="EMPLOYEE",
    Reject_table="EMPLOYEE_REJECTS",
    target_columns=["FIRST_NAME", "LAST_NAME", "EMAIL", "ADDRESS", "CITY", "DOJ"],
    on_error="CONTINUE",
    Source_location="@demo_db.external_stages.s3_stg/sp_csv/",
    Source_file_type="csv",
)

In [6]:
# Declare the schema of the CSV files: five string columns followed by the DOJ date column.
schema = StructType(
    [
        StructField(column_name, StringType())
        for column_name in ("FIRST_NAME", "LAST_NAME", "EMAIL", "ADDRESS", "CITY")
    ]
    + [StructField("DOJ", DateType())]
)

In [7]:
# help() writes the documentation to stdout itself and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
help(StructField("DOJ", DateType()))

Help on StructField in module snowflake.snowpark.types object:

class StructField(builtins.object)
 |  StructField(column_identifier: Union[snowflake.snowpark.types.ColumnIdentifier, str], datatype: snowflake.snowpark.types.DataType, nullable: bool = True) -> None
 |  
 |  Represents the content of :class:`StructField`.
 |  
 |  Methods defined here:
 |  
 |  __eq__(self, other)
 |      Return self==value.
 |  
 |  __init__(self, column_identifier: Union[snowflake.snowpark.types.ColumnIdentifier, str], datatype: snowflake.snowpark.types.DataType, nullable: bool = True) -> None
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __repr__(self) -> str
 |      Return repr(self).
 |  
 |  ----------------------------------------------------------------------
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)
 |  
 |  name
 |   

In [8]:
# Run the library's copy helper with the session, the load configuration and the CSV schema.
# It returns two values: the per-file load results and a query id
# (presumably the id of the COPY statement — confirm in code_library).
copied_into_result, qid = cl.copy_to_table(session, config_file, schema)

In [9]:
# Show the raw per-file load results returned by copy_to_table.
print(copied_into_result)

[Row(file='s3://inzs3bucketlearn/sp_csv/employee02.csv', status='LOADED', rows_parsed=5, rows_loaded=5, error_limit=5, errors_seen=0, first_error=None, first_error_line=None, first_error_character=None, first_error_column_name=None), Row(file='s3://inzs3bucketlearn/sp_csv/employee04.csv', status='LOADED', rows_parsed=5, rows_loaded=5, error_limit=5, errors_seen=0, first_error=None, first_error_line=None, first_error_character=None, first_error_column_name=None), Row(file='s3://inzs3bucketlearn/sp_csv/employee01.csv', status='LOADED', rows_parsed=5, rows_loaded=5, error_limit=5, errors_seen=0, first_error=None, first_error_line=None, first_error_character=None, first_error_column_name=None), Row(file='s3://inzs3bucketlearn/sp_csv/employee03.csv', status='LOADED', rows_parsed=5, rows_loaded=5, error_limit=5, errors_seen=0, first_error=None, first_error_line=None, first_error_character=None, first_error_column_name=None), Row(file='s3://inzs3bucketlearn/sp_csv/employee05.csv', status='PAR

In [10]:
# Show the query id returned alongside the load results.
print(qid)

01b5075c-3201-1cdd-0008-f88a0001971a


In [11]:
# Wrap the returned rows in a DataFrame so they can be rendered as a table with .show().
copied_into_result_df = session.create_dataframe(copied_into_result)

# Important: skip the CSV header row when reading the staged files

In [16]:
# The show() call below is intentionally commented out; the strings that follow record the output of an earlier failed run and how it was fixed.
#copied_into_result_df.show()

x = """
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"FILE"                                       |"STATUS"          |"ROWS_PARSED"  |"ROWS_LOADED"  |"ERROR_LIMIT"  |"ERRORS_SEEN"  |"FIRST_ERROR"                 |"FIRST_ERROR_LINE"  |"FIRST_ERROR_CHARACTER"  |"FIRST_ERROR_COLUMN_NAME"  |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|s3://inzs3bucketlearn/sp_csv/employee04.csv  |PARTIALLY_LOADED  |6              |5              |6              |1              |Date 'DOJ' is not recognized  |1                   |42                       |"EMPLOYEE"["DOJ":6]        |
|s3://inzs3bucketlearn/sp_csv/employee01.csv  |PARTIALLY_LOADED  |6              |5              |6              |1              |Date 'DOJ' is not recognized  |1                   |42                       |"EMPLOYEE"["DOJ":6]        |
|s3://inzs3bucketlearn/sp_csv/employee05.csv  |PARTIALLY_LOADED  |6              |4              |6              |2              |Date 'DOJ' is not recognized  |1                   |42                       |"EMPLOYEE"["DOJ":6]        |
|s3://inzs3bucketlearn/sp_csv/employee02.csv  |PARTIALLY_LOADED  |6              |5              |6              |1              |Date 'DOJ' is not recognized  |1                   |42                       |"EMPLOYEE"["DOJ":6]        |
|s3://inzs3bucketlearn/sp_csv/employee03.csv  |PARTIALLY_LOADED  |6              |5              |6              |1              |Date 'DOJ' is not recognized  |1                   |42                       |"EMPLOYEE"["DOJ":6]        |
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
"""
# Free-text note kept in a string literal (never executed): it records that the
# error was fixed in the code library by adding the skip_header option to the CSV read.
y = """
#This error was rectified in the following manner: 
 >> in code library file we made the following change: 
     df = session.read.options({"skip_header":1}).schema(schema).csv("'"+Source_location+"'")
 >> You can search the above line to see it in the code initially it was like shown below, which was wrong:
        .>>  df = session.read.schema(schema).csv("'"+Source_location+"'")
"""

In [13]:
# Create (or replace) the external stage using the s3_int storage integration.
# No file format is attached to the stage in this statement.
# NOTE(review): the URL is interpolated without surrounding quotes, so the stored
# value presumably already contains them — confirm against the .env file.
result = session.sql(f"""
        CREATE OR REPLACE stage demo_db.external_stages.s3_stg
            URL = {cl.secrets['s3_bucket_for_stg']}
            STORAGE_INTEGRATION = s3_int
    """)

result.show()

-------------------------------------------
|"status"                                 |
-------------------------------------------
|Stage area S3_STG successfully created.  |
-------------------------------------------



In [14]:
# Render the per-file load results as a table.
copied_into_result_df.show()

-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|"FILE"                                       |"STATUS"          |"ROWS_PARSED"  |"ROWS_LOADED"  |"ERROR_LIMIT"  |"ERRORS_SEEN"  |"FIRST_ERROR"                            |"FIRST_ERROR_LINE"  |"FIRST_ERROR_CHARACTER"  |"FIRST_ERROR_COLUMN_NAME"  |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
|s3://inzs3bucketlearn/sp_csv/employee02.csv  |LOADED            |5              |5              |5              |0              |NULL                                     |NULL                |NULL                     |NULL                       |
|s3://in