# Importing libs

In [1]:
import pandas as pd
import numpy as np
from snowflake.snowpark.session import Session
from snowflake.snowpark.functions import udf, avg, col
from snowflake.snowpark.types import IntegerType, FloatType, StringType, BooleanType
from snowflake.snowpark.files import SnowflakeFile
import sys
import logging
sys.path.append('..')
from credentials import Credentials

In [2]:
# Build the Snowpark session from the attributes of the local Credentials object.
cred = Credentials()
connection_parameters = vars(cred)
session = Session.builder.configs(connection_parameters).create()

In [3]:
# Set the role, database, warehouse and schema of this session before any
# query is issued.
session.use_role("SYSADMIN")
session.use_database("ACCOUNTADMIN_MGMT")
session.use_warehouse("ACCOUNTADMIN_MGMT")
session.use_schema("UTILITIES")

# Working with individual table

## Individual table

In [4]:
# Database, schema and table that the single-table walkthrough below operates on.
db_name = 'STITCH'
schema_name = 'SALESFORCEQASIT'
table_name:str = 'CASE'

In [5]:
def transform_name(name, type, name_alias):
    """
    Build the SELECT-list expression that exposes a column under its alias.

    VARCHAR columns are truncated to their first 2000 characters and cast to
    VARCHAR(2000); every other type is passed through unchanged.

    Parameters:
    name (str): The name of the column.
    type (str): The data type of the column. Non-string values are treated
        as "not VARCHAR".
    name_alias (str): The alias for the column.

    Returns:
    str: The SQL expression "<expression> AS <name_alias>".
    """
    # The f-string already interpolates both values. A trailing .format(...)
    # would re-parse the finished string, so a '{' or '}' inside a column name
    # or alias would raise or be substituted a second time.
    if isinstance(type, str) and 'VARCHAR' in type:
        return f"CAST(SUBSTR({name},1,2000) AS VARCHAR(2000)) AS {name_alias}"
    return f"{name} AS {name_alias}"

### Getting information from the ACCOUNTADMIN_MGMT.UTILITIES.SALESFORCE_TABLE_VIEW_MAPPING table

In [6]:
# Fetch the mapping row for this environment/table. The first matching row
# supplies the table name used for DESCRIBE and the name of the view to create.
table_view_mapping = session.sql(
    "SELECT * FROM ACCOUNTADMIN_MGMT.UTILITIES.SALESFORCE_TABLE_VIEW_MAPPING "
    f"WHERE NAME_SALESFORCE_ENVIRONMENT = '{schema_name}' "
    f"AND NAME_SALESFORCE_TABLE_ORIGINAL = '{table_name}'"
).collect()
# Fail with a readable message instead of an IndexError on [0].
if not table_view_mapping:
    raise LookupError(f"No view mapping found for {schema_name}.{table_name}")
table_view_mapping_dic = table_view_mapping[0].as_dict()
table_name = table_view_mapping_dic['NAME_SALESFORCE_TABLE_ORIGINAL']
table_for_desc_name = table_view_mapping_dic['NAME_SNOWFLAKE_RESERVED_WORD']
view_name = table_view_mapping_dic['NAME_SALESFORCE_VIEW_ALIAS']

### Getting all the columns from the table metadata

In [7]:
# Column metadata of the source table.
describe_rows = session.sql(f"DESCRIBE TABLE {db_name}.{schema_name}.{table_for_desc_name};").collect()
result_describe_table = [row.as_dict() for row in describe_rows]
# Keep only the three fields used downstream. Selecting them by name (rather
# than dropping a fixed list in place) does not depend on which other fields
# DESCRIBE TABLE returns, and leaves no mutated intermediate frame behind.
df_describe_table = pd.DataFrame(result_describe_table, columns=['name', 'type', 'kind'])
df_describe_table

Unnamed: 0,name,type,kind
0,ACCOUNTID,VARCHAR(16777216),COLUMN
1,ACCOUNT_EXECUTIVE1__C,VARCHAR(16777216),COLUMN
2,ACCOUNT_EXECUTIVE__C,VARCHAR(16777216),COLUMN
3,ACCOUNT_ID__C,VARCHAR(16777216),COLUMN
4,ACTUAL_TIME_LENGTH_MINUTES__C,FLOAT,COLUMN
...,...,...,...
346,FSM_CALL_BACK_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN
347,FSM_RESOLVE_CONTRACTUAL_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN
348,FSM_ONSITE_CONTRACTUAL_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN
349,FSM_ONSITE_BUSINESS_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN


### Gets the columns from the mapping table

In [8]:
# Attribute/alias pairs configured for this Salesforce object. The f-string
# is used as-is: a trailing .format() would re-parse the finished SQL text and
# fail on any brace it contained.
mapping_rows = session.sql(
    "SELECT NAME_SALESFORCE_ATTRIBUTE, NAME_ALIAS "
    "FROM ACCOUNTADMIN_MGMT.UTILITIES.SALESFORCE_COLUMN_MAPPING "
    f"WHERE NAME_SALESFORCE_OBJECT = '{table_name}'"
).collect()
result_salesforce_mapping = [row.as_dict() for row in mapping_rows]
df_salesforce_mapping = pd.DataFrame(result_salesforce_mapping)
df_salesforce_mapping

Unnamed: 0,NAME_SALESFORCE_ATTRIBUTE,NAME_ALIAS
0,OWNER_ID__C,OWNER_ID__C
1,QUOTE_REQUESTED_BY__C,QUOTE_REQUESTED_BY__C
2,FSM_CASE_RESOLVED__C,FSM_CASE_RESOLVED__C
3,FSM_WO_INCLUDED__C,FSM_WO_INCLUDED__C
4,_SDC_EXTRACTED_AT,SDC_EXTRACTED_AT
...,...,...
365,FSM_REASON_CODE__C,FSM_REASON_CODE__C
366,FSM_CUSTOMER_CONFIRMATION__C,FSM_CUSTOMER_CONFIRMATION__C
367,FSM_ONSITE_CONTRACTUAL_DUE_DATE__C,ONSITE_CONTRACTUAL_DUE_DATE
368,FSM_ONSITE_BUSINESS_DUE_DATE__C,ONSITE_BUSINESS_DUE_DATE


In [9]:
# Attach the configured alias to every physical column; a column without a
# mapping entry keeps its own name as alias.
if df_salesforce_mapping.empty:
	# assign() returns a new frame, so df_describe_table is not given an extra
	# column behind the back of the cells that displayed or reuse it.
	final_df = df_describe_table.assign(NAME_ALIAS=df_describe_table['name'])
else:
	final_df = pd.merge(df_describe_table, df_salesforce_mapping, left_on='name', right_on='NAME_SALESFORCE_ATTRIBUTE', how='left')
	# Vectorised equivalent of "alias if present, else the column name".
	final_df['NAME_ALIAS'] = final_df['NAME_ALIAS'].fillna(final_df['name'])

In [10]:
final_df

Unnamed: 0,name,type,kind,NAME_SALESFORCE_ATTRIBUTE,NAME_ALIAS
0,ACCOUNTID,VARCHAR(16777216),COLUMN,ACCOUNTID,ACCOUNTID
1,ACCOUNT_EXECUTIVE1__C,VARCHAR(16777216),COLUMN,ACCOUNT_EXECUTIVE1__C,ACCOUNT_EXECUTIVE1__C
2,ACCOUNT_EXECUTIVE__C,VARCHAR(16777216),COLUMN,ACCOUNT_EXECUTIVE__C,ACCOUNT_EXECUTIVE__C
3,ACCOUNT_ID__C,VARCHAR(16777216),COLUMN,ACCOUNT_ID__C,ACCOUNT_ID__C
4,ACTUAL_TIME_LENGTH_MINUTES__C,FLOAT,COLUMN,ACTUAL_TIME_LENGTH_MINUTES__C,ACTUAL_TIME_LENGTH_MINUTES__C
...,...,...,...,...,...
346,FSM_CALL_BACK_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN,FSM_CALL_BACK_DUE_DATE__C,FSM_CALL_BACK_DUE_DATE__C
347,FSM_RESOLVE_CONTRACTUAL_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN,FSM_RESOLVE_CONTRACTUAL_DUE_DATE__C,RESOLVE_CONTR_DUE_DT_C
348,FSM_ONSITE_CONTRACTUAL_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN,FSM_ONSITE_CONTRACTUAL_DUE_DATE__C,ONSITE_CONTRACTUAL_DUE_DATE
349,FSM_ONSITE_BUSINESS_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN,FSM_ONSITE_BUSINESS_DUE_DATE__C,ONSITE_BUSINESS_DUE_DATE


In [11]:
# Build each column's SELECT expression. Iterating the three columns directly
# always yields exactly one value per row, whereas a row-wise DataFrame.apply
# on a frame without rows hands back a DataFrame that cannot be assigned to a
# single column.
final_df['name_new'] = [
	transform_name(column_name, column_type, column_alias)
	for column_name, column_type, column_alias in zip(final_df['name'], final_df['type'], final_df['NAME_ALIAS'])
]

In [12]:
final_df

Unnamed: 0,name,type,kind,NAME_SALESFORCE_ATTRIBUTE,NAME_ALIAS,name_new
0,ACCOUNTID,VARCHAR(16777216),COLUMN,ACCOUNTID,ACCOUNTID,"CAST(SUBSTR(ACCOUNTID,1,2000) AS VARCHAR(2000)..."
1,ACCOUNT_EXECUTIVE1__C,VARCHAR(16777216),COLUMN,ACCOUNT_EXECUTIVE1__C,ACCOUNT_EXECUTIVE1__C,"CAST(SUBSTR(ACCOUNT_EXECUTIVE1__C,1,2000) AS V..."
2,ACCOUNT_EXECUTIVE__C,VARCHAR(16777216),COLUMN,ACCOUNT_EXECUTIVE__C,ACCOUNT_EXECUTIVE__C,"CAST(SUBSTR(ACCOUNT_EXECUTIVE__C,1,2000) AS VA..."
3,ACCOUNT_ID__C,VARCHAR(16777216),COLUMN,ACCOUNT_ID__C,ACCOUNT_ID__C,"CAST(SUBSTR(ACCOUNT_ID__C,1,2000) AS VARCHAR(2..."
4,ACTUAL_TIME_LENGTH_MINUTES__C,FLOAT,COLUMN,ACTUAL_TIME_LENGTH_MINUTES__C,ACTUAL_TIME_LENGTH_MINUTES__C,ACTUAL_TIME_LENGTH_MINUTES__C AS ACTUAL_TIME_L...
...,...,...,...,...,...,...
346,FSM_CALL_BACK_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN,FSM_CALL_BACK_DUE_DATE__C,FSM_CALL_BACK_DUE_DATE__C,FSM_CALL_BACK_DUE_DATE__C AS FSM_CALL_BACK_DUE...
347,FSM_RESOLVE_CONTRACTUAL_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN,FSM_RESOLVE_CONTRACTUAL_DUE_DATE__C,RESOLVE_CONTR_DUE_DT_C,FSM_RESOLVE_CONTRACTUAL_DUE_DATE__C AS RESOLVE...
348,FSM_ONSITE_CONTRACTUAL_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN,FSM_ONSITE_CONTRACTUAL_DUE_DATE__C,ONSITE_CONTRACTUAL_DUE_DATE,FSM_ONSITE_CONTRACTUAL_DUE_DATE__C AS ONSITE_C...
349,FSM_ONSITE_BUSINESS_DUE_DATE__C,TIMESTAMP_TZ(9),COLUMN,FSM_ONSITE_BUSINESS_DUE_DATE__C,ONSITE_BUSINESS_DUE_DATE,FSM_ONSITE_BUSINESS_DUE_DATE__C AS ONSITE_BUSI...


### Adding missing columns from mapping

In [15]:
# Mapped attributes that do not exist as physical columns of the table.
missing_columns = ['name', 'type', 'kind', 'NAME_SALESFORCE_ATTRIBUTE', 'NAME_ALIAS']
if df_salesforce_mapping.empty:
	# Without mapping rows nothing can be missing. Reusing df_describe_table
	# here would flag every physical column as missing and emit it a second
	# time as a NULL placeholder.
	final_df_missing = pd.DataFrame(columns=missing_columns)
else:
	mapping_with_columns = pd.merge(df_salesforce_mapping, df_describe_table[['name', 'type', 'kind']], left_on='NAME_SALESFORCE_ATTRIBUTE', right_on='name', how='left')
	# .copy() so the assignment below writes to an independent frame rather
	# than to a filtered slice of the merge result.
	final_df_missing = mapping_with_columns[mapping_with_columns['name'].isna()].copy()
	# 'name' is null on every remaining row, so a missing alias falls back to
	# the attribute name. fillna also works when no row is left.
	final_df_missing['NAME_ALIAS'] = final_df_missing['NAME_ALIAS'].fillna(final_df_missing['NAME_SALESFORCE_ATTRIBUTE'])
	# Reorder columns
	final_df_missing = final_df_missing.reindex(columns=missing_columns)

In [16]:
final_df_missing

Unnamed: 0,name,type,kind,NAME_SALESFORCE_ATTRIBUTE,NAME_ALIAS
16,,,,FSM_BO_FLEX_FIELD_3_LABEL__C,FSM_BO_FLEX_FIELD_3_LABEL__C
21,,,,FSM_BO_FLEX_FIELD_1_LABEL__C,FSM_BO_FLEX_FIELD_1_LABEL__C
27,,,,FSM_BO_FLEX_FIELD_3__C,FSM_BO_FLEX_FIELD_3__C
31,,,,FSM_BO_FLEX_FIELD_DATE_TIME_FIELDS_4__C,BO_FLEX_FIELD_DTTM_FIELDS_4_C
43,,,,FSM_BO_FLEX_FIELD_DATE_TIME_FIELDS_2__C,BO_FLEX_FIELD_DTTM_FIELDS_2_C
62,,,,FSM_TP_REF__C,FSM_TP_REF__C
103,,,,FSM_SCHEDULE_FLEX_FIELD_4__C,FSM_SCHEDULE_FLEX_FIELD_4__C
109,,,,FSM_BO_FLEX_FIELD_4__C,FSM_BO_FLEX_FIELD_4__C
128,,,,FSM_TIME_TO_DISPATCH__C,FSM_TIME_TO_DISPATCH__C
147,,,,FSM_BO_FLEX_FIELD_4_LABEL__C,FSM_BO_FLEX_FIELD_4_LABEL__C


In [None]:
def transform_name_null(name_alias) -> str:
	"""
	Build a NULL placeholder expression exposed under the given alias.

	Parameters:
	name_alias (str): The alias for the column.

	Returns:
	str: The SQL expression "CAST(NULL AS NUMBER) AS <name_alias>".
	"""
	# The f-string is returned as-is: a trailing .format(...) would re-parse
	# the finished string and raise or substitute again on any brace in the alias.
	return f"CAST(NULL AS NUMBER) AS {name_alias}"

In [None]:
# One NULL placeholder per missing attribute. A plain comprehension also works
# when there are no missing attributes, where a row-wise DataFrame.apply would
# return a DataFrame that cannot be assigned to a single column.
final_df_missing['name_new'] = [transform_name_null(column_alias) for column_alias in final_df_missing['NAME_ALIAS']]

In [None]:
final_df_missing

### Merging the dataframes

In [None]:
# Stack the two frames: rows of final_df first, then rows of final_df_missing.
# The original row labels of both frames are kept.
result_df = pd.concat([final_df, final_df_missing])

In [None]:
result_df

### Transforming to list

In [None]:
column_list:list = result_df['name_new'].to_list()

In [None]:
columns_str:str = ', '.join(column_list)

In [None]:
# DDL for the view: selects the generated column expressions from the source
# table and creates/replaces the view in the same database and schema.
final_query:str = f"CREATE OR REPLACE VIEW {db_name}.{schema_name}.{view_name} AS SELECT {columns_str} FROM {db_name}.{schema_name}.{table_for_desc_name};"

In [None]:
final_query

# Stored Procedure sp_create_salesforce_views

In [4]:
def transform_name(name, type, name_alias):
    """
    Build the SELECT-list expression that exposes a column under its alias.

    VARCHAR columns are truncated to their first 2000 characters and cast to
    VARCHAR(2000); every other type is passed through unchanged.

    Parameters:
    name (str): The name of the column.
    type (str): The data type of the column. Non-string values are treated
        as "not VARCHAR".
    name_alias (str): The alias for the column.

    Returns:
    str: The SQL expression "<expression> AS <name_alias>".
    """
    # The f-string already interpolates both values. A trailing .format(...)
    # would re-parse the finished string, so a '{' or '}' inside a column name
    # or alias would raise or be substituted a second time.
    if isinstance(type, str) and 'VARCHAR' in type:
        return f"CAST(SUBSTR({name},1,2000) AS VARCHAR(2000)) AS {name_alias}"
    return f"{name} AS {name_alias}"

In [5]:
def transform_name_null(name_alias) -> str:
	"""
	Build a NULL placeholder expression exposed under the given alias.

	Parameters:
	name_alias (str): The alias for the column.

	Returns:
	str: The SQL expression "CAST(NULL AS NUMBER) AS <name_alias>".
	"""
	# The f-string is returned as-is: a trailing .format(...) would re-parse
	# the finished string and raise or substitute again on any brace in the alias.
	return f"CAST(NULL AS NUMBER) AS {name_alias}"

In [6]:
def sp_create_salesforce_views(session: Session, db_name: str, schema_name: str, table_name: str) -> str:
	"""
	Create or replace the alias view for one table.

	Steps: read the table's row from SALESFORCE_TABLE_VIEW_MAPPING, describe
	the source table, combine its columns with SALESFORCE_COLUMN_MAPPING and
	run CREATE OR REPLACE VIEW. Physical columns are emitted through
	transform_name; mapped attributes that the table does not have are emitted
	as NULL placeholders through transform_name_null.

	Parameters:
	session (Session): Snowpark session used for every statement.
	db_name (str): Database holding the source table and the view.
	schema_name (str): Schema of the source table; also matched against
		NAME_SALESFORCE_ENVIRONMENT.
	table_name (str): Value matched against NAME_SALESFORCE_TABLE_ORIGINAL.

	Returns:
	str: "SUCCESS" when the view statement ran, "FAILURE" when any step raised
	(the exception is logged and not propagated).
	"""
	# Bound before the try block so the except handler can always use it.
	logger = logging.getLogger("sp_create_salesforce_views")
	try:
		# Resolve the source-table and view names for this environment/table.
		table_view_mapping = session.sql(
			"SELECT * FROM ACCOUNTADMIN_MGMT.UTILITIES.SALESFORCE_TABLE_VIEW_MAPPING "
			f"WHERE NAME_SALESFORCE_ENVIRONMENT = '{schema_name}' "
			f"AND NAME_SALESFORCE_TABLE_ORIGINAL = '{table_name}'"
		).collect()
		if not table_view_mapping:
			# Readable message in the log instead of an IndexError on [0].
			raise LookupError(f"No view mapping found for {schema_name}.{table_name}")
		table_view_mapping_dic = table_view_mapping[0].as_dict()
		table_name = table_view_mapping_dic['NAME_SALESFORCE_TABLE_ORIGINAL']
		table_for_desc_name = table_view_mapping_dic['NAME_SNOWFLAKE_RESERVED_WORD']
		view_name = table_view_mapping_dic['NAME_SALESFORCE_VIEW_ALIAS']

		# Physical columns of the source table. Only name/type/kind are used,
		# so they are selected by name instead of dropping a fixed list of
		# the other DESCRIBE fields.
		describe_rows = session.sql(f"DESCRIBE TABLE {db_name}.{schema_name}.{table_for_desc_name};").collect()
		df_describe_table = pd.DataFrame([row.as_dict() for row in describe_rows], columns=['name', 'type', 'kind'])

		# Configured attribute/alias pairs. Naming the columns explicitly keeps
		# the frame mergeable even when the query returns no rows.
		mapping_rows = session.sql(
			"SELECT NAME_SALESFORCE_ATTRIBUTE, NAME_ALIAS "
			"FROM ACCOUNTADMIN_MGMT.UTILITIES.SALESFORCE_COLUMN_MAPPING "
			f"WHERE NAME_SALESFORCE_OBJECT = '{table_name}'"
		).collect()
		df_salesforce_mapping = pd.DataFrame([row.as_dict() for row in mapping_rows], columns=['NAME_SALESFORCE_ATTRIBUTE', 'NAME_ALIAS'])

		# Every physical column, with its alias; unmapped columns keep their name.
		final_df = pd.merge(df_describe_table, df_salesforce_mapping, left_on='name', right_on='NAME_SALESFORCE_ATTRIBUTE', how='left')
		final_df['NAME_ALIAS'] = final_df['NAME_ALIAS'].fillna(final_df['name'])

		# Mapped attributes with no physical column. With an empty mapping this
		# is empty, so no physical column is emitted twice.
		final_df_missing = pd.merge(df_salesforce_mapping, df_describe_table, left_on='NAME_SALESFORCE_ATTRIBUTE', right_on='name', how='left')
		final_df_missing = final_df_missing[final_df_missing['name'].isna()]
		# A missing alias falls back to the attribute name ('name' is null here).
		missing_aliases = final_df_missing['NAME_ALIAS'].fillna(final_df_missing['NAME_SALESFORCE_ATTRIBUTE'])

		# Plain iteration instead of row-wise DataFrame.apply: apply returns a
		# DataFrame for a frame without rows, which cannot be assigned to one column.
		column_list: list = [
			transform_name(column_name, column_type, column_alias)
			for column_name, column_type, column_alias in zip(final_df['name'], final_df['type'], final_df['NAME_ALIAS'])
		]
		column_list.extend(transform_name_null(column_alias) for column_alias in missing_aliases)

		columns_str: str = ', '.join(column_list)

		final_query: str = f"CREATE OR REPLACE VIEW {db_name}.{schema_name}.{view_name} AS SELECT {columns_str} FROM {db_name}.{schema_name}.{table_for_desc_name};"

		session.sql(final_query).collect()

		return "SUCCESS"
	except Exception as e:
		logger.error(f"Error: {e}", exc_info=True)
		return "FAILURE"

In [None]:
# Database, schema and table passed to sp_create_salesforce_views below.
db_name = 'STITCH'
schema_name = 'SALESFORCEQASIT'
table_name:str = 'CASE'

In [None]:
sp_create_salesforce_views(session, db_name, schema_name, table_name)

## Register sp_create_salesforce_views

In [None]:
# Register the view builder as a permanent stored procedure that executes as
# the caller; replace=True is passed so an existing registration is replaced.
session.sproc.register(
	func=sp_create_salesforce_views,
	name="sp_create_salesforce_views",
	packages=["snowflake-snowpark-python", "pandas", "numpy"],
	stage_location="@ACCOUNTADMIN_MGMT.UTILITIES.ACCOUNTADMIN_MGMT_STAGE",
	is_permanent=True,
	replace=True,
	execute_as='caller',
	# source_code_display=False,
)

In [None]:
session.call("sp_create_salesforce_views", db_name, schema_name, table_name)

# All Tables

In [None]:
#'SALESFORCEQASIT','SALESFORCEFSL3'

In [8]:
def sp_run_dynamic_salesforce_views(session: Session) -> str:
    """
    Rebuild the alias views for every table of the configured schemas.

    For each schema, lists its tables with SHOW TABLES and calls the
    sp_create_salesforce_views stored procedure once per table, skipping
    _SDC_REJECTED.

    Parameters:
    session (Session): Snowpark session used for every statement.

    Returns:
    str: Always "SUCCESS". Tables whose procedure call returned anything else
    are reported through the logger.
    """
    logger = logging.getLogger("sp_run_dynamic_salesforce_views")
    database: str = 'STITCH'
    skipped_tables = {'_SDC_REJECTED'}

    for schema in ['SALESFORCEQASIT', 'WORKDAYSANDBOXPREVIEW']:
        print(f'------->{schema}')
        # Use the same database value for listing and for the procedure call.
        result = session.sql(f"SHOW TABLES IN {database}.{schema}").collect()
        df = pd.DataFrame(result)
        # A schema without tables yields a frame with no 'name' column.
        tables: list = [] if df.empty else df['name'].to_list()
        for table in tables:
            if table in skipped_tables:
                continue
            print(table)
            status = session.call("sp_create_salesforce_views", database, schema, table)
            # The procedure reports errors through its return value; surface
            # them instead of discarding the result.
            if status != "SUCCESS":
                logger.warning("sp_create_salesforce_views returned %s for %s.%s.%s", status, database, schema, table)

    return "SUCCESS"

In [None]:
sp_run_dynamic_salesforce_views(session=session)

# Multiprocessing

In [None]:
# NOTE(review): this import lives in the middle of the notebook rather than in
# the import cell at the top.
import multiprocessing as mp

def process_table(database, schema, table):
    """
    Call sp_create_salesforce_views for one table unless it is _SDC_REJECTED.

    NOTE(review): `session` is not a parameter; it is looked up as a global
    when the function runs, i.e. inside a pool worker. Whether a Snowpark
    session can be used from a worker process is not established by this
    notebook -- confirm before relying on this cell.
    """
    if table not in ['_SDC_REJECTED']:
        print(table)
        session.call("sp_create_salesforce_views", database, schema, table)

# NOTE(review): same name as the sequential definition earlier in the
# notebook; running this cell replaces that definition for all later cells,
# including the registration cell below.
def sp_run_dynamic_salesforce_views(session: Session) -> str:
    """
    Variant of the runner that dispatches one process_table job per table to a
    process pool.

    The `session` argument is used for SHOW TABLES only; the per-table calls
    go through process_table, which does not receive it.

    Returns:
    str: Always "SUCCESS".
    """
    database:str = 'STITCH'

    for schema in ['SALESFORCEQASIT', 'WORKDAYSANDBOXPREVIEW']:
        print(f'------->{schema}'.format(schema))
        result = session.sql(f"SHOW TABLES IN STITCH.{schema}".format(schema)).collect()
        df = pd.DataFrame(result)
        tables:list = df['name'].to_list()
        # A new 4-worker pool is created for each schema.
        pool = mp.Pool(processes=4)
        for table in tables:
            # The AsyncResult is discarded, so an exception raised inside a
            # worker is never re-raised in this process.
            pool.apply_async(process_table, args=(database, schema, table))
        # Stop accepting jobs, then wait for the submitted ones to finish.
        pool.close()
        pool.join()
    
    return "SUCCESS"

In [None]:
sp_run_dynamic_salesforce_views(session=session)

# Creating stored procedure

In [9]:
# Register the runner as a permanent stored procedure that executes as the
# caller; replace=True is passed so an existing registration is replaced.
session.sproc.register(
	func=sp_run_dynamic_salesforce_views,
	name="sp_run_dynamic_salesforce_views",
	packages=["snowflake-snowpark-python", "pandas", "numpy"],
	stage_location="@ACCOUNTADMIN_MGMT.UTILITIES.ACCOUNTADMIN_MGMT_STAGE",
	is_permanent=True,
	replace=True,
	execute_as='caller',
	# source_code_display=False,
)



<snowflake.snowpark.stored_procedure.StoredProcedure at 0x7fffbb466100>

In [10]:
session.call("sp_run_dynamic_salesforce_views")

'SUCCESS'

## Creating Task

In [None]:
# Set the role, database, warehouse and schema again before the task
# statements below are issued.
session.use_role("SYSADMIN")
session.use_database("ACCOUNTADMIN_MGMT")
session.use_warehouse("ACCOUNTADMIN_MGMT")
session.use_schema("UTILITIES")

In [None]:
# DDL for a task that calls SP_RUN_DYNAMIC_SALESFORCE_VIEWS on the
# ACCOUNTADMIN_MGMT warehouse. It is chained AFTER
# TASK_RUN_DYNAMIC_SALESFORCE_COLUMN_DETECTOR and disallows overlapping runs.
task_run_dynamic_salesforce_views = """
CREATE OR REPLACE TASK ACCOUNTADMIN_MGMT.UTILITIES.TASK_RUN_DYNAMIC_SALESFORCE_VIEWS COMMENT = 'Run dynamic salesforce views'
    WAREHOUSE = 'ACCOUNTADMIN_MGMT'
    ALLOW_OVERLAPPING_EXECUTION = FALSE
	AFTER ACCOUNTADMIN_MGMT.UTILITIES.TASK_RUN_DYNAMIC_SALESFORCE_COLUMN_DETECTOR

AS
        CALL ACCOUNTADMIN_MGMT.UTILITIES.SP_RUN_DYNAMIC_SALESFORCE_VIEWS();
"""

In [None]:
session.sql(task_run_dynamic_salesforce_views).collect()

In [None]:
session.sql("ALTER TASK ACCOUNTADMIN_MGMT.UTILITIES.TASK_RUN_DYNAMIC_SALESFORCE_VIEWS RESUME;").collect()

In [None]:
session.sql("EXECUTE TASK ACCOUNTADMIN_MGMT.UTILITIES.TASK_RUN_DYNAMIC_SALESFORCE_VIEWS;").collect()

In [None]:
session.close()