In [None]:
-- Report the role this notebook session is currently using.
SELECT CURRENT_ROLE()

In [None]:
import logging
import os
import re
import tempfile
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime

import pandas as pd
import requests
from snowflake.snowpark.context import get_active_session

In [None]:
CREATE OR REPLACE FUNCTION get_EN_secret()
RETURNS STRING
LANGUAGE PYTHON
RUNTIME_VERSION = 3.11
HANDLER = 'get_EN_secret'
EXTERNAL_ACCESS_INTEGRATIONS = (EN_INTEGRATION)
SECRETS = ('cred' = EN_API_TOKEN_DB)
AS
$$
import _snowflake


def get_EN_secret():
    """Return the generic secret string bound to the 'cred' alias above."""
    # 'cred' is the alias declared in the SECRETS clause of this function.
    return _snowflake.get_generic_secret_string('cred')
$$;

In [None]:
# Fetch the EN API token through the get_EN_secret() UDF.
# get_active_session is imported in the notebook's import cell.
session = get_active_session()
results = session.sql('SELECT get_EN_secret()').collect()

# Fail loudly here rather than with an IndexError or a None token later on.
if not results or results[0][0] is None:
    raise RuntimeError("get_EN_secret() returned no value")

EN_secret = results[0][0]

# Deliberately do NOT echo the token: cell outputs are saved with the notebook
# and would leak the credential to anyone who can read it.
print("EN secret retrieved:", bool(EN_secret))

In [None]:
session = get_active_session()

try:
    # Get file name
    file_query = "LIST @CLASSYDATA.EN_API.NEW_FILES"
    list_files = session.sql(file_query).collect()

    # Match the extension case-insensitively so ".CSV" uploads are not skipped.
    csv_files = [f for f in list_files if f['name'].lower().endswith('.csv')]
    if not csv_files:
        # Readable message instead of max()'s "empty sequence" error.
        raise FileNotFoundError("No CSV files found in stage")

    # Snowflake's LIST documentation shows last_modified as text such as
    # "Mon, 11 Sep 2019 16:57:43 GMT". Comparing those strings orders files by
    # weekday name rather than by time, so parse them into datetimes first.
    try:
        modified_times = [parsedate_to_datetime(str(f['last_modified'])) for f in csv_files]
    except (TypeError, ValueError):
        # Not in that format (e.g. already sortable values): compare as-is.
        modified_times = [f['last_modified'] for f in csv_files]

    most_recent_entry = max(zip(modified_times, csv_files), key=lambda pair: pair[0])[1]
    most_recent = most_recent_entry['name']
    print("Most recent file:", most_recent)

    # Read file (using the full path as it appears in the stage)
    df = session.read.options({"field_delimiter": ",", "skip_header": 1}).csv(f"@CLASSYDATA.EN_API.{most_recent}")
    df = df.toDF("EMAIL_ADDRESS", "CLASSY_WALK_2024")
    print("File read successful")
    print("Number of rows:", df.count())
    print("Columns:", df.columns)

    # Convert to pandas and check data
    pandas_df = df.to_pandas()
    print("\nData sample:")
    print(pandas_df.head())

except Exception as e:
    print("Error:", str(e))

# Debugging

In [None]:
from snowflake.snowpark.context import get_active_session

# Confirm that an active Snowpark session is available and show what it is.
session = get_active_session()
session_type = type(session)
print("Session type:", session_type)
print("Session:", session)

# Round-trip a trivial query to prove the session can execute SQL.
try:
    result = session.sql("SELECT CURRENT_WAREHOUSE()").collect()
    warehouse_name = result[0][0]
    print("Current warehouse:", warehouse_name)
except Exception as sql_err:
    print("SQL Error:", str(sql_err))

In [None]:
# Test just the stage listing
try:
    # List the stage and print one file name per line.
    list_files = session.sql("LIST @CLASSYDATA.EN_API.NEW_FILES").collect()
    file_count = len(list_files)
    print("Files found:", file_count)
    for entry in list_files:
        print(entry['name'])
except Exception as stage_err:
    print("Stage Error:", str(stage_err))

In [None]:
def get_most_recent_file(session, stage_path):
    """Return the name of the most recently modified CSV file in a stage.

    Args:
        session: Object exposing ``sql(query).collect()``; in this notebook,
            the active Snowpark session.
        stage_path: Stage identifier without the leading ``@``. It is
            interpolated directly into the LIST statement, so pass trusted
            values only.

    Returns:
        The ``name`` field of the newest listed entry whose name ends in
        ``.csv`` (case-insensitive).

    Raises:
        FileNotFoundError: If the stage lists no files, or no CSV files.
        Exception: Any other error is printed and re-raised unchanged.
    """
    try:
        # List files in stage
        print("Attempting to list files from stage:", stage_path)
        query = f"LIST @{stage_path}"
        print("Executing query:", query)

        list_files = session.sql(query).collect()
        print("Raw files list:", list_files)

        if not list_files:
            raise FileNotFoundError("No files found in stage")

        # Filter for CSV files and get the most recent
        csv_files = [f for f in list_files if f['name'].lower().endswith('.csv')]
        print("CSV files found:", len(csv_files))
        print("CSV files:", csv_files)

        if not csv_files:
            raise FileNotFoundError("No CSV files found in stage")

        # Snowflake's LIST documentation shows last_modified as text such as
        # "Mon, 11 Sep 2019 16:57:43 GMT". Comparing those strings orders files
        # by weekday name rather than by time, so parse them into datetimes.
        try:
            modified_times = [
                parsedate_to_datetime(str(f['last_modified'])) for f in csv_files
            ]
        except (TypeError, ValueError):
            # Not in that format (e.g. already sortable values): compare as-is.
            modified_times = [f['last_modified'] for f in csv_files]

        most_recent = max(zip(modified_times, csv_files), key=lambda pair: pair[0])[1]
        print("Most recent file details:", most_recent)

        return most_recent['name']
    except Exception as e:
        print(f"Error in get_most_recent_file: {str(e)}")
        print(f"Error type: {type(e)}")
        raise

# Exercise get_most_recent_file against the NEW_FILES stage and report the outcome.
try:
    session = get_active_session()
    stage_path = 'CLASSYDATA.EN_API.NEW_FILES'
    file_name = get_most_recent_file(session=session, stage_path=stage_path)
    print("Final result - Most recent file:", file_name)
except Exception as final_err:
    # The helper re-raises after logging; summarise the failure here.
    print("Final error:", str(final_err))