In [2]:
import snowflake.connector
import os
import pandas as pd
from snowflake.connector.errors import NotSupportedError

def sf_query(sql: str):
    """
    Run a single SQL statement in Snowflake.

    Connection settings are read from the environment variables
    SNOWFLAKE_USER, SNOWFLAKE_PASSWORD, SNOWFLAKE_HOST, SNOWFLAKE_WAREHOUSE,
    SNOWFLAKE_DATABASE and SNOWFLAKE_ROLE. A new connection is opened for
    every call and closed before returning.

    Args:
        sql: The SQL statement to execute.

    Returns:
        A pandas DataFrame when the statement produces a result set
        (the cursor's ``description`` is non-empty). Otherwise ``None``,
        after printing a confirmation message.

    Raises:
        ValueError: If SNOWFLAKE_HOST is unset or empty.
        Any error raised by the Snowflake connector while connecting or
        executing ``sql`` is propagated unchanged.
    """
    # Validate up front: os.getenv returns None for a missing variable, and
    # calling .split on it would fail with an unhelpful AttributeError.
    host = os.getenv("SNOWFLAKE_HOST")
    if not host:
        raise ValueError(
            "SNOWFLAKE_HOST environment variable is not set; "
            "expected something like '<account>.snowflakecomputing.com'."
        )
    # The connector wants the account identifier, i.e. the host without the
    # '.snowflakecomputing.com' suffix.
    account = host.split(".snowflakecomputing.com")[0]

    conn = snowflake.connector.connect(
        user=os.getenv("SNOWFLAKE_USER"),
        password=os.getenv("SNOWFLAKE_PASSWORD"),
        account=account,
        warehouse=os.getenv("SNOWFLAKE_WAREHOUSE"),
        database=os.getenv("SNOWFLAKE_DATABASE"),
        role=os.getenv("SNOWFLAKE_ROLE"),
    )
    try:
        cur = conn.cursor()
        try:
            # Best effort: ask for Arrow results so fetch_pandas_all can be
            # used. A failure here is not fatal because of the fallback below.
            try:
                cur.execute("ALTER SESSION SET QUERY_RESULT_FORMAT=ARROW")
            except snowflake.connector.errors.Error:
                pass

            cur.execute(sql)

            if not cur.description:
                # No result set to return.
                print("Executed successfully.")
                return None

            try:
                return cur.fetch_pandas_all()
            except NotSupportedError:
                # Fallback when the result cannot be fetched as Arrow:
                # build the frame from plain rows and the column names.
                rows = cur.fetchall()
                cols = [d[0] for d in cur.description]
                return pd.DataFrame(rows, columns=cols)
        finally:
            # Single close point so the cursor is released on every path,
            # including when execute/fetch raises.
            cur.close()
    finally:
        conn.close()


In [4]:
# Check the current session context: database, schema, warehouse and role.
sf_query("SELECT CURRENT_DATABASE(), CURRENT_SCHEMA(), CURRENT_WAREHOUSE(), CURRENT_ROLE();")




Unnamed: 0,CURRENT_DATABASE(),CURRENT_SCHEMA(),CURRENT_WAREHOUSE(),CURRENT_ROLE()
0,DM_PSET3,,WH_DM,SYSADMIN


In [5]:
# List the views that exist in the ANALYTICS schema.
sf_query("SHOW VIEWS IN SCHEMA ANALYTICS;")



Unnamed: 0,created_on,name,reserved,database_name,schema_name,owner,comment,text,is_secure,is_materialized,owner_role_type,change_tracking
0,2025-10-23 08:41:54.807000-07:00,OBT_TRIPS_V,,DM_PSET3,ANALYTICS,SYSADMIN,,CREATE OR REPLACE VIEW ANALYTICS.OBT_TRIPS_V A...,False,False,ROLE,OFF


In [6]:
# Quick row count of ANALYTICS.OBT_TRIPS_V.
sf_query("SELECT COUNT(*) AS cnt FROM ANALYTICS.OBT_TRIPS_V;")

Unnamed: 0,CNT
0,851622729


In [7]:
# Create (or replace) ANALYTICS.OBT_TRIPS_CLEAN_V: the rows of OBT_TRIPS_V with
# trip_minutes in [1, 240], trip_distance in (0, 100] and tip_pct in [0, 1].
# Rows where any of these three columns is NULL are also excluded, because the
# corresponding predicate evaluates to NULL rather than TRUE.
sf_query("""
CREATE OR REPLACE VIEW ANALYTICS.OBT_TRIPS_CLEAN_V AS
SELECT *
FROM ANALYTICS.OBT_TRIPS_V
WHERE trip_minutes BETWEEN 1 AND 240
  AND trip_distance > 0 AND trip_distance <= 100
  AND tip_pct BETWEEN 0 AND 1;
""")


Unnamed: 0,status
0,View OBT_TRIPS_CLEAN_V successfully created.


In [10]:
# Share of rows kept by the clean view. The denominator is computed in the same
# statement rather than pasted from an earlier cell's output, so the percentage
# cannot go stale when the underlying data changes. NULLIF turns an empty
# source view into a NULL percentage instead of a division-by-zero error.
sf_query("""
SELECT
  COUNT(*) AS total_clean,
  ROUND(COUNT(*) / NULLIF((SELECT COUNT(*) FROM ANALYTICS.OBT_TRIPS_V), 0) * 100, 2) AS pct_sobre_total
FROM ANALYTICS.OBT_TRIPS_CLEAN_V;
""")


Unnamed: 0,TOTAL_CLEAN,PCT_SOBRE_TOTAL
0,838263783,98.43


In [9]:
# Per service_type summary over the clean view: average trip_minutes,
# trip_distance and tip_pct (rounded), plus the row count.
sf_query("""
SELECT service_type,
       ROUND(AVG(trip_minutes),1) AS avg_min,
       ROUND(AVG(trip_distance),2) AS avg_dist,
       ROUND(AVG(tip_pct),3) AS avg_tip,
       COUNT(*) AS n
FROM ANALYTICS.OBT_TRIPS_CLEAN_V
GROUP BY 1
ORDER BY 1;
""")


Unnamed: 0,SERVICE_TYPE,AVG_MIN,AVG_DIST,AVG_TIP,N
0,green,14.0,3.05,0.093,66081773
1,yellow,14.7,3.09,0.157,772182010


In [11]:
# Same summary metrics broken down by pickup_hour and service_type,
# ordered by hour and then service type.
sf_query("""
SELECT pickup_hour,
       service_type,
       COUNT(*) AS n,
       ROUND(AVG(trip_minutes),1) AS avg_min,
       ROUND(AVG(trip_distance),2) AS avg_dist,
       ROUND(AVG(tip_pct),3) AS avg_tip
FROM ANALYTICS.OBT_TRIPS_CLEAN_V
GROUP BY 1,2
ORDER BY 1,2;
""")


Unnamed: 0,PICKUP_HOUR,SERVICE_TYPE,N,AVG_MIN,AVG_DIST,AVG_TIP
0,0,green,2491683,11.7,2.99,0.099
1,0,yellow,25108257,13.2,3.56,0.154
2,1,green,1920243,11.4,2.98,0.097
3,1,yellow,17740795,12.3,3.39,0.152
4,2,green,1405221,11.3,3.07,0.095
5,2,yellow,12608273,11.7,3.3,0.148
6,3,green,1096752,11.5,3.28,0.089
7,3,yellow,8966945,11.8,3.55,0.139
8,4,green,926996,12.1,3.7,0.074
9,4,yellow,6796280,12.6,4.45,0.121


In [12]:
# Same summary metrics broken down by pickup_dow, is_weekend and service_type.
# NOTE(review): the displayed output lists Fri, Mon, Sat, Sun, Thu, i.e.
# pickup_dow sorts alphabetically under ORDER BY 1, not in calendar order.
sf_query("""
SELECT pickup_dow,
       is_weekend,
       service_type,
       COUNT(*) AS n,
       ROUND(AVG(trip_minutes),1) AS avg_min,
       ROUND(AVG(trip_distance),2) AS avg_dist,
       ROUND(AVG(tip_pct),3) AS avg_tip
FROM ANALYTICS.OBT_TRIPS_CLEAN_V
GROUP BY 1,2,3
ORDER BY 1,2,3;
""")


Unnamed: 0,PICKUP_DOW,IS_WEEKEND,SERVICE_TYPE,N,AVG_MIN,AVG_DIST,AVG_TIP
0,Fri,False,green,10577314,14.4,2.99,0.092
1,Fri,False,yellow,117861767,15.3,3.05,0.157
2,Mon,False,green,8089189,13.8,3.05,0.09
3,Mon,False,yellow,99224635,14.3,3.24,0.158
4,Sat,True,green,11208650,13.5,3.08,0.093
5,Sat,True,yellow,115251971,13.8,2.97,0.147
6,Sun,True,green,9378607,13.0,3.18,0.093
7,Sun,True,yellow,99407905,13.6,3.4,0.148
8,Thu,False,green,9507843,14.6,3.02,0.094
9,Thu,False,yellow,117380578,15.7,3.04,0.162
