In [None]:
!pip install streamlit

Via GUI, get the **Environment Data Atlas**.

In particular, we will analyze data in schema ENVIRONMENT from tables EDGARED2019, WBWDI2019Jan, and UNENVDB2018.

In [None]:
from snowflake.snowpark.session import Session
from snowflake.snowpark.functions import avg, sum, col,lit
import streamlit as st
import pandas as pd

In [6]:
# Create Session object
def create_session_object():
   connection_parameters = {
      "account": "",
      "user": "",
      "password": "",
      "role": "ACCOUNTADMIN",
      "warehouse": "COMPUTE_WH",
      "database": "ENVIRONMENT_DATA_ATLAS",
      "schema": "PUBLIC"
   }
   session = Session.builder.configs(connection_parameters).create()
   print(session.sql('select current_warehouse(), current_database(), current_schema()').collect())
   return session

Create three Snowpark DataFrames to load data from tables EDGARED2019, WBWDI2019Jan, and UNENVDB2018 from schema ENVIRONMENT

In [7]:
session = create_session_object()

2022-08-05 14:54:48.738 INFO    snowflake.connector.connection: Snowflake Connector for Python Version: 2.7.11, Python Version: 3.8.13, Platform: macOS-10.16-x86_64-i386-64bit
2022-08-05 14:54:48.748 INFO    snowflake.connector.connection: This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
2022-08-05 14:54:48.750 INFO    snowflake.connector.connection: Setting use_openssl_only mode to False
2022-08-05 14:54:51.514 INFO    snowflake.snowpark.session: Snowpark Session information: 
"version" : 0.8.0,
"python.version" : 3.8.13,
"python.connector.version" : 2.7.11,
"python.connector.session.id" : 135057072137,
"os.name" : Darwin

2022-08-05 14:54:51.524 INFO    snowflake.connector.cursor: query: [select current_warehouse(), current_database(), current_schema()]
2022-08-05 14:54:51.948 INFO    snowflake.co

[Row(CURRENT_WAREHOUSE()='COMPUTE_WH', CURRENT_DATABASE()='ENVIRONMENT_DATA_ATLAS', CURRENT_SCHEMA()=None)]


In [8]:
# CO2 Emissions by Country
snow_df_co2 = session.table("ENVIRONMENT.EDGARED2019").filter(col('Indicator Name') == 'Fossil CO2 Emissions').filter(col('Type Name') == 'All Type')
snow_df_co2 = snow_df_co2.group_by('Location Name').agg(sum('$16').alias("Total CO2 Emissions")).filter(col('Location Name') != 'World').sort('Location Name')

# Forest Occupied Land Area by Country
snow_df_land = session.table("ENVIRONMENT.\"WBWDI2019Jan\"").filter(col('Series Name') == 'Forest area (% of land area)')
snow_df_land = snow_df_land.group_by('Country Name').agg(sum('$61').alias("Total Share of Forest Land")).sort('Country Name')

# Total Municipal Waste by Country
snow_df_waste = session.table("ENVIRONMENT.UNENVDB2018").filter(col('Variable Name') == 'Municipal waste collected')
snow_df_waste = snow_df_waste.group_by('Location Name').agg(sum('$12').alias("Total Municipal Waste")).sort('Location Name')

2022-08-05 14:55:27.322 INFO    snowflake.connector.cursor: query: [SELECT  *  FROM ( SELECT  *  FROM ( SELECT  *  FROM (ENVIRONMENT.EDGARED2019)) W...]
2022-08-05 14:55:29.056 INFO    snowflake.connector.cursor: query execution done
2022-08-05 14:55:29.059 INFO    snowflake.connector.cursor: query: [SELECT  *  FROM ( SELECT "Location Name", sum("$16") AS "Total CO2 Emissions" FR...]
2022-08-05 14:55:29.584 INFO    snowflake.connector.cursor: query execution done
2022-08-05 14:55:29.585 INFO    snowflake.connector.cursor: query: [SELECT  *  FROM ( SELECT  *  FROM (ENVIRONMENT."WBWDI2019Jan")) WHERE ("Series N...]
2022-08-05 14:55:31.012 INFO    snowflake.connector.cursor: query execution done
2022-08-05 14:55:31.014 INFO    snowflake.connector.cursor: query: [SELECT "Country Name", sum("$61") AS "Total Share of Forest Land" FROM ( SELECT ...]
2022-08-05 14:55:31.410 INFO    snowflake.connector.cursor: query execution done
2022-08-05 14:55:31.412 INFO    snowflake.connector.cursor: quer

More importantly, note that at this point nothing is executed on the server because of lazy evaluation–which reduces the amount of data exchanged between Snowflake and the client/application.

In [9]:
# Convert Snowpark DataFrames to Pandas DataFrames for Streamlit
pd_df_co2 = snow_df_co2.to_pandas()
pd_df_land = snow_df_land.to_pandas()
pd_df_waste = snow_df_waste.to_pandas()

2022-08-05 14:56:53.227 INFO    snowflake.connector.cursor: query: [SELECT  *  FROM ( SELECT  *  FROM ( SELECT "Location Name", sum("$16") AS "Total...]
2022-08-05 14:56:55.744 INFO    snowflake.connector.cursor: query execution done
2022-08-05 14:56:55.839 INFO    snowflake.connector.cursor: query: [SELECT  *  FROM ( SELECT "Country Name", sum("$61") AS "Total Share of Forest La...]
2022-08-05 14:56:59.398 INFO    snowflake.connector.cursor: query execution done
2022-08-05 14:56:59.405 INFO    snowflake.connector.cursor: query: [SELECT  *  FROM ( SELECT "Location Name", sum("$12") AS "Total Municipal Waste" ...]
2022-08-05 14:57:01.467 INFO    snowflake.connector.cursor: query execution done


In [10]:
%%writefile my_snowpark_streamlit_app.py
#!/usr/bin/env python

# Snowpark
from snowflake.snowpark.session import Session
from snowflake.snowpark.functions import avg, sum, col,lit

import os
from dotenv import load_dotenv
load_dotenv()
PASS = os.getenv('PASS')

#Streamlit
import streamlit as st

st.set_page_config(
     page_title="Environment Data Atlas",
     page_icon="🧊",
     layout="wide",
     initial_sidebar_state="expanded",
     menu_items={
         'Get Help': 'https://developers.snowflake.com',
         'About': "This is an *extremely* cool app powered by Snowpark for Python, Streamlit, and Snowflake Data Marketplace"
     }
 )

# Misc
import pandas as pd

# Create Session object
def create_session_object():
    connection_parameters = {
        "account": "",
        "user": "",
        "password": PASS,
        "role": "ACCOUNTADMIN",
        "warehouse": "COMPUTE_WH",
        "database": "ENVIRONMENT_DATA_ATLAS",
        "schema": "PUBLIC"
    }
    session = Session.builder.configs(connection_parameters).create()
    print(session.sql('select current_warehouse(), current_database(), current_schema()').collect())
    return session
  
# Create Snowpark DataFrames that loads data from Knoema: Environmental Data Atlas
def load_data(session):
    # CO2 Emissions by Country
    snow_df_co2 = session.table("ENVIRONMENT.EDGARED2019").filter(col('Indicator Name') == 'Fossil CO2 Emissions').filter(col('Type Name') == 'All Type')
    snow_df_co2 = snow_df_co2.group_by('Location Name').agg(sum('$16').alias("Total CO2 Emissions")).filter(col('Location Name') != 'World').sort('Location Name')
    
    # Forest Occupied Land Area by Country
    snow_df_land = session.table("ENVIRONMENT.\"WBWDI2019Jan\"").filter(col('Series Name') == 'Forest area (% of land area)')
    snow_df_land = snow_df_land.group_by('Country Name').agg(sum('$61').alias("Total Share of Forest Land")).sort('Country Name')
    
    # Total Municipal Waste by Country
    snow_df_waste = session.table("ENVIRONMENT.UNENVDB2018").filter(col('Variable Name') == 'Municipal waste collected')
    snow_df_waste = snow_df_waste.group_by('Location Name').agg(sum('$12').alias("Total Municipal Waste")).sort('Location Name')
    
    # Convert Snowpark DataFrames to Pandas DataFrames for Streamlit
    pd_df_co2  = snow_df_co2.to_pandas()
    pd_df_land = snow_df_land.to_pandas() 
    pd_df_waste = snow_df_waste.to_pandas()
    
    # Add header and a subheader
    st.header("Knoema: Environment Data Atlas")
    st.subheader("Powered by Snowpark for Python and Snowflake Data Marketplace | Made with Streamlit")
    
    # Use columns to display the three dataframes side-by-side along with their headers
    col1, col2, col3 = st.columns(3)
    with st.container():
        with col1:
            st.subheader('CO2 Emissions by Country')
            st.dataframe(pd_df_co2)
        with col2:
            st.subheader('Forest Occupied Land Area by Country')
            st.dataframe(pd_df_land)
        with col3:
            st.subheader('Total Municipal Waste by Country')
            st.dataframe(pd_df_waste)
    
    # Display an interactive chart to visualize CO2 Emissions by Top N Countries
    with st.container():
        st.subheader('CO2 Emissions by Top N Countries')
        with st.expander(""):
            emissions_threshold = st.number_input(label='Emissions Threshold',min_value=5000, value=20000, step=5000)
            pd_df_co2_top_n = snow_df_co2.filter(col('Total CO2 Emissions') > emissions_threshold).to_pandas()
            st.bar_chart(data=pd_df_co2_top_n.set_index('Location Name'), width=850, height=500, use_container_width=True)

if __name__ == "__main__":
    session = create_session_object()
    load_data(session)

Writing my_snowpark_streamlit_app.py


In [12]:
!streamlit run my_snowpark_streamlit_app.py

[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://192.168.43.157:8501[0m
[0m
[34m[1m  For better performance, install the Watchdog module:[0m

  $ xcode-select --install
  $ pip install watchdog
            [0m
2022-08-05 15:05:47.039 Snowflake Connector for Python Version: 2.7.11, Python Version: 3.8.13, Platform: macOS-10.16-x86_64-i386-64bit
2022-08-05 15:05:47.040 This connection is in OCSP Fail Open Mode. TLS Certificates would be checked for validity and revocation status. Any other Certificate Revocation related exceptions or OCSP Responder failures would be disregarded in favor of connectivity.
2022-08-05 15:05:49.390 Snowpark Session information: 
"version" : 0.8.0,
"python.version" : 3.8.13,
"python.connector.version" : 2.7.11,
"python.connector.session.id" : 135057055765,
"os.name" : Darwin

2022-08-05 15:05:49.395 query: [select current_warehouse(), curre