In [1]:
import duckdb
import leafmap
import pandas as pd

In [2]:
con = duckdb.connect()
con.install_extension("httpfs")
con.load_extension("httpfs")
con.install_extension("spatial")
con.load_extension("spatial")

In [3]:
# world connnectedness data from https://data.humdata.org/dataset/social-connectedness-index
url = 'https://data.humdata.org/dataset/e9988552-74e4-4ff4-943f-c782ac8bca87/resource/35ca6ade-a5bd-4782-b266-797169dca74b/download/countries-countries-fb-social-connectedness-index-october-2021.tsv'

In [4]:
# Read the data from the URL into a pandas DataFrame
df = pd.read_csv(url, sep='\t')

# Convert the DataFrame into a DuckDB table
con.register('my_table', df)

# Now you can query the table using DuckDB
result = con.execute("SELECT * FROM my_table").fetchdf()

In [5]:
df

Unnamed: 0,user_loc,fr_loc,scaled_sci
0,AE,AE,1117179
1,AE,AG,5488
2,AE,AL,2101
3,AE,AM,3580
4,AE,AO,2326
...,...,...,...
34220,ZW,XK,262
34221,ZW,YT,1650
34222,ZW,ZA,366466
34223,ZW,ZM,196035


In [6]:
con.sql('''
    SELECT user_loc as country, SUM(scaled_sci) as scaled
    FROM my_table
    GROUP BY user_loc
    ORDER BY scaled DESC
    LIMIT 10            
''')

┌─────────┬────────────┐
│ country │   scaled   │
│ varchar │   int128   │
├─────────┼────────────┤
│ SC      │ 1006368457 │
│ AG      │  893365845 │
│ GD      │  835369954 │
│ KI      │  782782835 │
│ TO      │  719577449 │
│ ST      │  684169236 │
│ VC      │  667785101 │
│ FM      │  619429818 │
│ LC      │  578010894 │
│ IM      │  514740394 │
├─────────┴────────────┤
│ 10 rows    2 columns │
└──────────────────────┘

In [7]:
# SC Seychelles, AG Antigua/Barbuda, GD Grenada, KI Kiribati, TO Tonga, ST Sao Tome, VC St Vincent, FM Micronesia, LC St. Lucia, IM Isle of Man

In [8]:
# US counties data from https://data.humdata.org/dataset/social-connectedness-index
url_state = 'https://data.humdata.org/dataset/e9988552-74e4-4ff4-943f-c782ac8bca87/resource/868a2fdb-f5c8-4a98-af7c-cfc8bf0daeb3/download/us-counties-countries-fb-social-connectedness-index-october-2021.tsv'

In [9]:
# Read the data from the URL into a pandas DataFrame
df_zip = pd.read_csv(url_state, sep='\t')

# Register the DataFrame as a DuckDB table
con.register('state_table', df_zip)

# Perform the grouping and ordering operations
result = con.execute("""
    SELECT user_loc as ZIP, SUM(scaled_sci) as SCALED
    FROM state_table
    GROUP BY user_loc
    ORDER BY SCALED DESC
    LIMIT 10
""").fetchdf()
result

Unnamed: 0,ZIP,SCALED
0,66010,1235691000.0
1,69100,625999300.0
2,60010,440737100.0
3,69110,292105100.0
4,72107,279302300.0
5,72019,266206200.0
6,72105,257854100.0
7,72073,257121200.0
8,78010,255552100.0
9,72149,247285800.0


In [11]:
# Import the UC Woody Biomass Utilization Group's Current Wood Facility Databases
# Requested online, received email with zipped folders
# Start with sawmills
con.sql("SELECT * FROM ST_Read('C:/Users/vance/Downloads/CurrentSawmill/Current_Wood_Facility_Database_Primary_Wood_Processing.shp')")

┌───────┬─────────────┬──────────────┬───┬────────────┬──────────────────────┬──────────┬──────────────────────┐
│ RecID │  latitude   │  longitude   │ … │    Year    │      JoinSymbol      │ ObjectId │         geom         │
│ int32 │   double    │    double    │   │    date    │       varchar        │  int32   │       geometry       │
├───────┼─────────────┼──────────────┼───┼────────────┼──────────────────────┼──────────┼──────────────────────┤
│  9001 │ 40.79719014 │ -124.1815646 │ … │ 2020-01-01 │ Operational Large …  │       30 │ POINT (-124.181564…  │
│  9002 │ 38.90448023 │ -121.3070281 │ … │ 2020-01-01 │ Operational Large …  │       31 │ POINT (-121.307028…  │
│  9003 │ 39.48193681 │ -121.5632263 │ … │ 2020-01-01 │ Operational Cedar …  │       32 │ POINT (-121.563226…  │
│  9008 │   40.471498 │  -122.320991 │ … │ 2020-01-01 │ Operational Large …  │       33 │ POINT (-122.320991…  │
│  9012 │   40.901169 │  -124.071143 │ … │ 2020-01-01 │ Operational Fence …  │       34 │ POINT 

In [17]:
# Create sawmill table from shp file and show
con.sql('''
    CREATE TABLE IF NOT EXISTS sawmill as
    SELECT * FROM ST_Read('C:/Users/vance/Downloads/CurrentSawmill/Current_Wood_Facility_Database_Primary_Wood_Processing.shp')    
''')
con.table('sawmill')

┌───────┬─────────────┬──────────────┬───┬────────────┬──────────────────────┬──────────┬──────────────────────┐
│ RecID │  latitude   │  longitude   │ … │    Year    │      JoinSymbol      │ ObjectId │         geom         │
│ int32 │   double    │    double    │   │    date    │       varchar        │  int32   │       geometry       │
├───────┼─────────────┼──────────────┼───┼────────────┼──────────────────────┼──────────┼──────────────────────┤
│  9001 │ 40.79719014 │ -124.1815646 │ … │ 2020-01-01 │ Operational Large …  │       30 │ POINT (-124.181564…  │
│  9002 │ 38.90448023 │ -121.3070281 │ … │ 2020-01-01 │ Operational Large …  │       31 │ POINT (-121.307028…  │
│  9003 │ 39.48193681 │ -121.5632263 │ … │ 2020-01-01 │ Operational Cedar …  │       32 │ POINT (-121.563226…  │
│  9008 │   40.471498 │  -122.320991 │ … │ 2020-01-01 │ Operational Large …  │       33 │ POINT (-122.320991…  │
│  9012 │   40.901169 │  -124.071143 │ … │ 2020-01-01 │ Operational Fence …  │       34 │ POINT 

In [16]:
# View sawmill table schema
con.sql('''
    DESCRIBE sawmill
        
''')

┌─────────────┬─────────────┬─────────┬─────────┬─────────┬───────┐
│ column_name │ column_type │  null   │   key   │ default │ extra │
│   varchar   │   varchar   │ varchar │ varchar │ varchar │ int32 │
├─────────────┼─────────────┼─────────┼─────────┼─────────┼───────┤
│ RecID       │ INTEGER     │ YES     │ NULL    │ NULL    │  NULL │
│ latitude    │ DOUBLE      │ YES     │ NULL    │ NULL    │  NULL │
│ longitude   │ DOUBLE      │ YES     │ NULL    │ NULL    │  NULL │
│ Name        │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Status      │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Owner       │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Cogenerati  │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Facility_T  │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Feedstock_  │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Employees   │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ StAddr      │ VARCHAR     │ YES     │ NULL    

In [20]:
# Sum sawmills by county
con.sql('''
    SELECT County, COUNT(*) as Count
    FROM sawmill
    GROUP BY County
    ORDER BY count DESC
''')

┌────────────┬───────┐
│   County   │ Count │
│  varchar   │ int64 │
├────────────┼───────┤
│ Shasta     │     5 │
│ Humboldt   │     4 │
│ Sonoma     │     3 │
│ Tuolumne   │     2 │
│ Plumas     │     2 │
│ Butte      │     2 │
│ Mendocino  │     2 │
│ Siskiyou   │     2 │
│ Trinity    │     1 │
│ Lake, OR   │     1 │
│ Placer     │     1 │
│ Tulare     │     1 │
│ Santa Cruz │     1 │
├────────────┴───────┤
│ 13 rows  2 columns │
└────────────────────┘

In [24]:
# Import the Biomass dataset
con.sql("SELECT * FROM ST_Read('C:/Users/vance/Downloads/CurrentBiomass/Current_Wood_Facility_Database_Biomass.shp')")

┌───────┬─────────────┬──────────────┬──────────────────────┬───┬────────────┬──────────┬──────────────────────┐
│ RecID │  latitude   │  longitude   │         Name         │ … │ JoinSymbol │ ObjectId │         geom         │
│ int32 │   double    │    double    │       varchar        │   │  varchar   │  int32   │       geometry       │
├───────┼─────────────┼──────────────┼──────────────────────┼───┼────────────┼──────────┼──────────────────────┤
│    25 │   35.576448 │  -119.005818 │ DTE Mt. Poso Cogen   │ … │ 25_2021    │       27 │ POINT (-119.005818…  │
│    14 │   36.569581 │  -119.418984 │ Dinuba Energy        │ … │ 14_2021    │       56 │ POINT (-119.418984…  │
│    15 │   38.524353 │  -121.903077 │ Dixon Ridge Farms …  │ … │ 15_2021    │       94 │ POINT (-121.903077…  │
│    16 │   37.943945 │  -121.330053 │ DTE Stockton Bioma…  │ … │ 16_2021    │      110 │ POINT (-121.330053…  │
│    22 │   36.755903 │  -120.365273 │ Covanta Mendota Po…  │ … │ 22_2021    │      122 │ POINT 

In [25]:
# Create biomass table from shp file and show
con.sql('''
    CREATE TABLE IF NOT EXISTS biomass as
    SELECT * FROM ST_Read('C:/Users/vance/Downloads/CurrentBiomass/Current_Wood_Facility_Database_Biomass.shp')    
''')
con.table('sawmill')

┌───────┬─────────────┬──────────────┬───┬────────────┬──────────────────────┬──────────┬──────────────────────┐
│ RecID │  latitude   │  longitude   │ … │    Year    │      JoinSymbol      │ ObjectId │         geom         │
│ int32 │   double    │    double    │   │    date    │       varchar        │  int32   │       geometry       │
├───────┼─────────────┼──────────────┼───┼────────────┼──────────────────────┼──────────┼──────────────────────┤
│  9001 │ 40.79719014 │ -124.1815646 │ … │ 2020-01-01 │ Operational Large …  │       30 │ POINT (-124.181564…  │
│  9002 │ 38.90448023 │ -121.3070281 │ … │ 2020-01-01 │ Operational Large …  │       31 │ POINT (-121.307028…  │
│  9003 │ 39.48193681 │ -121.5632263 │ … │ 2020-01-01 │ Operational Cedar …  │       32 │ POINT (-121.563226…  │
│  9008 │   40.471498 │  -122.320991 │ … │ 2020-01-01 │ Operational Large …  │       33 │ POINT (-122.320991…  │
│  9012 │   40.901169 │  -124.071143 │ … │ 2020-01-01 │ Operational Fence …  │       34 │ POINT 

In [26]:
# View biomass table schema
con.sql('''
    DESCRIBE biomass
        
''')

┌─────────────┬─────────────┬─────────┬─────────┬─────────┬───────┐
│ column_name │ column_type │  null   │   key   │ default │ extra │
│   varchar   │   varchar   │ varchar │ varchar │ varchar │ int32 │
├─────────────┼─────────────┼─────────┼─────────┼─────────┼───────┤
│ RecID       │ INTEGER     │ YES     │ NULL    │ NULL    │  NULL │
│ latitude    │ DOUBLE      │ YES     │ NULL    │ NULL    │  NULL │
│ longitude   │ DOUBLE      │ YES     │ NULL    │ NULL    │  NULL │
│ Name        │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Status      │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Owner       │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Cogenerati  │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Facility_T  │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ Feedstock_  │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ MW_Namepla  │ VARCHAR     │ YES     │ NULL    │ NULL    │  NULL │
│ MW_Grid     │ VARCHAR     │ YES     │ NULL    

In [35]:
# Sum the total MW_Grid by county round to 1 decimal

con.sql('''
    SELECT County, ROUND(SUM(CAST(MW_Grid AS FLOAT)), 1) as Total_MW_Grid
    FROM biomass
    GROUP BY County
    ORDER BY Total_MW_Grid DESC
    LIMIT 15
''')

┌─────────────┬───────────────┐
│   County    │ Total_MW_Grid │
│   varchar   │    double     │
├─────────────┼───────────────┤
│ Shasta      │          94.0 │
│ San Joaquin │          45.0 │
│ Kern        │          44.0 │
│ Riverside   │          44.0 │
│ Placer      │          35.4 │
│ Jackson     │          30.0 │
│ Humboldt    │          27.8 │
│ Fresno      │          24.3 │
│ Lassen      │          24.0 │
│ Tuolumne    │          20.0 │
│ Plumas      │          16.0 │
│ Sierra      │          13.0 │
│ Siskiyou    │          12.0 │
│ Madera      │          10.0 │
│ Merced      │           0.0 │
├─────────────┴───────────────┤
│ 15 rows           2 columns │
└─────────────────────────────┘

In [None]:
# Add h3 schema or show both data sets on map
