In [10]:
import duckdb
import pandas as pd
pd.set_option('display.max_columns', None)

import urllib.request
import os

conn = duckdb.connect('data/duck.db')

def download_url_to_file(url, filepath):
    if os.path.exists(filepath):
        print(f"File already exists at {filepath}")
        return
    response = urllib.request.urlopen(url)
    with open(filepath, 'wb') as file:
        file.write(response.read())
    print(f"File saved successfully at {filepath}")

download_url_to_file('https://nces.ed.gov/ipeds/datacenter/data/HD2023_Dict.zip', 'data/HD2023_Dict.zip')
download_url_to_file('https://nces.ed.gov/ipeds/datacenter/data/IC2023_Dict.zip', 'data/IC2023_Dict.zip')
download_url_to_file('https://nces.ed.gov/ipeds/datacenter/data/IC2023_AY_Dict.zip', 'data/IC2023_AY_Dict.zip')
download_url_to_file('https://nces.ed.gov/ipeds/datacenter/data/HD2023.zip', 'data/HD2023.zip')
download_url_to_file('https://nces.ed.gov/ipeds/datacenter/data/IC2023.zip', 'data/IC2023.zip')
download_url_to_file('https://nces.ed.gov/ipeds/datacenter/data/IC2023_AY.zip', 'data/IC2023_AY.zip')

print('All Files Downloaded Successfully')

File already exists at data/HD2023_Dict.zip
File already exists at data/IC2023_Dict.zip
File already exists at data/IC2023_AY_Dict.zip
File already exists at data/HD2023.zip
File already exists at data/IC2023.zip
File already exists at data/IC2023_AY.zip
All Files Downloaded Successfully


In [11]:
import zipfile
import os

# Unzip all zip files in the 'data' directory
data_dir = 'data'
for item in os.listdir(data_dir):
    if item.endswith('.zip'):
        file_path = os.path.join(data_dir, item)
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
        print(f"Unzipped {item} successfully")

Unzipped HD2023_Dict.zip successfully
Unzipped IC2023_AY_Dict.zip successfully
Unzipped HD2023.zip successfully
Unzipped IC2023_AY.zip successfully
Unzipped IC2023.zip successfully
Unzipped IC2023_Dict.zip successfully


In [12]:
import zipfile
import os
import subprocess
import glob

# Unzip all zip files in the 'data' directory
data_dir = 'data'
for item in glob.glob(os.path.join(data_dir, '*.csv')):
    if '-utf8' in item:
        print(f"Skipping {item} as it is already converted to UTF-8")
        continue
    name_only = os.path.basename(item).split('.')[0]
    utf_name = name_only + '-utf8.csv'
    script = f"iconv -f latin1 -t UTF-8 {item} > {os.path.join(data_dir, utf_name)}"
    print(f"Running: {script}")
    subprocess.run(args=['sh', '-c', script])
    print(f"Converted {item} to {utf_name} successfully")

Skipping data/hd2023-utf8.csv as it is already converted to UTF-8
Running: iconv -f latin1 -t UTF-8 data/ic2023_ay.csv > data/ic2023_ay-utf8.csv
Converted data/ic2023_ay.csv to ic2023_ay-utf8.csv successfully
Running: iconv -f latin1 -t UTF-8 data/hd2023.csv > data/hd2023-utf8.csv
Converted data/hd2023.csv to hd2023-utf8.csv successfully
Skipping data/ic2023_ay-utf8.csv as it is already converted to UTF-8
Skipping data/ic2023-utf8.csv as it is already converted to UTF-8
Running: iconv -f latin1 -t UTF-8 data/ic2023.csv > data/ic2023-utf8.csv
Converted data/ic2023.csv to ic2023-utf8.csv successfully


In [13]:
def parse_domain(url):
    if not url:
        return None
    parsed_url = urllib.parse.urlparse(url)
    domain = parsed_url.netloc.split(':')[0]  # Get the host part and remove port if present
    return domain


schools = pd.read_csv('data/hd2023.csv', encoding='latin1')
schools['domain'] = schools['WEBADDR'].apply(parse_domain)
schools

Unnamed: 0,UNITID,INSTNM,IALIAS,ADDR,CITY,STABBR,ZIP,FIPS,OBEREG,CHFNM,CHFTITLE,GENTELE,EIN,UEIS,OPEID,OPEFLAG,WEBADDR,ADMINURL,FAIDURL,APPLURL,NPRICURL,VETURL,ATHURL,DISAURL,SECTOR,ICLEVEL,CONTROL,HLOFFER,UGOFFER,GROFFER,HDEGOFR1,DEGGRANT,HBCU,HOSPITAL,MEDICAL,TRIBAL,LOCALE,OPENPUBL,ACT,NEWID,DEATHYR,CLOSEDAT,CYACTIVE,POSTSEC,PSEFLAG,PSET4FLG,RPTMTH,INSTCAT,C21BASIC,C21IPUG,C21IPGRD,C21UGPRF,C21ENPRF,C21SZSET,C18BASIC,C15BASIC,CCBASIC,CARNEGIE,LANDGRNT,INSTSIZE,F1SYSTYP,F1SYSNAM,F1SYSCOD,CBSA,CBSATYPE,CSA,COUNTYCD,COUNTYNM,CNGDSTCD,LONGITUD,LATITUDE,DFRCGID,DFRCUSCG,domain
0,100654,Alabama A & M University,AAMU,4900 Meridian Street,Normal,AL,35762,1,5,Dr. Daniel K. Wims,President,2563725000,636001109,JDVGS67MSLH7,100200,1,www.aamu.edu/,https://www.aamu.edu/admissions-aid/index.html,https://www.aamu.edu/admissions-aid/financial-...,https://www.aamu.edu/admissions-aid/undergradu...,www.aamu.edu/admissions-aid/tuition-fees/net-p...,,,https://www.aamu.edu/administrativeoffices/VAD...,1,1,1,9,1,1,12,1,1,2,2,2,12,1,A,-2,-2,-2,1,1,1,1,1,2,18,16,18,10,4,14,18,18,18,16,1,3,2,-2,-2,26620,1,290,1089,Madison County,105,-86.568502,34.783368,106,1,
1,100663,University of Alabama at Birmingham,UAB,Administration Bldg Suite 1070,Birmingham,AL,35294-0110,1,5,Ray L. Watts,President,2059344011,636005396,YND4PLMC9AN7,105200,1,https://www.uab.edu/,https://www.uab.edu/admissions/,https://www.uab.edu/cost-aid/,https://www.uab.edu/admissions/apply,https://tcc.ruffalonl.com/University of Alabam...,https://www.uab.edu/students/veterans,https://www.uab.edu/registrar/students,https://www.uab.edu/students/disability/,1,1,1,9,1,1,11,1,2,1,1,2,12,1,A,-2,-2,-2,1,1,1,1,1,2,15,14,14,9,5,15,15,15,15,15,2,5,1,The University of Alabama System,101050,13820,1,142,1073,Jefferson County,107,-86.799345,33.505697,91,1,www.uab.edu
2,100690,Amridge University,Southern Christian University Regions University,1200 Taylor Rd,Montgomery,AL,36117-3553,1,5,Michael C.Turner,President,33438738777550,237034324,RB27R4GLDKE7,2503400,1,https://www.amridgeuniversity.edu/,https://www.amridgeuniversity.edu/admissions/,https://www.amridgeuniversity.edu/financialaid/,https://www.amridgeuniversity.edu/myamridge/,https://www2.amridgeuniversity.edu:9091/,https://www.amridgeuniversity.edu/admissions/m...,,https://www.amridgeuniversity.edu/studentservi...,2,1,2,9,1,1,12,1,2,2,2,2,12,1,A,-2,-2,-2,1,1,1,1,1,2,20,20,18,5,6,6,20,20,21,51,2,1,2,-2,-2,33860,1,388,1101,Montgomery County,102,-86.174010,32.362609,124,2,www.amridgeuniversity.edu
3,100706,University of Alabama in Huntsville,UAH University of Alabama Huntsville,301 Sparkman Dr,Huntsville,AL,35899,1,5,Chuck Karr,President,2568246120,630520830,HB6KNGVNJRU1,105500,1,www.uah.edu/,https://www.uah.edu/admissions,finaid.uah.edu/,register.uah.edu/,finaid.uah.edu/,www.uah.edu/admissions/graduate/financial-aid/...,www.uah.edu/heoa,www.uah.edu/health-and-wellness/disability-sup...,1,1,1,9,1,1,11,1,2,2,2,2,12,1,A,-2,-2,-2,1,1,1,1,1,2,15,17,17,15,4,13,16,16,15,16,2,3,1,The University of Alabama System,101050,26620,1,290,1089,Madison County,105,-86.640449,34.724557,91,1,
4,100724,Alabama State University,,915 S Jackson Street,Montgomery,AL,36104-0271,1,5,Quinton T. Ross,President,3342294100,636001101,DLJWLMSNK627,100500,1,www.alasu.edu/,www.alasu.edu/admissions/index.aspx,www.alasu.edu/undergraduate/expensesandfinanci...,www.alasu.edu/admissions/undergrad-admissions/...,www.alasu.edu/cost-aid/tuition-costs/net-price...,,https://www.alasu.edu/administration/consumer-...,www.alasu.edu/academics/researchcenters/alabam...,1,1,1,9,1,1,11,1,1,2,2,2,12,1,A,-2,-2,-2,1,1,1,1,1,2,17,13,18,10,3,14,19,19,18,21,2,2,2,-2,-2,33860,1,388,1101,Montgomery County,107,-86.295677,32.364317,97,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6158,499680,Stellar Career College-Crown Point,,5521 Lincoln Highway Suite 301,Crown Point,IN,46307-1117,18,3,Zulfiqar A. Satti,President,2199005700,821883694,,3759302,1,https://in.stellarcollege.edu/,,,,https://in.stellarcollege.edu/net-price-calcul...,https://in.stellarcollege.edu/about-us/,,https://in.stellarcollege.edu/about-us/,6,2,3,3,1,2,40,1,2,-2,2,2,21,1,N,-2,-2,-2,1,1,1,1,2,4,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,2,1,2,-2,-2,16980,1,176,18089,Lake County,1801,-87.401656,41.474242,221,2,in.stellarcollege.edu
6159,499699,Dragon Rises College of Oriental Medicine - Br...,,6815 14th Street West,Bradenton,FL,34207-5810,12,5,,,,-1,,3888301,1,,,,,,,,,99,-3,-3,-3,-3,-3,-3,-3,2,-2,-2,2,21,1,G,449481,-2,-2,3,1,3,9,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,2,-2,-2,-2,-2,35840,1,412,12081,Manatee County,1216,-82.575135,27.419641,-2,-2,
6160,499705,Tulsa Welding School-Dallas Campus,,700 E Airport Freeway,Irving,TX,75602-4805,48,6,David Bowman,Campus President,2142279911,872138502,,961806,1,https://www.tws.edu/,https://www.tws.edu/admissions/,https://www.tws.edu/financial-aid/,,https://www.tws.edu/tuition-planning/tuition-c...,https://www.tws.edu/tuition-planning/costs/,,https://www.tws.edu/wp-content/uploads/tws-dal...,9,3,3,1,1,2,0,2,2,-2,2,2,11,1,N,-2,-2,-2,1,1,1,1,2,6,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,2,1,1,Tulsa Welding School Inc.,304700,19100,1,206,48113,Dallas County,4833,-96.937120,32.836133,217,2,www.tws.edu
6161,499714,Pearl River Community College - Hancock Workfo...,,13915 Fred & Al Key Road,Kiln,MS,39556-0000,28,5,,,,-1,,243003,1,,,,,,,,,99,-3,-3,-3,-3,-3,-3,-3,2,-2,-2,2,41,1,G,176239,-2,-2,3,1,3,9,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,2,-2,-2,-2,-2,25060,1,-2,28045,Hancock County,2804,-89.450819,30.371129,-2,-2,


In [14]:
services = pd.read_csv('data/ic2023.csv', encoding='latin1')
services

Unnamed: 0,UNITID,PEO1ISTR,PEO2ISTR,PEO3ISTR,PEO4ISTR,PEO5ISTR,PEO6ISTR,PEO7ISTR,CNTLAFFI,PUBPRIME,PUBSECON,RELAFFIL,LEVEL1,LEVEL1A,LEVEL1B,LEVEL2,LEVEL3,LEVEL4,LEVEL5,LEVEL6,LEVEL7,LEVEL8,LEVEL12,LEVEL17,LEVEL18,LEVEL19,CALSYS,FT_UG,FT_FTUG,FTGDNIDP,PT_UG,PT_FTUG,PTGDNIDP,DOCPP,DOCPPSP,OPENADMP,NONCRDT1,NONCRDT2,NONCRDT3,NONCRDT4,NONCRDT5,NONCRDT6,NONCRDT7,NONCRDT8,NONCRDT9,ENRHSST,ENRHSST1,ENRHSST2,VET1,VET2,VET3,VET4,VET5,VET9,CREDITS2,CREDITS3,CREDITS4,SLO5,SLO51,SLO52,SLO521,SLO53,SLO6,SLO7,SLO8,SLO81,SLO82,SLO83,SLO9,SLOA,SLOB,YRSCOLL,STUSRV2,STUSRV3,STUSRV4,STUSRV8,STUSRV9,LIBRES1,LIBRES2,LIBRES3,LIBRES4,LIBRES5,LIBRES6,LIBRES9,TUITPL,TUITPL1,TUITPL2,TUITPL3,TUITPL4,PRMPGM,DSTNUGC,DSTNUGP,DSTNUGN,DSTNGC,DSTNGP,DSTNGN,DISTCRS,DISTPGS,DSTNCED1,DSTNCED2,DSTNCED3,DISTNCED,DISAB,XDISABPC,DISABPCT,ALLONCAM,TUITVARY,ROOM,XROOMCAP,ROOMCAP,BOARD,XMEALSWK,MEALSWK,XROOMAMT,ROOMAMT,XBORDAMT,BOARDAMT,XRMBDAMT,RMBRDAMT,XAPPFEEU,APPLFEEU,XAPPFEEG,APPLFEEG,ATHASSOC,ASSOC1,ASSOC2,ASSOC3,ASSOC4,ASSOC5,ASSOC6,SPORT1,CONFNO1,SPORT2,CONFNO2,SPORT3,CONFNO3,SPORT4,CONFNO4
0,100654,0,1,0,0,0,0,0,1,2,0,-2,0,0,0,0,0,0,1,0,1,1,0,1,0,0,1,1,1,1,1,1,1,-2,-2,2,0,0,1,0,0,0,0,0,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,0,1,0,-2,1,1,1,1,0,1,1,1,1,1,1,0,1,0,0,1,0,2,1,1,0,1,1,0,1,1,1,1,0,2,1,A,.,2,1,1,R,3620,1,R,21,R,3790,R,4202,A,.,R,30,R,45,1,1,0,0,0,0,0,1,133,1,133,1,133,1,133
1,100663,0,1,1,0,0,0,0,1,2,0,-2,1,0,1,1,0,0,1,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,2,0,0,1,0,0,0,1,1,0,1,1,0,1,1,1,1,1,0,1,1,0,1,1,0,0,1,1,0,1,1,0,1,0,1,0,-2,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,2,2,R,6.94,2,1,1,R,3480,2,A,.,R,8550,R,6030,A,.,R,40,R,50,1,1,0,0,0,0,0,1,372,1,372,1,372,1,372
2,100690,0,1,0,0,0,0,0,4,-2,-2,74,0,0,0,0,1,0,1,0,1,0,0,1,0,0,1,1,2,1,1,1,1,-2,-2,1,0,0,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-2,1,1,0,0,0,1,1,1,1,1,1,0,2,0,0,0,0,2,1,1,0,1,1,0,1,1,1,1,0,1,1,A,.,-2,2,2,A,.,3,A,.,A,.,A,.,A,.,R,50,R,50,2,0,0,0,0,0,0,2,-2,2,-2,2,-2,2,-2
3,100706,0,1,1,1,0,0,0,1,2,0,-2,1,0,1,0,0,0,1,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,2,2,1,1,0,1,0,0,0,1,0,1,1,0,1,1,1,1,1,0,0,1,0,1,1,0,0,0,1,1,1,1,0,1,0,0,0,-2,1,1,1,1,0,1,1,1,1,1,1,0,1,0,1,1,0,2,1,1,0,1,1,0,1,1,1,1,0,2,2,R,4.80,2,1,1,R,2148,1,R,21,A,.,A,.,R,11122,R,30,R,60,1,1,0,0,0,0,0,2,-2,1,146,1,146,1,146
4,100724,1,1,0,0,0,0,1,1,2,0,-2,0,0,0,0,0,0,1,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,2,2,0,0,0,0,0,0,0,0,1,1,1,0,0,1,1,1,0,0,1,1,0,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,1,1,1,0,0,1,1,1,1,1,1,0,1,0,0,1,0,1,1,1,0,1,1,0,1,1,1,1,0,2,1,A,.,2,1,1,R,2079,1,R,19,R,3500,R,4190,A,.,R,30,R,30,1,1,0,0,0,0,0,1,133,1,133,1,133,1,133
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6044,499662,1,0,-2,0,0,0,0,2,-2,-2,-2,0,0,0,0,0,1,-2,-2,-2,-2,0,-2,-2,-2,1,1,1,-2,1,2,-2,-2,-2,1,0,0,0,0,0,0,0,0,1,2,-2,-2,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,1,-2,-2,-2,-2,0,0,0,-2,1,1,1,0,0,1,1,1,0,1,0,0,1,0,0,1,0,2,0,0,1,-2,-2,-2,0,0,0,-2,1,2,1,A,.,2,2,2,A,.,3,A,.,A,.,A,.,A,.,R,25,A,.,-2,-2,-2,-2,-2,-2,-2,2,-2,2,-2,2,-2,2,-2
6045,499671,0,1,0,0,0,0,0,2,-2,-2,-2,0,0,0,0,1,0,1,0,0,0,0,0,0,0,6,1,1,2,1,1,2,-2,-2,1,0,0,0,0,0,0,0,0,1,2,-2,-2,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,-2,1,1,1,0,0,0,1,1,1,1,1,0,1,0,0,1,0,2,1,0,0,-2,-2,-2,1,0,1,-2,0,2,1,A,.,2,-2,2,A,.,3,A,.,A,.,A,.,A,.,R,0,A,.,2,0,0,0,0,0,0,2,-2,2,-2,2,-2,2,-2
6046,499680,1,0,-2,0,0,0,0,2,-2,-2,-2,1,0,1,0,1,0,-2,-2,-2,-2,0,-2,-2,-2,7,1,1,-2,2,2,-2,-2,-2,1,0,0,0,0,0,0,0,0,1,2,-2,-2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,-2,0,0,1,0,0,0,1,1,0,0,0,0,1,0,0,1,0,2,1,0,0,-2,-2,-2,1,0,1,-2,0,2,1,A,.,2,-2,2,A,.,3,A,.,A,.,A,.,A,.,R,100,A,.,2,0,0,0,0,0,0,2,-2,2,-2,2,-2,2,-2
6047,499705,1,0,-2,0,0,0,0,2,-2,-2,-2,1,0,1,0,0,0,-2,-2,-2,-2,0,-2,-2,-2,6,1,1,-2,2,2,-2,-2,-2,1,0,1,0,0,0,0,0,0,0,2,-2,-2,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,-2,-2,-2,-2,1,0,0,-2,0,1,1,0,0,1,1,1,0,1,1,0,1,0,0,1,0,2,0,0,1,-2,-2,-2,0,0,0,-2,1,2,1,A,.,2,-2,2,A,.,3,A,.,A,.,A,.,A,.,R,0,A,.,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2


In [15]:
charges = pd.read_csv('data/ic2023_ay.csv', encoding='latin1')
charges

Unnamed: 0,UNITID,XTUIT1,TUITION1,XFEE1,FEE1,XHRCHG1,HRCHG1,XTUIT2,TUITION2,XFEE2,FEE2,XHRCHG2,HRCHG2,XTUIT3,TUITION3,XFEE3,FEE3,XHRCHG3,HRCHG3,XTUIT5,TUITION5,XFEE5,FEE5,XHRCHG5,HRCHG5,XTUIT6,TUITION6,XFEE6,FEE6,XHRCHG6,HRCHG6,XTUIT7,TUITION7,XFEE7,FEE7,XHRCHG7,HRCHG7,XISPRO1,ISPROF1,XISPFE1,ISPFEE1,XOSPRO1,OSPROF1,XOSPFE1,OSPFEE1,XISPRO2,ISPROF2,XISPFE2,ISPFEE2,XOSPRO2,OSPROF2,XOSPFE2,OSPFEE2,XISPRO3,ISPROF3,XISPFE3,ISPFEE3,XOSPRO3,OSPROF3,XOSPFE3,OSPFEE3,XISPRO4,ISPROF4,XISPFE4,ISPFEE4,XOSPRO4,OSPROF4,XOSPFE4,OSPFEE4,XISPRO5,ISPROF5,XISPFE5,ISPFEE5,XOSPRO5,OSPROF5,XOSPFE5,OSPFEE5,XISPRO6,ISPROF6,XISPFE6,ISPFEE6,XOSPRO6,OSPROF6,XOSPFE6,OSPFEE6,XISPRO7,ISPROF7,XISPFE7,ISPFEE7,XOSPRO7,OSPROF7,XOSPFE7,OSPFEE7,XISPRO8,ISPROF8,XISPFE8,ISPFEE8,XOSPRO8,OSPROF8,XOSPFE8,OSPFEE8,XISPRO9,ISPROF9,XISPFE9,ISPFEE9,XOSPRO9,OSPROF9,XOSPFE9,OSPFEE9,XCHG1AT0,CHG1AT0,XCHG1AF0,CHG1AF0,XCHG1AY0,CHG1AY0,XCHG1AT1,CHG1AT1,XCHG1AF1,CHG1AF1,XCHG1AY1,CHG1AY1,XCHG1AT2,CHG1AT2,XCHG1AF2,CHG1AF2,XCHG1AY2,CHG1AY2,XCHG1AT3,CHG1AT3,XCHG1AF3,CHG1AF3,XCHG1AY3,CHG1AY3,CHG1TGTD,CHG1FGTD,XCHG2AT0,CHG2AT0,XCHG2AF0,CHG2AF0,XCHG2AY0,CHG2AY0,XCHG2AT1,CHG2AT1,XCHG2AF1,CHG2AF1,XCHG2AY1,CHG2AY1,XCHG2AT2,CHG2AT2,XCHG2AF2,CHG2AF2,XCHG2AY2,CHG2AY2,XCHG2AT3,CHG2AT3,XCHG2AF3,CHG2AF3,XCHG2AY3,CHG2AY3,CHG2TGTD,CHG2FGTD,XCHG3AT0,CHG3AT0,XCHG3AF0,CHG3AF0,XCHG3AY0,CHG3AY0,XCHG3AT1,CHG3AT1,XCHG3AF1,CHG3AF1,XCHG3AY1,CHG3AY1,XCHG3AT2,CHG3AT2,XCHG3AF2,CHG3AF2,XCHG3AY2,CHG3AY2,XCHG3AT3,CHG3AT3,XCHG3AF3,CHG3AF3,XCHG3AY3,CHG3AY3,CHG3TGTD,CHG3FGTD,XCHG4AY0,CHG4AY0,XCHG4AY1,CHG4AY1,XCHG4AY2,CHG4AY2,XCHG4AY3,CHG4AY3,XCHG5AY0,CHG5AY0,XCHG5AY1,CHG5AY1,XCHG5AY2,CHG5AY2,XCHG5AY3,CHG5AY3,XCHG6AY0,CHG6AY0,XCHG6AY1,CHG6AY1,XCHG6AY2,CHG6AY2,XCHG6AY3,CHG6AY3,XCHG7AY0,CHG7AY0,XCHG7AY1,CHG7AY1,XCHG7AY2,CHG7AY2,XCHG7AY3,CHG7AY3,XCHG8AY0,CHG8AY0,XCHG8AY1,CHG8AY1,XCHG8AY2,CHG8AY2,XCHG8AY3,CHG8AY3,XCHG9AY0,CHG9AY0,XCHG9AY1,CHG9AY1,XCHG9AY2,CHG9AY2,XCHG9AY3,CHG9AY3
0,100654,R,8610,R,1414,R,287,R,8610,R,1414,R,287,R,17220,R,1414,R,574,R,10128,R,1414,R,422,R,10128,R,1414,R,422,R,20160,R,1414,R,840,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,8610,R,1414,R,10024,R,8610,R,1414,R,10024,R,8610,R,1414,R,10024,R,8610,R,1414,R,10024,.,.,R,8610,R,1414,R,10024,R,8610,R,1414,R,10024,R,8610,R,1414,R,10024,R,8610,R,1414,R,10024,.,.,R,17220,R,1414,R,18634,R,17220,R,1414,R,18634,R,17220,R,1414,R,18634,R,17220,R,1414,R,18634,.,.,R,1600,R,1600,R,1600,R,2192,R,9240,R,9520,R,9520,R,11402,R,3090,R,3090,R,3090,R,3864,R,9240,R,9520,R,9520,R,11402,R,3090,R,3090,R,3090,R,3864,R,3440,R,3440,R,3440,R,4271
1,100663,R,8832,R,0,R,368,R,8832,R,0,R,368,R,21864,R,0,R,911,R,8424,R,0,R,468,R,8424,R,0,R,468,R,20556,R,0,R,1142,Y,.,Y,.,Y,.,Y,.,R,31902,R,0,R,74362,R,0,R,32134,R,0,R,62714,R,0,R,29841,R,0,R,54012,R,0,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,Y,.,R,8568,R,0,R,8568,R,8568,R,0,R,8568,R,8832,R,0,R,8832,R,8832,R,0,R,8832,.,.,R,8568,R,0,R,8568,R,8568,R,0,R,8568,R,8832,R,0,R,8832,R,8832,R,0,R,8832,.,.,R,20400,R,0,R,20400,R,20400,R,0,R,20400,R,21216,R,0,R,21216,R,21864,R,0,R,21864,.,.,R,1200,R,1200,R,1200,R,1200,R,12307,R,12307,R,13120,R,13590,R,5555,R,5555,R,5757,R,5757,R,12307,R,12307,R,14330,R,14580,R,5555,R,5555,R,6007,R,6007,R,5555,R,5555,R,6007,R,6007
2,100690,R,9480,R,990,R,455,R,9480,R,990,R,455,R,9480,R,990,R,455,R,11700,R,1390,R,750,R,11700,R,1390,R,750,R,11700,R,1390,R,750,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.
3,100706,R,10120,R,1758,R,421,R,10120,R,1758,R,421,R,23012,R,1758,R,958,R,10632,R,1054,R,708,R,10632,R,1054,R,708,R,24430,R,1054,R,1628,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,9730,R,1608,R,11338,R,9730,R,1758,R,11488,R,10120,R,1758,R,11878,R,10120,R,1650,R,11770,.,.,R,9730,R,1608,R,11338,R,9730,R,1758,R,11488,R,10120,R,1758,R,11878,R,10120,R,1650,R,11770,.,.,R,22126,R,1608,R,23734,R,22126,R,1758,R,23884,R,23012,R,1758,R,24770,R,23012,R,1650,R,24662,.,.,R,2200,R,2300,R,2416,R,2416,R,10652,R,10764,R,11122,R,11122,R,4076,R,4156,R,4462,R,4462,R,10652,R,10764,R,11122,R,11122,R,4076,R,4156,R,4462,R,4462,R,4076,R,4156,R,4462,R,4462
4,100724,R,8328,R,2920,R,347,R,8328,R,2920,R,347,R,16656,R,2920,R,694,R,7416,R,2920,R,412,R,7416,R,2920,R,412,R,14832,R,2920,R,824,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,8328,R,2740,R,11068,R,8328,R,2740,R,11068,R,8328,R,2740,R,11068,R,8328,R,2920,R,11248,.,.,R,8328,R,2740,R,11068,R,8328,R,2740,R,11068,R,8328,R,2740,R,11068,R,8328,R,2920,R,11248,.,.,R,16656,R,2740,R,19396,R,16656,R,2740,R,19396,R,16656,R,2740,R,19396,R,16656,R,2920,R,19576,.,.,R,1600,R,1600,R,1600,R,1600,R,6050,R,6050,R,6050,R,7690,R,3392,R,3392,R,3392,R,3392,R,7320,R,7320,R,7320,R,7320,R,4228,R,5130,R,5130,R,5130,R,4228,R,5130,R,5130,R,5130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3820,499404,R,22480,R,1280,R,843,R,22480,R,1280,R,843,R,22480,R,1280,R,843,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,22480,R,1280,R,23760,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,22480,R,1280,R,23760,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,22480,R,1280,R,23760,.,.,A,.,A,.,A,.,R,0,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,11844,A,.,A,.,A,.,R,10503,A,.,A,.,A,.,R,7037
3821,499422,R,22613,R,1294,A,.,R,22613,R,1294,A,.,R,22613,R,1294,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.
3822,499538,R,7395,R,0,A,.,R,7395,Z,0,A,.,R,7395,Z,0,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,6650,R,0,R,6650,R,6995,R,0,R,6995,R,7250,R,0,R,7250,R,7396,R,0,R,7396,0,0,R,6650,R,0,R,6650,R,6995,R,0,R,6995,R,7250,R,0,R,7250,R,7396,R,0,R,7396,0,0,R,6650,R,0,R,6650,R,6995,R,0,R,6995,R,7250,R,0,R,7250,R,7396,R,0,R,7396,0,0,R,500,R,500,R,500,R,500,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,3500,R,3500,R,3500,R,3500,R,2556,R,2556,R,2556,R,2556,R,2304,R,2304,R,2304,R,2304
3823,499635,R,5550,R,1500,R,185,R,5550,R,1500,R,185,R,12025,R,2100,R,325,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,5550,R,1500,R,7050,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,5550,R,1500,R,7050,.,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,12025,R,2100,R,14125,.,.,A,.,A,.,A,.,R,1500,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,A,.,R,10320,A,.,A,.,A,.,R,2380,A,.,A,.,A,.,R,2140


In [None]:
from IPython.core.magic import (register_line_magic, register_cell_magic, register_line_cell_magic, needs_local_scope)
from IPython.core.magic_arguments import (argument, magic_arguments, parse_argstring)
import duckdb

@magic_arguments()
@argument('query', nargs='*', help="The SQL query to execute")
@argument('--db', '-d', default=None, help="Which connection to use")
@argument('--output', '-o', default=None, help="Name of the variable to store output DataFrame in local scope")
@register_line_cell_magic
@needs_local_scope
def quack(line, cell=None, local_ns=None):
    "Magic that works both as %lcmagic and as %%lcmagic"

    args = parse_argstring(quack, line)
    query = cell if cell else ' '.join(args.query)

    if args.db:
        conn = local_ns[args.db]
    elif 'conn' in local_ns:
        conn = local_ns['conn']
    else:
        conn = duckdb.connect(':memory:')
        local_ns['conn'] = conn

    result_df = conn.execute(query).fetchdf()

    if args.output:
        local_ns[args.output] = result_df
    return result_df

# Usage example:
# %%quack -o df SELECT * FROM my_table

In [17]:
%%quack --output peds_df
SELECT
    schools.unitid as college_id,
    trim(INSTNM) AS college_name,
    trim(IALIAS) AS short_name,
    city AS city,
    stabbr AS state,
    LATITUDE::float as latitude,
    LONGITUD::float as longitude,
    (try_cast(TUITION3 AS DECIMAL(10,2)) + try_cast(FEE3 as decimal(10,2)))::bigint AS total_cost,
    domain
FROM schools
JOIN charges ON schools.UNITID = charges.UNITID
JOIN services ON schools.UNITID = services.UNITID

Unnamed: 0,college_id,college_name,short_name,city,state,latitude,longitude,total_cost,domain
0,100654,Alabama A & M University,AAMU,Normal,AL,34.783367,-86.568504,18634.0,
1,100663,University of Alabama at Birmingham,UAB,Birmingham,AL,33.505695,-86.799347,21864.0,www.uab.edu
2,100706,University of Alabama in Huntsville,UAH University of Alabama Huntsville,Huntsville,AL,34.724556,-86.640450,24770.0,
3,100751,The University of Alabama,,Tuscaloosa,AL,33.211876,-87.545975,33200.0,
4,100830,Auburn University at Montgomery,AUM||Auburn University at Montgomery|Auburn Mo...,Montgomery,AL,32.367359,-86.177544,20092.0,
...,...,...,...,...,...,...,...,...,...
3820,452948,Galen College of Nursing-Cincinnati,,Cincinnati,OH,39.283691,-84.365944,16400.0,
3821,488846,University of the People,,Pasadena,CA,34.146168,-118.138046,1200.0,
3822,499538,Puerto Rico Advance Institute,,Bayamon,PR,18.399160,-66.154488,7395.0,
3823,499635,EDP University of Puerto Rico-Caguas,,Caguas,PR,18.242649,-66.025307,14125.0,www.edpuniversity.edu


In [18]:
import plotly.express as px

# Calculate the average total cost per state
average_cost_per_state = peds_df.groupby('state')['total_cost'].mean().reset_index()

# Create the heat map
fig = px.choropleth(
    average_cost_per_state,
    locations='state',
    locationmode='USA-states',
    color='total_cost',
    color_continuous_scale='Viridis',
    scope='usa',
    labels={'total_cost': 'Mean Avg. Total Cost'},
    title='Average Out of State Cost'
)

fig.show()

In [19]:
import feed
import json
from importlib import reload; reload(feed)

data_file = os.path.join('data', 'utr-mens.json')
if not os.path.exists(data_file):
    print('Fetching data...')
    colleges = await feed.utr_colleges(top=10000)
    with open(data_file, 'w') as f:
        json.dump(colleges, f, indent=2)
print(f'Loading data from {data_file}...')
json.loads(open(data_file).read())
print(f"Number of colleges: {len(colleges['hits'])}")

Loading data from data/utr-mens.json...


NameError: name 'colleges' is not defined

In [None]:
import pandas as pd
import json

# Load the JSON data from the file
with open(data_file, 'r') as f:
    data = json.load(f)

# Extract the 'hits' property which contains the rows
hits = data['hits']

# Normalize the JSON data to flatten the attributes into new columns
utr = pd.json_normalize(hits)

utr['domain'] = utr['source.url'].apply(parse_domain)
# Display the dataframe
utr.head()

Unnamed: 0,fields,index,score,type,version,id,nested,parent,routing,sorts,explanation,matchedQueries,source.clubMemberRole,source.id,source.name,source.description,source.locationId,source.tierTypeId,source.clubSubTypeId,source.paymentsEnabled,source.sanctioned,source.url,source.memberCount,source.eventCount,source.private,source.isTrialing,source.trialEndDateUtc,source.isSearchable,source.canRunEvents,source.isHighSchool,source.isCollege,source.gender,source.bannerUrl,source.profilePhotoUrl,source.location.locationId,source.location.latLng,source.location.display,source.location.googleFormattedName,source.location.googlePlaceId,source.location.placeName,source.location.streetAddress,source.location.cityStateZip,source.location.cityName,source.location.cityAbbr,source.location.stateName,source.location.stateAbbr,source.location.countryName,source.location.countryCode2,source.location.countryCode3,source.location.countryIoc,source.statusType,source.school.displayName,source.school.altName1,source.school.altName2,source.school.altName3,source.school.altName4,source.school.nickname,source.school.altNickname1,source.school.altNickname2,source.school.ladyNickname,source.school.private,source.school.type,source.school.power6Avg,source.school.conference.id,source.school.conference.conferenceName,source.school.conference.shortName,source.school.conference.divisionId,source.school.conference.division.id,source.school.conference.division.divisionName,source.school.conference.division.shortName,source.school.rosterHasUnclaimedPlayers,source.school.name,source.school.clubId,source.school.shortName,source.school.profilePhotoUrl,source.school.rosterCount,source.school.rosterYear,source.school.power6,source.school.power6High,source.school.power6Low,source.school.power6Men,source.school.power6MenHigh,source.school.power6MenLow,source.school.power6Women,source.school.power6WomenHigh,source.school.power6WomenLow,source.school.isCollege,source.school.tierType,source.school.clubSubTypeId,source.school.roster,source.school.clubProfileImages.default,source.school.clubProfileImages.thumbnail.oneX,source.school.clubProfileImages.thumbnail.twoX,source.school.clubProfileImages.thumbnail.threeX,source.school.clubProfileImages.card.oneX,source.school.clubProfileImages.card.twoX,source.school.clubProfileImages.card.threeX,source.school.clubProfileImages.profile.oneX,source.school.clubProfileImages.profile.twoX,source.school.clubProfileImages.profile.threeX,source.school.clubProfileImages.icon.oneX,source.school.clubProfileImages.icon.twoX,source.school.clubProfileImages.icon.threeX,source.school.currentMember,source.clubMembersSimplified,source.clubImages.default,source.clubImages.banner.oneX,source.clubImages.banner.twoX,source.clubImages.banner.threeX,source.clubImages.dashboard.oneX,source.clubImages.dashboard.twoX,source.clubImages.dashboard.threeX,source.clubProfileImages.default,source.clubProfileImages.thumbnail.oneX,source.clubProfileImages.thumbnail.twoX,source.clubProfileImages.thumbnail.threeX,source.clubProfileImages.card.oneX,source.clubProfileImages.card.twoX,source.clubProfileImages.card.threeX,source.clubProfileImages.profile.oneX,source.clubProfileImages.profile.twoX,source.clubProfileImages.profile.threeX,source.clubProfileImages.icon.oneX,source.clubProfileImages.icon.twoX,source.clubProfileImages.icon.threeX,source.clubTags,source.school.conference,domain
0,,clubs_v2710,,default,,929,,,,[82.06],,[],,929,University of Texas (Austin) - M,,130,6,1,False,True,http://www.texassports.com/index.aspx?path=mten,359,6,False,False,,False,True,False,True,Male,,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,,"[30.267153, -97.7430608]","Austin, TX","Austin, TX, USA",,,,"Austin, TX",Austin,Austin,Texas,TX,United States,US,USA,USA,,University of Texas (Austin),,,,,,,,,False,College,,26.0,Southeastern Conference,SEC,,1.0,NCAA - Division I,D1,True,University of Texas (Austin),0,Texas,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],,,,,,,,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,929/images/profile/db64fe26-b41e-4793-97e0-5e9...,,,www.texassports.com
1,,clubs_v2710,,default,,1029,,,,[81.2],,[],,1029,Texas Christian University - M,,1478,6,1,False,True,http://www.gofrogs.com,292,4,False,False,,False,True,False,True,Male,,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,,"[32.7554883, -97.3307658]","Fort Worth, TX","Fort Worth, TX, USA",,,,"Fort Worth, TX",Fort Worth,Fort Worth,Texas,TX,United States,US,USA,USA,,Texas Christian University,,,,,,,,,True,College,,10.0,Big 12 Conference,Big 12,,1.0,NCAA - Division I,D1,True,Texas Christian University,0,TCU,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],,,,,,,,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,1029/images/profile/03e39e36-3b67-4d27-bfb8-35...,,,www.gofrogs.com
2,,clubs_v2710,,default,,919,,,,[80.36],,[],,919,Ohio State University - M,,759,4,1,False,True,http://www.ohiostatebuckeyes.com/sports/m-tennis,231,2,False,False,,False,False,False,True,Male,,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,,"[39.9611755, -82.99879419999999]","Columbus, OH","Columbus, OH, USA",,,,"Columbus, OH",Columbus,Columbus,Ohio,OH,United States,US,USA,USA,,Ohio State University,OSU,The Ohio State University,,,Buckeyes,,,,False,College,,9.0,Big Ten Conference,Big Ten,,1.0,NCAA - Division I,D1,True,Ohio State University,0,Ohio State,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],,,,,,,,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,919/images/profile/dbe366ab-6a68-4b5d-bf67-b15...,,,www.ohiostatebuckeyes.com
3,,clubs_v2710,,default,,995,,,,[79.94],,[],,995,Wake Forest University - M,,1293,6,1,False,True,http://www.wakeforestsports.com/sports/m-tenni...,279,11,False,False,,False,True,False,True,Male,995/images/banner/73cfd429-6c29-4c46-9315-9c3a...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,,"[36.09985959999999, -80.244216]","Winston-Salem, NC","Winston-Salem, NC, USA",,,,"Winston-Salem, NC",Winston-Salem,Winston-Salem,North Carolina,NC,United States,US,USA,USA,,Wake Forest University,WF,Wake,,,Demon Deacons,Deacs,,,True,College,,3.0,Atlantic Coast Conference,ACC,,1.0,NCAA - Division I,D1,True,Wake Forest University,0,Wake Forest,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],995/images/banner/73cfd429-6c29-4c46-9315-9c3a...,995/images/banner/73cfd429-6c29-4c46-9315-9c3a...,995/images/banner/73cfd429-6c29-4c46-9315-9c3a...,995/images/banner/73cfd429-6c29-4c46-9315-9c3a...,995/images/banner/73cfd429-6c29-4c46-9315-9c3a...,995/images/banner/73cfd429-6c29-4c46-9315-9c3a...,995/images/banner/73cfd429-6c29-4c46-9315-9c3a...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,995/images/profile/fc40ad3e-7f8c-466f-9c1d-bcb...,,,www.wakeforestsports.com
4,,clubs_v2710,,default,,935,,,,[79.89],,[],,935,Stanford University - M,,6639,6,1,False,True,http://www.gostanford.com/index.aspx?path=mten,1054,5,False,False,,False,True,False,True,Male,935/images/banner/703d3f94-f7e1-4de2-b5d8-ee06...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,,"[37.42410599999999, -122.1660756]","Stanford, CA","Stanford, CA, USA",,,,"Stanford, CA",Stanford,Stanford,California,CA,United States,US,USA,USA,,Stanford University,,,,,,,,,True,College,,3.0,Atlantic Coast Conference,ACC,,1.0,NCAA - Division I,D1,True,Stanford University,0,Stanford,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],935/images/banner/703d3f94-f7e1-4de2-b5d8-ee06...,935/images/banner/703d3f94-f7e1-4de2-b5d8-ee06...,935/images/banner/703d3f94-f7e1-4de2-b5d8-ee06...,935/images/banner/703d3f94-f7e1-4de2-b5d8-ee06...,935/images/banner/703d3f94-f7e1-4de2-b5d8-ee06...,935/images/banner/703d3f94-f7e1-4de2-b5d8-ee06...,935/images/banner/703d3f94-f7e1-4de2-b5d8-ee06...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,935/images/profile/f436c4f7-b383-4c66-a417-1a1...,,,www.gostanford.com


In [None]:
%%quack --output matches

SELECT
    (
        SELECT
            array_agg(peds_df.college_id)
        FROM peds_df
        WHERE regexp_replace(peds_df.college_name, '\W', '', 'g') IN (
            regexp_replace("source.school.name", '\W', '', 'g'),
            regexp_replace("source.school.displayName", '\W', '', 'g'),
            regexp_replace("source.school.altName1", '\W', '', 'g'),
            regexp_replace("source.school.altName2", '\W', '', 'g')
        )
    ) AS peds_college_ids,
    *
FROM utr

Unnamed: 0,peds_college_ids,fields,index,score,type,version,id,nested,parent,routing,sorts,explanation,matchedQueries,source.clubMemberRole,source.id,source.name,source.description,source.locationId,source.tierTypeId,source.clubSubTypeId,source.paymentsEnabled,source.sanctioned,source.url,source.memberCount,source.eventCount,source.private,source.isTrialing,source.trialEndDateUtc,source.isSearchable,source.canRunEvents,source.isHighSchool,source.isCollege,source.gender,source.bannerUrl,source.profilePhotoUrl,source.location.locationId,source.location.latLng,source.location.display,source.location.googleFormattedName,source.location.googlePlaceId,source.location.placeName,source.location.streetAddress,source.location.cityStateZip,source.location.cityName,source.location.cityAbbr,source.location.stateName,source.location.stateAbbr,source.location.countryName,source.location.countryCode2,source.location.countryCode3,source.location.countryIoc,source.statusType,source.school.displayName,source.school.altName1,source.school.altName2,source.school.altName3,source.school.altName4,source.school.nickname,source.school.altNickname1,source.school.altNickname2,source.school.ladyNickname,source.school.private,source.school.type,source.school.power6Avg,source.school.conference.id,source.school.conference.conferenceName,source.school.conference.shortName,source.school.conference.divisionId,source.school.conference.division.id,source.school.conference.division.divisionName,source.school.conference.division.shortName,source.school.rosterHasUnclaimedPlayers,source.school.name,source.school.clubId,source.school.shortName,source.school.profilePhotoUrl,source.school.rosterCount,source.school.rosterYear,source.school.power6,source.school.power6High,source.school.power6Low,source.school.power6Men,source.school.power6MenHigh,source.school.power6MenLow,source.school.power6Women,source.school.power6WomenHigh,source.school.power6WomenLow,source.school.isCollege,source.school.tierType,source.school.clubSubTypeId,source.school.roster,source.school.clubProfileImages.default,source.school.clubProfileImages.thumbnail.oneX,source.school.clubProfileImages.thumbnail.twoX,source.school.clubProfileImages.thumbnail.threeX,source.school.clubProfileImages.card.oneX,source.school.clubProfileImages.card.twoX,source.school.clubProfileImages.card.threeX,source.school.clubProfileImages.profile.oneX,source.school.clubProfileImages.profile.twoX,source.school.clubProfileImages.profile.threeX,source.school.clubProfileImages.icon.oneX,source.school.clubProfileImages.icon.twoX,source.school.clubProfileImages.icon.threeX,source.school.currentMember,source.clubMembersSimplified,source.clubImages.default,source.clubImages.banner.oneX,source.clubImages.banner.twoX,source.clubImages.banner.threeX,source.clubImages.dashboard.oneX,source.clubImages.dashboard.twoX,source.clubImages.dashboard.threeX,source.clubProfileImages.default,source.clubProfileImages.thumbnail.oneX,source.clubProfileImages.thumbnail.twoX,source.clubProfileImages.thumbnail.threeX,source.clubProfileImages.card.oneX,source.clubProfileImages.card.twoX,source.clubProfileImages.card.threeX,source.clubProfileImages.profile.oneX,source.clubProfileImages.profile.twoX,source.clubProfileImages.profile.threeX,source.clubProfileImages.icon.oneX,source.clubProfileImages.icon.twoX,source.clubProfileImages.icon.threeX,source.clubTags,source.school.conference,domain
0,[176080],,clubs_v2710,,default,,1268,,,,[79.7],,[],,1268,Mississippi State University - M,,3361,6,1,False,True,,56,25,False,False,,False,True,False,True,Male,1268/images/banner/47add3d8-d8dc-4035-beb2-a7f...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,,"[33.4503998, -88.81838719999999]","Starkville, MS","Starkville, MS 39759, USA",,,,"Starkville, MS 39759",Starkville,Starkville,Mississippi,MS,United States,US,USA,USA,,Mississippi State University,Miss St,MSU,,,Bulldogs,,,,False,College,,26.0,Southeastern Conference,SEC,,1.0,NCAA - Division I,D1,True,Mississippi State University,0,Mississippi State,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],1268/images/banner/47add3d8-d8dc-4035-beb2-a7f...,1268/images/banner/47add3d8-d8dc-4035-beb2-a7f...,1268/images/banner/47add3d8-d8dc-4035-beb2-a7f...,1268/images/banner/47add3d8-d8dc-4035-beb2-a7f...,1268/images/banner/47add3d8-d8dc-4035-beb2-a7f...,1268/images/banner/47add3d8-d8dc-4035-beb2-a7f...,1268/images/banner/47add3d8-d8dc-4035-beb2-a7f...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,1268/images/profile/e382d392-6515-4fdd-ad25-d7...,,,
1,[104179],,clubs_v2710,,default,,1025,,,,[78.77],,[],,1025,University of Arizona - M,,253,6,1,False,True,http://www.arizonawildcats.com/index.aspx?path...,110,22,False,False,,False,True,False,True,Male,1025/images/banner/056601d4-99a1-40cd-b43b-93c...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,,"[32.2539787, -110.9741769]","Tucson, AZ","Tucson, AZ, USA",,,,"Tucson, AZ",Tucson,Tucson,Arizona,AZ,United States,US,USA,USA,,University of Arizona,,,,,,,,,False,College,,10.0,Big 12 Conference,Big 12,,1.0,NCAA - Division I,D1,True,University of Arizona,0,Arizona,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],1025/images/banner/056601d4-99a1-40cd-b43b-93c...,1025/images/banner/056601d4-99a1-40cd-b43b-93c...,1025/images/banner/056601d4-99a1-40cd-b43b-93c...,1025/images/banner/056601d4-99a1-40cd-b43b-93c...,1025/images/banner/056601d4-99a1-40cd-b43b-93c...,1025/images/banner/056601d4-99a1-40cd-b43b-93c...,1025/images/banner/056601d4-99a1-40cd-b43b-93c...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,1025/images/profile/a7bd3b10-e517-4755-9ffe-a1...,,,www.arizonawildcats.com
2,[166027],,clubs_v2710,,default,,870,,,,[78.06],,[],,870,Harvard University - M,Join us for our Summer and Fall UTR Sports Cam...,117,6,1,False,True,http://www.gocrimson.com/sports/mten/index,541,13,False,False,,False,True,False,True,Male,870/images/banner/c95a6cb2-7c8d-47a1-8b86-d0b7...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,,"[42.3736158, -71.10973349999999]","Cambridge, MA","Cambridge, MA, USA",,,,"Cambridge, MA",Cambridge,Cambridge,Massachusetts,MA,United States,US,USA,USA,,Harvard University,,,,,Crimson,,,,True,College,,16.0,Ivy League,Ivy,,1.0,NCAA - Division I,D1,True,Harvard University,0,Harvard,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],870/images/banner/c95a6cb2-7c8d-47a1-8b86-d0b7...,870/images/banner/c95a6cb2-7c8d-47a1-8b86-d0b7...,870/images/banner/c95a6cb2-7c8d-47a1-8b86-d0b7...,870/images/banner/c95a6cb2-7c8d-47a1-8b86-d0b7...,870/images/banner/c95a6cb2-7c8d-47a1-8b86-d0b7...,870/images/banner/c95a6cb2-7c8d-47a1-8b86-d0b7...,870/images/banner/c95a6cb2-7c8d-47a1-8b86-d0b7...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,870/images/profile/d8a767ca-9eea-4ef8-897f-0dd...,,,www.gocrimson.com
3,[190415],,clubs_v2710,,default,,1087,,,,[77.95],,[],,1087,Cornell University - M,,2640,6,1,False,True,http://cornellbigred.com/index.aspx?path=mten&,212,10,False,False,,False,True,False,True,Male,1087/images/banner/7fff4fb4-2d13-465d-bcf6-535...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,,"[42.4439614, -76.5018807]","Ithaca, NY","Ithaca, NY, USA",,,,"Ithaca, NY",Ithaca,Ithaca,New York,NY,United States,US,USA,USA,,Cornell University,,,,,Big Red,,,,True,College,,16.0,Ivy League,Ivy,,1.0,NCAA - Division I,D1,True,Cornell University,0,Cornell,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],1087/images/banner/7fff4fb4-2d13-465d-bcf6-535...,1087/images/banner/7fff4fb4-2d13-465d-bcf6-535...,1087/images/banner/7fff4fb4-2d13-465d-bcf6-535...,1087/images/banner/7fff4fb4-2d13-465d-bcf6-535...,1087/images/banner/7fff4fb4-2d13-465d-bcf6-535...,1087/images/banner/7fff4fb4-2d13-465d-bcf6-535...,1087/images/banner/7fff4fb4-2d13-465d-bcf6-535...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,1087/images/profile/a8e09b99-b50b-4b44-a6af-ed...,,,cornellbigred.com
4,[157289],,clubs_v2710,,default,,931,,,,[74.85],,[],,931,University of Louisville - M,,10,6,1,False,True,http://gocards.com/index.aspx,154,16,False,False,,False,True,False,True,Male,,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,,"[38.2468618, -85.7663724]","Louisville, KY","Louisville, KY, USA",,,,"Louisville, KY",Louisville,Louisville,Kentucky,KY,United States,US,USA,USA,,University of Louisville,UofL,,,,Cardinals,Cards,,,False,College,,3.0,Atlantic Coast Conference,ACC,,1.0,NCAA - Division I,D1,True,University of Louisville,0,Louisville,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],,,,,,,,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,931/images/profile/b23b86d7-8dde-48ed-9796-8bb...,,,gocards.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
951,,,clubs_v2710,,default,,1806,,,,[0.0],,[],,1806,Georgia Southwestern State - M,,3048,4,1,False,True,,7,0,False,False,,False,False,False,True,Male,,,,"[32.0723861, -84.23268759999999]","Americus, GA","Americus, GA, USA",,,,"Americus, GA",Americus,Americus,Georgia,GA,United States,US,USA,USA,,Georgia Southwestern State,,,,,,,,,False,College,,48.0,Peach Belt Conference,PBC,,2.0,NCAA - Division II,D2,True,Georgia Southwestern State,0,,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],,,,,,,,,,,,,,,,,,,,,,,
952,,,clubs_v2710,,default,,611837,,,,[0.0],,[],,611837,Melbourne University - M,,124760,4,1,False,True,,15,0,False,False,,False,False,False,True,Male,,,,"[-37.7962849, 144.9598069]","Parkville, Australia","Parkville VIC 3010, Australia",,,,"Parkville, VIC 3010",Parkville,Parkville,Victoria,VIC,Australia,AU,AUS,AUS,,Melbourne University,,,,,,,,,,College,,156.0,Australian University,,,8.0,Australian University,AU,False,Melbourne University,0,Melbourne University,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],,,,,,,,,,,,,,,,,,,,,,,
953,,,clubs_v2710,,default,,2795,,,,[0.0],,[],,2795,St. Louis College of Pharmacy - M,,522,6,1,False,False,,5,0,False,False,,False,True,False,True,Male,2795/images/banner/30eb0ae0-ba79-4e23-b5ec-83a...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,,"[38.6270025, -90.19940419999999]","St. Louis, MO","St. Louis, MO, USA",,,,"St. Louis, MO",St. Louis,St. Louis,Missouri,MO,United States,US,USA,USA,,St. Louis College of Pharmacy,,,,,,,,,True,College,,101.0,American Midwest Conference,AMC,,4.0,NAIA,NAIA,True,St. Louis College of Pharmacy,0,,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],2795/images/banner/30eb0ae0-ba79-4e23-b5ec-83a...,2795/images/banner/30eb0ae0-ba79-4e23-b5ec-83a...,2795/images/banner/30eb0ae0-ba79-4e23-b5ec-83a...,2795/images/banner/30eb0ae0-ba79-4e23-b5ec-83a...,2795/images/banner/30eb0ae0-ba79-4e23-b5ec-83a...,2795/images/banner/30eb0ae0-ba79-4e23-b5ec-83a...,2795/images/banner/30eb0ae0-ba79-4e23-b5ec-83a...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,2795/images/profile/6afce14d-c47e-4a4b-b05e-d0...,,,
954,,,clubs_v2710,,default,,2831,,,,[0.0],,[],,2831,Univ. Of Maryland Eastern Shore - M,,1434,6,1,False,True,,7,0,False,False,,False,True,False,True,Male,,,,"[39.0457549, -76.64127119999999]",MD,"Maryland, USA",,,,MD,,,Maryland,MD,United States,US,USA,USA,,University Of Maryland Eastern Shore,,,,,,,,,False,College,,19.0,Mid-Eastern Athletic Conference,MEAC,,1.0,NCAA - Division I,D1,True,University Of Maryland Eastern Shore,0,Maryland Eastern Shore,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],,,,,,,,,,,,,,,,,,,,,,,


In [None]:
%%quack

-- output matched

SELECT
    (
        SELECT
            array_agg(peds_df.college_id)
        FROM peds_df
        WHERE (utr.domain is not null AND peds_df.domain is not null AND utr.domain = peds_df.domain)
    ) AS peds_college_ids,
    * EXCLUDE (peds_college_ids)
FROM matches AS utr
WHERE peds_college_ids IS NULL

Unnamed: 0,peds_college_ids,fields,index,score,type,version,id,nested,parent,routing,sorts,explanation,matchedQueries,source.clubMemberRole,source.id,source.name,source.description,source.locationId,source.tierTypeId,source.clubSubTypeId,source.paymentsEnabled,source.sanctioned,source.url,source.memberCount,source.eventCount,source.private,source.isTrialing,source.trialEndDateUtc,source.isSearchable,source.canRunEvents,source.isHighSchool,source.isCollege,source.gender,source.bannerUrl,source.profilePhotoUrl,source.location.locationId,source.location.latLng,source.location.display,source.location.googleFormattedName,source.location.googlePlaceId,source.location.placeName,source.location.streetAddress,source.location.cityStateZip,source.location.cityName,source.location.cityAbbr,source.location.stateName,source.location.stateAbbr,source.location.countryName,source.location.countryCode2,source.location.countryCode3,source.location.countryIoc,source.statusType,source.school.displayName,source.school.altName1,source.school.altName2,source.school.altName3,source.school.altName4,source.school.nickname,source.school.altNickname1,source.school.altNickname2,source.school.ladyNickname,source.school.private,source.school.type,source.school.power6Avg,source.school.conference.id,source.school.conference.conferenceName,source.school.conference.shortName,source.school.conference.divisionId,source.school.conference.division.id,source.school.conference.division.divisionName,source.school.conference.division.shortName,source.school.rosterHasUnclaimedPlayers,source.school.name,source.school.clubId,source.school.shortName,source.school.profilePhotoUrl,source.school.rosterCount,source.school.rosterYear,source.school.power6,source.school.power6High,source.school.power6Low,source.school.power6Men,source.school.power6MenHigh,source.school.power6MenLow,source.school.power6Women,source.school.power6WomenHigh,source.school.power6WomenLow,source.school.isCollege,source.school.tierType,source.school.clubSubTypeId,source.school.roster,source.school.clubProfileImages.default,source.school.clubProfileImages.thumbnail.oneX,source.school.clubProfileImages.thumbnail.twoX,source.school.clubProfileImages.thumbnail.threeX,source.school.clubProfileImages.card.oneX,source.school.clubProfileImages.card.twoX,source.school.clubProfileImages.card.threeX,source.school.clubProfileImages.profile.oneX,source.school.clubProfileImages.profile.twoX,source.school.clubProfileImages.profile.threeX,source.school.clubProfileImages.icon.oneX,source.school.clubProfileImages.icon.twoX,source.school.clubProfileImages.icon.threeX,source.school.currentMember,source.clubMembersSimplified,source.clubImages.default,source.clubImages.banner.oneX,source.clubImages.banner.twoX,source.clubImages.banner.threeX,source.clubImages.dashboard.oneX,source.clubImages.dashboard.twoX,source.clubImages.dashboard.threeX,source.clubProfileImages.default,source.clubProfileImages.thumbnail.oneX,source.clubProfileImages.thumbnail.twoX,source.clubProfileImages.thumbnail.threeX,source.clubProfileImages.card.oneX,source.clubProfileImages.card.twoX,source.clubProfileImages.card.threeX,source.clubProfileImages.profile.oneX,source.clubProfileImages.profile.twoX,source.clubProfileImages.profile.threeX,source.clubProfileImages.icon.oneX,source.clubProfileImages.icon.twoX,source.clubProfileImages.icon.threeX,source.clubTags,source.school.conference,domain
0,"[100654, 100706, 100751, 100830, 100858, 10114...",,clubs_v2710,,default,,1420,,,,[62.83],,[],,1420,"Union College, Kentucky - M",,4976,6,1,False,False,www.gounionbulldogs.com,36,16,False,False,,False,True,False,True,Male,1420/images/banner/5ea00392-2a4a-47ca-a690-0fc...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,,"[36.8664765, -83.8888138]","Barbourville, KY","Barbourville, KY, USA",,,,"Barbourville, KY",Barbourville,Barbourville,Kentucky,KY,United States,US,USA,USA,,"Union College, Kentucky",,,,,,,,,True,College,,102.0,Appalachian Athletic Conference,AAC,,4.0,NAIA,NAIA,True,"Union College, Kentucky",0,,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],1420/images/banner/5ea00392-2a4a-47ca-a690-0fc...,1420/images/banner/5ea00392-2a4a-47ca-a690-0fc...,1420/images/banner/5ea00392-2a4a-47ca-a690-0fc...,1420/images/banner/5ea00392-2a4a-47ca-a690-0fc...,1420/images/banner/5ea00392-2a4a-47ca-a690-0fc...,1420/images/banner/5ea00392-2a4a-47ca-a690-0fc...,1420/images/banner/5ea00392-2a4a-47ca-a690-0fc...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,1420/images/profile/ff121eff-2e50-4b1d-bae4-eb...,,,
1,"[100654, 100706, 100751, 100830, 100858, 10114...",,clubs_v2710,,default,,2708,,,,[45.5],,[],,2708,Doane University - M,,4276,4,1,False,True,doaneathletics.com,14,3,False,False,,False,False,False,True,Male,,,,"[40.6252173, -96.9615141]","Crete, NE","Crete, NE 68333, USA",,,,"Crete, NE 68333",Crete,Crete,Nebraska,NE,United States,US,USA,USA,,Doane College,,,,,,,,,True,College,,106.0,Great Plains Athletic Conference,GPAC,,4.0,NAIA,NAIA,True,Doane College,0,,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],,,,,,,,,,,,,,,,,,,,,,,
2,"[100654, 100706, 100751, 100830, 100858, 10114...",,clubs_v2710,,default,,2961,,,,[56.96],,[],,2961,Lawrence Tech - M,,3807,6,1,False,True,ltuathletics.com,9,17,False,False,,False,True,False,True,Male,2961/images/banner/d0b46c66-774d-41e0-a4fe-44d...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,,"[42.4733688, -83.2218731]","Southfield, MI","Southfield, MI, USA",,,,"Southfield, MI",Southfield,Southfield,Michigan,MI,United States,US,USA,USA,,Lawrence Tech,,,,,,,,,True,College,,119.0,Wolverine-Hoosier Athletic Conference,WHAC,,4.0,NAIA,NAIA,True,Lawrence Tech,0,,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],2961/images/banner/d0b46c66-774d-41e0-a4fe-44d...,2961/images/banner/d0b46c66-774d-41e0-a4fe-44d...,2961/images/banner/d0b46c66-774d-41e0-a4fe-44d...,2961/images/banner/d0b46c66-774d-41e0-a4fe-44d...,2961/images/banner/d0b46c66-774d-41e0-a4fe-44d...,2961/images/banner/d0b46c66-774d-41e0-a4fe-44d...,2961/images/banner/d0b46c66-774d-41e0-a4fe-44d...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,2961/images/profile/8be1c557-f612-49c6-9fa7-b1...,,,
3,"[100654, 100706, 100751, 100830, 100858, 10114...",,clubs_v2710,,default,,2843,,,,[33.47],,[],,2843,College Of Lake County - M,College of Lake County is a community college ...,2492,6,1,False,False,www.clclancers.com,8,0,False,False,,False,True,False,True,Male,2843/images/banner/d97e2834-72bf-4285-881e-826...,,,"[42.34446639999999, -88.04174599999999]","Grayslake, IL","Grayslake, IL 60030, USA",,,,"Grayslake, IL 60030",Grayslake,Grayslake,Illinois,IL,United States,US,USA,USA,,College Of Lake County,,,,,,,,,False,College,,152.0,Skyway Community College Conference,SCCC,,5.0,NJCAA,NJCAA,True,College Of Lake County,0,,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],2843/images/banner/d97e2834-72bf-4285-881e-826...,2843/images/banner/d97e2834-72bf-4285-881e-826...,2843/images/banner/d97e2834-72bf-4285-881e-826...,2843/images/banner/d97e2834-72bf-4285-881e-826...,2843/images/banner/d97e2834-72bf-4285-881e-826...,2843/images/banner/d97e2834-72bf-4285-881e-826...,2843/images/banner/d97e2834-72bf-4285-881e-826...,,,,,,,,,,,,,,,,
4,,,clubs_v2710,,default,,2657,,,,[0.0],,[],,2657,Lindenwood University (Illinois) - M,,1575,4,1,False,False,http://www.lindenwoodlynx.com,9,0,False,False,,False,False,False,True,Male,,,,"[38.5200504, -89.9839935]","Belleville, IL","Belleville, IL, USA",,,,"Belleville, IL",Belleville,Belleville,Illinois,IL,United States,US,USA,USA,,Lindenwood University (Illinois),,,,,,,,,True,College,,88.0,Ohio Athletic Conference,OAC,,3.0,NCAA - Division III,D3,True,Lindenwood University (Illinois),0,,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],,,,,,,,,,,,,,,,,,,,,,,www.lindenwoodlynx.com
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,,,clubs_v2710,,default,,983,,,,[78.32],,[],,983,University of Alabama - M,,4072,6,1,False,True,http://www.rolltide.com,92,10,False,False,,False,True,False,True,Male,983/images/banner/329e90e8-7012-4c2d-b5da-a8de...,983/images/profile/e3499d55-2205-422f-8dd3-510...,,"[33.2098407, -87.56917349999999]","Tuscaloosa, AL","Tuscaloosa, AL, USA",,,,"Tuscaloosa, AL",Tuscaloosa,Tuscaloosa,Alabama,AL,United States,US,USA,USA,,University of Alabama,UA,Bama,,,Crimson Tide,Tide,,,False,College,,26.0,Southeastern Conference,SEC,,1.0,NCAA - Division I,D1,True,University of Alabama,0,Alabama,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],983/images/banner/329e90e8-7012-4c2d-b5da-a8de...,983/images/banner/329e90e8-7012-4c2d-b5da-a8de...,983/images/banner/329e90e8-7012-4c2d-b5da-a8de...,983/images/banner/329e90e8-7012-4c2d-b5da-a8de...,983/images/banner/329e90e8-7012-4c2d-b5da-a8de...,983/images/banner/329e90e8-7012-4c2d-b5da-a8de...,983/images/banner/329e90e8-7012-4c2d-b5da-a8de...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,983/images/profile/e3499d55-2205-422f-8dd3-510...,,,www.rolltide.com
258,,,clubs_v2710,,default,,933,,,,[79.04],,[],,933,Texas A&M University - M,,1150,6,1,False,True,http://12thman.com/index.aspx?path=mten,206,22,False,False,,False,True,False,True,Male,933/images/banner/50b42149-60f6-423e-9545-0dae...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,,"[30.627977, -96.3344068]","College Station, TX","College Station, TX, USA",,,,"College Station, TX",College Station,College Station,Texas,TX,United States,US,USA,USA,,Texas A&M University,A&M,,,,Aggies,,,,False,College,,26.0,Southeastern Conference,SEC,,1.0,NCAA - Division I,D1,True,Texas A&M University,0,Texas A&M,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],933/images/banner/50b42149-60f6-423e-9545-0dae...,933/images/banner/50b42149-60f6-423e-9545-0dae...,933/images/banner/50b42149-60f6-423e-9545-0dae...,933/images/banner/50b42149-60f6-423e-9545-0dae...,933/images/banner/50b42149-60f6-423e-9545-0dae...,933/images/banner/50b42149-60f6-423e-9545-0dae...,933/images/banner/50b42149-60f6-423e-9545-0dae...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,933/images/profile/b697a633-d8ff-4e5e-b304-046...,,,12thman.com
259,,,clubs_v2710,,default,,1525,,,,[44.87],,[],,1525,Catholic University of America - M,,215,6,1,False,False,https://catholicathletics.com,26,8,False,False,,False,True,False,True,Male,1525/images/banner/58f8d382-24e1-44a8-b3b0-3ab...,,,"[38.9071923, -77.0368707]","Washington, DC","Washington, DC, USA",,,,"Washington, DC",Washington,Washington,District of Columbia,DC,United States,US,USA,USA,,Catholic University of America,,,,,,,,,True,College,,72.0,Landmark Conference,Landmark,,3.0,NCAA - Division III,D3,True,Catholic University of America,0,,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],1525/images/banner/58f8d382-24e1-44a8-b3b0-3ab...,1525/images/banner/58f8d382-24e1-44a8-b3b0-3ab...,1525/images/banner/58f8d382-24e1-44a8-b3b0-3ab...,1525/images/banner/58f8d382-24e1-44a8-b3b0-3ab...,1525/images/banner/58f8d382-24e1-44a8-b3b0-3ab...,1525/images/banner/58f8d382-24e1-44a8-b3b0-3ab...,1525/images/banner/58f8d382-24e1-44a8-b3b0-3ab...,,,,,,,,,,,,,,,,catholicathletics.com
260,,,clubs_v2710,,default,,3078,,,,[63.59],,[],,3078,Grace College (Ind.) - M,,17051,6,1,False,True,https://gclancers.com/index.aspx?path=mten,26,4,False,False,,False,True,False,True,Male,3078/images/banner/fe5d9826-1d0f-4069-b315-2e2...,3078/images/profile/efb56af4-a297-4452-9452-82...,,"[41.2272668, -85.8219345]","Winona Lake, IN","Winona Lake, IN, USA",,,,"Winona Lake, IN",Winona Lake,Winona Lake,Indiana,IN,United States,US,USA,USA,,Grace College (Ind.),,,,,,,,,True,College,,104.0,Crossroads League,CL,,4.0,NAIA,NAIA,True,Grace College (Ind.),0,,,0,,,,,,,,,,,False,,,,,,,,,,,,,,,,,,[],3078/images/banner/fe5d9826-1d0f-4069-b315-2e2...,3078/images/banner/fe5d9826-1d0f-4069-b315-2e2...,3078/images/banner/fe5d9826-1d0f-4069-b315-2e2...,3078/images/banner/fe5d9826-1d0f-4069-b315-2e2...,3078/images/banner/fe5d9826-1d0f-4069-b315-2e2...,3078/images/banner/fe5d9826-1d0f-4069-b315-2e2...,3078/images/banner/fe5d9826-1d0f-4069-b315-2e2...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,3078/images/profile/efb56af4-a297-4452-9452-82...,,,gclancers.com


In [None]:
%%quack --output joined_df

SELECT
    "source.id" as utr_college_id,
--    peds_df.college_name,
--    peds_df.short_name,
    "source.school.name" as utr_college_name,
    "source.school.displayName" as utr_college_display_name,
    "source.school.power6"     as power6,
    "source.school.power6High" as power6High,
    "source.school.power6Low"  as power6Low,
    array_agg(peds_df.college_id)   as peds_college_ids,
    array_agg(peds_df.college_name) as peds_college_names
FROM utr
JOIN peds_df ON  state = "source.location.stateAbbr"
    AND (
        peds_df.college_name IN ("source.school.name", "source.school.displayName", "source.school.altName1", "source.school.altName2")
        OR (utr.domain is not null AND peds_df.domain is not null AND utr.domain = peds_df.domain)
        OR (peds_df.short_name is not null AND "source.school.shortName" is not null AND peds_df.short_name = "source.school.shortName")
        -- OR (ST_Distance(ST_Point(peds_df.LATITUDE, peds_df.longitude), ST_Point("source.location.latLng"[1], "source.location.latLng"[2])) < 0.05)
    )
WHERE peds_df.college_id IS NOT NULL
  -- AND "source.id" = 1735
GROUP BY 1, 2, 3, 4, 5, 6

Unnamed: 0,utr_college_id,utr_college_name,utr_college_display_name,power6,power6High,power6Low,peds_college_ids,peds_college_names
0,1520,Delta State University,Delta State University,,,,[175616],[Delta State University]
1,1274,University of Delaware,University of Delaware,,,,[130943],[University of Delaware]
2,2172,Wartburg College,Wartburg College,,,,[154527],[Wartburg College]
3,2607,Friends University,Friends University,,,,[155089],[Friends University]
4,672338,Converse University,Converse College,,,,[217961],[Converse University]
...,...,...,...,...,...,...,...,...
667,1825,Ithaca College,Ithaca College,,,,[191968],[Ithaca College]
668,2193,Hamilton College,Hamilton College,,,,[191515],[Hamilton College]
669,2920,Reedley College,Reedley College,,,,[117052],[Reedley College]
670,1298,Fresno City College,Fresno City College,,,,[114789],[Fresno City College]


In [None]:
%%quack --output joined_df

SELECT
    "source.id" as utr_college_id,
--    peds_df.college_name,
--    peds_df.short_name,
    "source.school.name" as utr_college_name,
    "source.school.displayName" as utr_college_display_name,
    "source.school.power6"     as power6,
    "source.school.power6High" as power6High,
    "source.school.power6Low"  as power6Low,
    array_agg(peds_df.college_id)   as peds_college_ids,
    array_agg(peds_df.college_name) as peds_college_names
FROM utr
JOIN peds_df ON  state = "source.location.stateAbbr"
    AND (
        peds_df.college_name IN ("source.school.name", "source.school.displayName", "source.school.altName1", "source.school.altName2")
        OR (utr.domain is not null AND peds_df.domain is not null AND utr.domain = peds_df.domain)
        OR (peds_df.short_name is not null AND "source.school.shortName" is not null AND peds_df.short_name = "source.school.shortName")
        -- OR (ST_Distance(ST_Point(peds_df.LATITUDE, peds_df.longitude), ST_Point("source.location.latLng"[1], "source.location.latLng"[2])) < 0.05)
    )
WHERE peds_df.college_id IS NOT NULL
  -- AND "source.id" = 1735
GROUP BY 1, 2, 3, 4, 5, 6

Unnamed: 0,utr_college_id,utr_college_name,utr_college_display_name,power6,power6High,power6Low,peds_college_ids,peds_college_names
0,2712,Community College of Rhode Island,Community College of Rhode Island,,,,[217475],[Community College of Rhode Island]
1,2534,Central College,Central College,,,,[153108],[Central College]
2,1282,William Carey University,William Carey University,,,,[176479],[William Carey University]
3,1604,Delaware State University,Delaware State University,,,,[130934],[Delaware State University]
4,2139,Edgewood College,Edgewood College,,,,[238661],[Edgewood College]
...,...,...,...,...,...,...,...,...
667,1152295,Hudson Valley Community College,Hudson Valley Community College,,,,[191719],[Hudson Valley Community College]
668,1286,Fordham University,Fordham University,,,,[191241],[Fordham University]
669,2910,Fullerton College,Fullerton College,,,,[114859],[Fullerton College]
670,2797,Diablo Valley College,Diablo Valley College,,,,[113634],[Diablo Valley College]


In [None]:
%%quack

SELECT
    utr_college_name,
    length(peds_college_ids) as count,
FROM joined_df
WHERE length(peds_college_ids) > 1


Unnamed: 0,utr_college_name,count
0,Queens University of Charlotte,80
1,Doane College,19
2,"Union College, Kentucky",24
3,Indiana University East,36
4,Milligan University,54
5,Lake Forest College,99
6,Concordia University (Illinois),99
7,Georgia Gwinnett College,74
8,College Of Lake County,99
9,Marion Military Institute,40


In [None]:
%%quack --output tmp

SELECT
    "source.school.name",
    "source.school.displayName"
FROM utr
WHERE "source.school.name" <> "source.school.displayName"
;

Unnamed: 0,source.school.name,source.school.displayName
0,North Carolina,University of North Carolina
1,UNC Charlotte,Charlotte
2,University of Louisiana at Lafayette,University Of Louisiana at Lafayette
3,Virginia Tech,Virginia Tech
4,University of Texas at San Antonio,University of Texas - San Antonio
5,Weber State,Weber State University
6,Tennessee Wesleyan,Tennessee Wesleyan College
7,Sacramento State,Sacramento State University
8,University of Nebraska Omaha,Omaha
9,Indiana University-Purdue University Indianapolis,IU-Indianapolis


In [None]:
%%quack --output tmp

SELECT
    *
FROM peds_df
WHERE college_name like '%Louisiana%'
;

Unnamed: 0,college_id,college_name,short_name,city,state,latitude,longitude,total_cost,domain
0,158088,Central Louisiana Technical Community College,,Alexandria,LA,31.310461,-92.446693,4098.0,
1,159373,Louisiana State University Health Sciences Cen...,LSUHSC LSUHSC-NO LSUMC LSU Medical Center ...,New Orleans,LA,29.957272,-90.083145,18851.0,
2,159382,Louisiana State University-Alexandria,LSUA,Alexandria,LA,31.178616,-92.414406,14725.0,
3,159407,Louisiana State University-Eunice,LSU Eunice,Eunice,LA,30.473291,-92.434525,10166.0,www.lsue.edu
4,159416,Louisiana State University-Shreveport,LSUS,Shreveport,LA,32.428467,-93.705612,20481.0,
5,159568,Louisiana Christian University,,Pineville,LA,31.324528,-92.425972,19750.0,
6,159647,Louisiana Tech University,"LA Tech, Louisiana Tech, Tech",Ruston,LA,32.528297,-92.649963,16866.0,www.latech.edu
7,159993,University of Louisiana at Monroe,ULM; University of Louisiana-Monroe; Uni of LA...,Monroe,LA,32.527748,-92.073738,21290.0,
8,160010,Northwest Louisiana Technical Community College,NLTCC or Northwest LTCC,Minden,LA,32.586037,-93.263023,4109.0,
9,160038,Northwestern State University of Louisiana,NSU | NSULA | Northwestern State University | ...,Natchitoches,LA,31.750666,-93.097794,19652.0,www.nsula.edu
