# Human Mobility in San Diego County
Jessica Embury

#### Add geometry to the San Diego OD tables, aggregate them to the SRA level, and create flow-line geometries from home --> work (between SRA centroids)
#### Years 2013 - 2017

In [None]:
#import needed libraries
import glob
import gzip
import os
import sys

import psycopg2
import requests
from bs4 import BeautifulSoup

In [None]:
#USER ENTERED VARIABLES

# PostgreSQL connection settings -- fill these in before running the notebook
host = ""
db = ""
user = ""
password = ""

# schema that holds the tables this notebook reads and writes
schema_name = "lodes"

# data years to process, kept as strings because they are spliced into table-name patterns
years = [str(data_year) for data_year in range(2013, 2018)]

In [None]:
#FUNCTIONS

#function for psycopg2 to connect to the PostgreSQL database server
#reference: https://github.com/NaysanSaran/pandas2postgresql/blob/master/notebooks/CompleteExample.ipynb
def connect(params_dic):
    """Connect to the PostgreSQL database server.

    Args:
        params_dic: Mapping of keyword arguments that is unpacked into
            ``psycopg2.connect`` (this notebook passes host, database,
            user and password).

    Returns:
        The connection object returned by ``psycopg2.connect``.

    Raises:
        SystemExit: With exit status 1 when the connection attempt raises
            any exception; the error is printed first.
    """
    print('Connecting to the PostgreSQL database...')
    try:
        # connect to the PostgreSQL server
        return psycopg2.connect(**params_dic)
    except Exception as error:
        # psycopg2.DatabaseError is itself an Exception subclass, so a single
        # clause covers everything the original tuple caught.
        print(error)
        # relies on the file-level ``import sys``
        sys.exit(1)

In [None]:
#CONNECT TO DATABASE

# bundle the user-entered settings under the keyword names that connect() forwards to psycopg2
param_dic = dict(host=host, database=db, user=user, password=password)

# open the connection, then obtain the cursor that the following cells use for every statement
conn = connect(param_dic)
cur = conn.cursor()

In [None]:
#add home and work polygon block group geometry to each od table with matching year for shapefile and od table

#catalog lookup for table names; the schema and the LIKE pattern are bound as query parameters
find_tables = "SELECT table_name FROM information_schema.tables WHERE table_schema = %s AND table_name LIKE %s;"


def _execute_step(statement, ok_msg, err_msg):
    """Run one SQL statement inside its own savepoint and report the outcome.

    After a failed statement PostgreSQL rejects every further command in the
    same transaction, so merely printing the error would make all later steps
    fail as well. Rolling back to a savepoint undoes only the failed statement
    and leaves the transaction usable.

    Args:
        statement: SQL text to execute on the notebook-level cursor ``cur``.
        ok_msg: Message printed when the statement succeeds.
        err_msg: Message printed, followed by the database error, on failure.

    Returns:
        True if the statement succeeded, False otherwise.

    Raises:
        psycopg2.Error: If the savepoint commands themselves fail (for
            example because the connection was lost).
    """
    cur.execute("SAVEPOINT lodes_step;")
    try:
        cur.execute(statement)
    except psycopg2.Error as error:
        cur.execute("ROLLBACK TO SAVEPOINT lodes_step;")
        succeeded = False
        print(err_msg, error)
    else:
        succeeded = True
        print(ok_msg)
    cur.execute("RELEASE SAVEPOINT lodes_step;")
    return succeeded


for year in years:
    try:
        cur.execute("set search_path to {}, public;".format(schema_name))
        cur.execute(find_tables, (schema_name, '%{}_sd_bg%'.format(year)))
        lodes_list = [row[0] for row in cur.fetchall()]

        cur.execute(find_tables, (schema_name, '%tl_{}%'.format(year)))
        tl_candidates = [row[0] for row in cur.fetchall()]
    except psycopg2.Error as error:
        #clear the aborted transaction and skip the year instead of reusing stale results
        conn.rollback()
        print("Error. {} Data not fetched.".format(year), error)
        continue
    print("{} Data has been fetched.".format(year))

    #the pattern above also matches the "<name>_sd" table created further down on an
    #earlier run; drop those so a re-run never treats the derived table as its own source
    tl_sources = [name for name in tl_candidates if not name.endswith('_sd')]
    if not tl_sources:
        print("Error. No tl_{} table found in schema {}; year skipped.".format(year, schema_name))
        continue
    tl_str = tl_sources[0]

    #county subset of the block group polygons: rows with countyfp = '073' go into "<tl table>_sd"
    sd_table = "{}.{}_sd".format(schema_name, tl_str)

    drop_table = "DROP TABLE IF EXISTS {};".format(sd_table)
    print(drop_table)

    create_table = "CREATE TABLE {} AS SELECT * FROM {}.{} WHERE countyfp = '073';".format(sd_table, schema_name, tl_str)
    print(create_table)

    _execute_step(drop_table, 'Table dropped if exists.', "Error. Table (if exists) not dropped.")
    _execute_step(create_table, 'Table created.', "Error. Table not created.")
    conn.commit()

    #add block group polygon geometry for home and work to each od table
    for od_table in lodes_list:
        target = "{}.{}".format(schema_name, od_table)

        #(new geometry column, od column holding the block group id, label used in messages)
        for geom_column, geocode_column, label in (
            ('geom_home', 'h_geocode', 'home'),
            ('geom_work', 'w_geocode', 'work'),
        ):
            alter_table = "ALTER TABLE {} ADD {} geometry(MultiPolygon,4326);".format(target, geom_column)
            update_table = (
                "UPDATE {} SET {} = geom FROM "
                "(SELECT DISTINCT(geoid), geom FROM {} GROUP BY geoid, geom) AS t "
                "WHERE {} = t.geoid;"
            ).format(target, geom_column, sd_table, geocode_column)

            _execute_step(
                alter_table,
                'Table altered ({}).'.format(label),
                "Error. Table not altered ({}).".format(label),
            )
            _execute_step(
                update_table,
                'Table updated ({}).'.format(label),
                "Error. Table not updated ({}).".format(label),
            )

        #persist whatever steps succeeded for this table
        conn.commit()


In [None]:
#create point geometry for block group centroid for each table (2013-2017)
#Part 1

od_tables_list = []

#catalog lookup for OD table names; the schema and the LIKE pattern are bound as query parameters
find_od_tables = "SELECT table_name FROM information_schema.tables WHERE table_schema = %s AND table_name LIKE %s;"

#get list of all OD tables
for year in years:
    try:
        cur.execute(find_od_tables, (schema_name, '%{}_sd_bg%'.format(year)))
        od_tables = cur.fetchall()
    except psycopg2.Error as error:
        #clear the aborted transaction and skip this year, so rows fetched for a
        #previous year are not appended a second time
        conn.rollback()
        print("Error. {} OD Tables not fetched.".format(year), error)
        continue
    print("{} OD Tables have been fetched.".format(year))

    #each fetched row is a 1-tuple holding the table name
    od_tables_list.extend(row[0] for row in od_tables)

print(od_tables_list)

In [None]:
#create point geometry for block group centroid for each table (2013-2017)
#Part 2


def _execute_step(statement, ok_msg, err_msg):
    """Run one SQL statement inside its own savepoint and report the outcome.

    After a failed statement PostgreSQL rejects every further command in the
    same transaction, so merely printing the error would make all later steps
    (and the final commit) ineffective. Rolling back to a savepoint undoes
    only the failed statement and leaves the transaction usable.

    Args:
        statement: SQL text to execute on the notebook-level cursor ``cur``.
        ok_msg: Message printed when the statement succeeds.
        err_msg: Message printed, followed by the database error, on failure.

    Returns:
        True if the statement succeeded, False otherwise.

    Raises:
        psycopg2.Error: If the savepoint commands themselves fail (for
            example because the connection was lost).
    """
    cur.execute("SAVEPOINT lodes_step;")
    try:
        cur.execute(statement)
    except psycopg2.Error as error:
        cur.execute("ROLLBACK TO SAVEPOINT lodes_step;")
        succeeded = False
        print(err_msg, error)
    else:
        succeeded = True
        print(ok_msg)
    cur.execute("RELEASE SAVEPOINT lodes_step;")
    return succeeded


#add columns for point geometry, update tables with centroid points
for od_table in od_tables_list:
    target = "{}.{}".format(schema_name, od_table)

    #(new point column, polygon column it is derived from, label used in messages)
    for point_column, polygon_column, label in (
        ('geom_pt_home', 'geom_home', 'home'),
        ('geom_pt_work', 'geom_work', 'work'),
    ):
        alter_pt = "ALTER TABLE {} ADD {} geometry(Point,4326);".format(target, point_column)
        update_pt = "UPDATE {} SET {} = ST_Centroid({});".format(target, point_column, polygon_column)

        _execute_step(
            alter_pt,
            'Table altered ({}).'.format(label),
            "Error. Table not altered ({}).".format(label),
        )
        _execute_step(
            update_pt,
            'Table updated ({}).'.format(label),
            "Error. Table not updated ({}).".format(label),
        )

    #commit changes for each table to database
    try:
        conn.commit()
        print('{}: changes commited.'.format(od_table))
    except psycopg2.Error as error:
        print('Error. {}: changes NOT commited.'.format(od_table), error)

In [None]:
#if list subset needed due to connection issues, errors, etc.
#show the full list first so the restart position can be chosen
print(len(od_tables_list))
print(od_tables_list)

#index of the first table to keep; everything before it is left out of the aggregation cell
#NOTE(review): with a value of 1 the first OD table is skipped -- set to 0 to process every table
resume_from = 1
od_subset = od_tables_list[resume_from:]
print(od_subset)

print(len(od_subset))

In [None]:
#aggregate mobility data to the sra level, create index, and create flow lines (2013-2017)
#NOTE(review): the original header also mentioned a shapefile export, but no export is performed in this cell


def _execute_step(statement, ok_msg, err_msg):
    """Run one SQL statement inside its own savepoint and report the outcome.

    After a failed statement PostgreSQL rejects every further command in the
    same transaction, so merely printing the error would make all later steps
    (and the final commit) ineffective. Rolling back to a savepoint undoes
    only the failed statement and leaves the transaction usable.

    Args:
        statement: SQL text to execute on the notebook-level cursor ``cur``.
        ok_msg: Message printed when the statement succeeds.
        err_msg: Message printed, followed by the database error, on failure.

    Returns:
        True if the statement succeeded, False otherwise.

    Raises:
        psycopg2.Error: If the savepoint commands themselves fail (for
            example because the connection was lost).
    """
    cur.execute("SAVEPOINT lodes_step;")
    try:
        cur.execute(statement)
    except psycopg2.Error as error:
        cur.execute("ROLLBACK TO SAVEPOINT lodes_step;")
        succeeded = False
        print(err_msg, error)
    else:
        succeeded = True
        print(ok_msg)
    cur.execute("RELEASE SAVEPOINT lodes_step;")
    return succeeded


#count columns that are summed per (home sra, work sra) pair
job_columns = ['s000', 'sa01', 'sa02', 'sa03', 'se01', 'se02', 'se03', 'si01', 'si02', 'si03']
summed_columns = ", ".join("sum(bg.{0}) as {0}".format(column) for column in job_columns)

for od_table in od_subset:
    bg_name = schema_name + '.' + od_table
    #swap the trailing two characters of the table name for 'sra'
    #(assumes the name ends in 'bg' -- TODO confirm for every OD table)
    name_stem = od_table[:-2]
    sra_table = schema_name + '.' + name_stem + 'sra'
    print(sra_table)

    #drop sra table if exists
    drop_table = "DROP TABLE IF EXISTS {};".format(sra_table)

    #create table of mobility data aggregated to the sra level with sra polygon geometry;
    #a block group is assigned to the sra polygon that contains its centroid point
    create_table = (
        "create table {sra} as select "
        "h_sra.sra as home_sra, h_sra.name as home_sra_name, "
        "w_sra.sra as work_sra, w_sra.name as work_sra_name, "
        "{sums}, "
        "h_sra.geom as geom_home, w_sra.geom as geom_work "
        "from {bg} as bg "
        "join {schema}.sd_sra as h_sra on ST_Within(bg.geom_pt_home, h_sra.geom) "
        "join {schema}.sd_sra as w_sra on ST_Within(bg.geom_pt_work, w_sra.geom) "
        "group by h_sra.sra, h_sra.name, h_sra.geom, w_sra.sra, w_sra.name, w_sra.geom;"
    ).format(sra=sra_table, sums=summed_columns, bg=bg_name, schema=schema_name)

    #add sra home centroid
    alter_home = "alter table {} add geom_pt_home geometry(Point,4326);".format(sra_table)
    update_home = "update {} set geom_pt_home = ST_Centroid(geom_home);".format(sra_table)

    #add sra work centroid
    alter_work = "alter table {} add geom_pt_work geometry(Point,4326);".format(sra_table)
    update_work = "update {} set geom_pt_work = ST_Centroid(geom_work);".format(sra_table)

    #create index based on work sra, home sra
    create_index = "CREATE INDEX idx_{}wh_sra ON {} (work_sra, home_sra);".format(name_stem, sra_table)

    #create flow line geometry to show home --> work mobility
    alter_line = "ALTER TABLE {} ADD geom_line_flow geometry(Linestring,4326);".format(sra_table)
    update_line = "UPDATE {} SET geom_line_flow = ST_MakeLine(geom_pt_home, geom_pt_work); ".format(sra_table)

    #(statement, success message, failure message), executed in this order
    steps = [
        (drop_table, 'Table (if exists) dropped.', "Error. Table (if exists) not dropped."),
        (create_table, 'Table created.', "Error. Table not created."),
        (alter_home, 'Table altered (home).', "Error altering/updating table (home)."),
        (update_home, 'Table updated (home).', "Error altering/updating table (home)."),
        (alter_work, 'Table altered (work).', "Error altering/updating table (work)."),
        (update_work, 'Table updated (work).', "Error altering/updating table (work)."),
        (create_index, 'Index created.', "Error. Index not created."),
        (alter_line, 'Table altered (line).', "Error altering/updating table (line)."),
        (update_line, 'Table updated (line).', "Error altering/updating table (line)."),
    ]
    for statement, ok_msg, err_msg in steps:
        _execute_step(statement, ok_msg, err_msg)

    #commit changes to database
    conn.commit()
    

In [None]:
#COMMIT AND CLOSE DATABASE
#commit changes to the database
conn.commit()

#close the cursor first, while the connection it belongs to is still open
cur.close()

#close the database connection
conn.close()