In [17]:
# initializing

import pandas as pd
import pandas_gbq as pgbq
from multiprocessing import Pool
from scipy import stats
import os
import time

# Folder holding the SQL modules; each module is read from `<module>.txt`.
directory = '/Users/joshharris/community_health/sql/community_facts'
# Module names are the stems of the `.txt` query files in `directory`.
# os.listdir returns every entry (e.g. `.DS_Store`, checkpoint folders), and
# those have no `<name>.txt` query to open, so keep only `.txt` files.
# Sorting makes the module order deterministic: os.listdir order is arbitrary,
# and the first module is used as the base table of the left join.
sql_modules = sorted(
    stem
    for stem, extension in map(os.path.splitext, os.listdir(directory))
    if extension == '.txt'
)
google_project_id = 'solid-ridge-104914'
dataset = 'community_networks'

In [None]:
# If a community-facts table is already saved, use it; otherwise re-calculate it and save it.

def bigquery_worker(i):
    """Load one SQL module's table from BigQuery, generating it if the read fails.

    The table is `<dataset>.<type>_<module>`, where `<type>` is the base name
    of `directory` and `<module>` is `sql_modules[i]`. The saved table is read
    first; if that read raises, the module's query file
    (`<directory>/<module>.txt`) is executed instead, its result is written to
    the table (replacing any existing one) and then returned.

    Parameters
    ----------
    i : int
        Index into the module-level `sql_modules` list.

    Returns
    -------
    The object returned by `pgbq.read_gbq` (a pandas DataFrame according to
    the pandas-gbq documentation), either from the saved table or from the
    freshly executed query.

    Raises
    ------
    IndexError
        If `i` is not a valid index into `sql_modules`.
    Exception
        Any error raised while opening the query file, running the query or
        saving the table is propagated to the caller.
    """
    sql_module_type = os.path.basename(directory)
    sql_module = sql_modules[i]
    # Build the table identifiers once instead of re-concatenating them.
    table_name = sql_module_type + '_' + sql_module
    table_id = dataset + '.' + table_name
    print('Starting module ', i, ': ', sql_module, sep='')
    try:
        result = pgbq.read_gbq('select * from ' + table_id, google_project_id, dialect='standard', verbose=False)
    except Exception as error:
        # `except Exception` (not a bare `except`) so KeyboardInterrupt and
        # SystemExit still stop the worker instead of triggering a rebuild.
        print(table_name + ' table does not exist, generating now.')
        # Show the real failure: the read can fail for reasons other than a
        # missing table (credentials, network, quota).
        print('Reason: ', repr(error), sep='')
        with open(directory + '/' + sql_module + '.txt') as query_file:
            query = query_file.read()
        result = pgbq.read_gbq(query, google_project_id, dialect='standard', verbose=False)
        print('Saving ' + table_id)
        pgbq.to_gbq(result, table_id, google_project_id, if_exists='replace', verbose=True)
    else:
        print(table_name, ' table found.', sep='')
    return result
        

In [None]:
#Run community facts sql modules and join them together

if __name__ == '__main__':
    # Fail early with a clear message: with no modules, Pool(processes=0)
    # raises an unrelated-looking ValueError and result[0] could not exist.
    if not sql_modules:
        raise ValueError('No SQL modules found in ' + directory)

    # One worker process per module; pool.map returns results in module order.
    with Pool(processes=len(sql_modules)) as pool:
        result = pool.map(bigquery_worker, range(len(sql_modules)))
        # Let the workers exit cleanly before the context manager tears the pool down.
        pool.close()
        pool.join()

    # The join lives inside the guard because it depends on `result`, which is
    # only bound when the guard is true.
    # The first module is the base table; `cid` is kept as a column as well as the index.
    joined = result[0].set_index('cid', drop=False)
    print('Joining modules', sep='')
    # Left-join every remaining module on `cid`; columns already present in
    # `joined` that clash with the incoming frame get the `_left` suffix.
    for module_frame in result[1:]:
        joined = joined.join(module_frame.set_index('cid'), how='left', lsuffix='_left')
    print('Saving '+dataset+'.community_facts_main')
    pgbq.to_gbq(joined, dataset+'.community_facts_main', google_project_id, if_exists='replace', verbose=False )
    print('Done', sep='')

In [30]:
# Save the joined table as `<dataset>.<SQL folder name>_main`.
# The prefix is derived from `directory` here because `sql_module_type` is only
# a local variable inside `bigquery_worker`; it is not defined at notebook
# scope, so referencing it in this cell raises NameError on a fresh kernel.
pgbq.to_gbq(joined, dataset+'.'+os.path.basename(directory)+'_main', google_project_id, if_exists='replace', verbose=False )

In [28]:
# Preview only the first rows rather than rendering the whole joined frame.
joined.head(10)

Unnamed: 0_level_0,cid,name,age,avg_communities_per_user,avg_distance_from_center,median_distance_from_center,items_posted_last_month,m3_total_retention,MAU,nodes,inventory,pct_female_mau,pct_with_about_me,total_signups_last_month
cid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
377,377,Fredericksburg Virginia e-Yard Sale,1024,3.934555,26.830970,15.98,72.0,0.040816,118.0,4041.0,5930.0,0.771186,0.050847,37.0
97,97,"Aviano's Junk to Treasure, Italy",1536,3.098016,5.205498,1.92,3623.0,0.541935,3005.0,10829.0,8233.0,0.475208,0.018968,180.0
90,90,"Waycross, GA & Surrounding Area Swap & Shop",1537,6.067757,40.317255,35.90,11.0,0.100000,48.0,2313.0,1527.0,0.708333,0.041667,24.0
94,94,"Rochester, NH Area Online Sale",1537,3.592873,30.601274,28.55,108.0,0.187500,84.0,3881.0,8864.0,0.833333,0.083333,16.0
95,95,"Croswell/Lexington, MI Area Swap",1537,2.864897,27.472516,30.69,630.0,0.105263,450.0,7613.0,29245.0,0.795556,0.062222,18.0
93,93,"Oshawa/Whitby, ON. Buy, Sell, Advice Site",1537,2.586072,15.959416,6.60,57549.0,0.396064,23449.0,83768.0,451559.0,0.795386,0.065760,1660.0
91,91,"Kosciusko County, IN Buy Sell Trade",1537,4.527241,22.923498,16.82,32.0,0.074074,65.0,3712.0,3443.0,0.569231,0.030769,29.0
96,96,"CenTex Exchange-Hill, Bosque & McLennan Counties",1537,4.597444,41.747614,39.33,22.0,0.100000,51.0,2827.0,3441.0,0.803922,0.039216,13.0
276,276,Stouffville ON Community Buy and Sell,1283,3.268380,17.941787,18.10,4820.0,0.294118,2500.0,24410.0,89525.0,0.806800,0.094000,128.0
376,376,"Anderson County / Garnett, KS Buy and Sell",1028,14.403846,30.364408,34.16,8.0,0.000000,51.0,2205.0,3171.0,0.803922,0.039216,5.0
