Check that the data was imported into PostgreSQL correctly

In [1]:
import json
import pandas as pd
import pandas.io.sql as pd_sql
from functions.auth.connections import postgres_connection

# Connection target for the 'mountain_project' database, built by the project helper.
# It is passed as the `con` argument to read_sql in the query cells.
# NOTE(review): the name suggests this is a connection URI string — confirm against
# functions.auth.connections.postgres_connection.
connection_uri = postgres_connection('mountain_project')
# Notebook display options: show every column, but truncate long frames to 5 rows.
# The fully-qualified "display.*" keys are required: option names are matched as
# patterns, and the bare 'max_columns' / 'max_rows' forms also match the
# styler.render.* options in newer pandas, which makes set_option raise OptionError.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 5)

# Get Data

In [2]:
# Select the listed columns from the `routes` table for a visual check.
query = """
SELECT id_,
       route, location, url, avg_stars, your_stars, route_type,
       rating, pitches, length_, area_latitude, area_longitude, url_id,
       grade, area_name
FROM routes
;
"""
# Run the query and rebuild a fresh positional index in a single chained expression.
df = pd_sql.read_sql(sql=query, con=connection_uri).reset_index(drop=True)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,id_,route,location,url,avg_stars,your_stars,route_type,rating,pitches,length_,area_latitude,area_longitude,url_id,grade,area_name
0,1,The Way Down,Birthday Boulders > Buttermilks Main > Butterm...,https://www.mountainproject.com/route/10627753...,1.3,-1,Boulder,V-easy,1,10.0,37.33004,-118.57901,106277533,-1.00,buttermilks
1,2,East Face,Sunshine Boulder > Buttermilks Main > Buttermi...,https://www.mountainproject.com/route/10725330...,1.9,-1,Boulder,V-easy,1,,37.32859,-118.57516,107253304,-1.00,buttermilks
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2387,2388,Roasted and Raw,Moonstone > Inner Circle > The Underground > Q...,https://www.mountainproject.com/route/10805614...,4.0,-1,Boulder,V12,1,16.0,34.06942,-116.09196,108056142,12.00,joshua_tree
2388,2389,Iron Resolution,Iron Resolution Block > Gunsmoke Area > Barker...,https://www.mountainproject.com/route/10610451...,3.5,-1,Boulder,V12+,1,25.0,34.02850,-116.14510,106104512,12.25,joshua_tree


In [3]:
# Select the listed columns from the `ratings` table for a visual check.
query = """
SELECT id_,
       url_id, star_ratings, suggested_ratings, on_to_do_lists, ticks
FROM ratings
;
"""
# Run the query and rebuild a fresh positional index in a single chained expression.
df = pd_sql.read_sql(sql=query, con=connection_uri).reset_index(drop=True)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,id_,url_id,star_ratings,suggested_ratings,on_to_do_lists,ticks
0,1,106277533,30,5,8,52
1,2,107253304,14,4,8,30
...,...,...,...,...,...,...
2387,2388,108056142,1,1,6,0
2388,2389,106104512,2,2,20,0


In [4]:
# Select the listed log_* / sqrt_* columns from the `feature_transforms` table for a visual check.
query = """
SELECT id_,
       url_id,
       log_star_ratings, log_ticks, log_avg_stars, log_length, log_grade, log_on_to_do_lists,
       sqrt_star_ratings, sqrt_ticks, sqrt_avg_stars, sqrt_length, sqrt_grade
FROM feature_transforms
;
"""
# Run the query and rebuild a fresh positional index in a single chained expression.
df = pd_sql.read_sql(sql=query, con=connection_uri).reset_index(drop=True)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,id_,url_id,log_star_ratings,log_ticks,log_avg_stars,log_length,log_grade,log_on_to_do_lists,sqrt_star_ratings,sqrt_ticks,sqrt_avg_stars,sqrt_length,sqrt_grade
0,1,106277533,1.491362,1.724276,0.361728,1.041393,0.000000,0.954243,5.477226,7.211103,1.140175,3.162278,0.000000
1,2,107253304,1.176091,1.491362,0.462398,,0.000000,0.954243,3.741657,5.477226,1.378405,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2387,2388,108056142,0.301030,0.000000,0.698970,1.230449,1.146128,0.845098,1.000000,0.000000,2.000000,4.000000,3.605551
2388,2389,106104512,0.477121,0.000000,0.653213,1.414973,1.153815,1.322219,1.414214,0.000000,1.870829,5.000000,3.640055


In [5]:
# Combine the three tables on url_id: `routes` (alias b) is the left side, with
# `ratings` (r) and `feature_transforms` (t) LEFT JOINed onto it.
query = """
SELECT b.url_id,

       b.route, b.location, b.url, b.avg_stars, b.your_stars, b.route_type,
       b.rating, b.pitches, b.length_, b.area_latitude, b.area_longitude,
       b.grade, b.area_name,
       
       r.star_ratings, r.suggested_ratings, r.on_to_do_lists, r.ticks,
       
       t.log_star_ratings, t.log_ticks, t.log_avg_stars, t.log_length, t.log_grade, t.log_on_to_do_lists,
       t.sqrt_star_ratings, t.sqrt_ticks, t.sqrt_avg_stars, t.sqrt_length, t.sqrt_grade
       
FROM routes b
LEFT JOIN ratings r ON b.url_id = r.url_id
LEFT JOIN feature_transforms t ON b.url_id = t.url_id
;
"""
# Run the query and rebuild a fresh positional index in a single chained expression.
df = pd_sql.read_sql(sql=query, con=connection_uri).reset_index(drop=True)
# Bare last expression so the notebook renders the joined frame.
df

Unnamed: 0,url_id,route,location,url,avg_stars,your_stars,route_type,rating,pitches,length_,area_latitude,area_longitude,grade,area_name,star_ratings,suggested_ratings,on_to_do_lists,ticks,log_star_ratings,log_ticks,log_avg_stars,log_length,log_grade,log_on_to_do_lists,sqrt_star_ratings,sqrt_ticks,sqrt_avg_stars,sqrt_length,sqrt_grade
0,106277533,The Way Down,Birthday Boulders > Buttermilks Main > Butterm...,https://www.mountainproject.com/route/10627753...,1.3,-1,Boulder,V-easy,1,10.0,37.33004,-118.57901,-1.00,buttermilks,30,5,8,52,1.491362,1.724276,0.361728,1.041393,0.000000,0.954243,5.477226,7.211103,1.140175,3.162278,0.000000
1,107253304,East Face,Sunshine Boulder > Buttermilks Main > Buttermi...,https://www.mountainproject.com/route/10725330...,1.9,-1,Boulder,V-easy,1,,37.32859,-118.57516,-1.00,buttermilks,14,4,8,30,1.176091,1.491362,0.462398,,0.000000,0.954243,3.741657,5.477226,1.378405,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2387,108056142,Roasted and Raw,Moonstone > Inner Circle > The Underground > Q...,https://www.mountainproject.com/route/10805614...,4.0,-1,Boulder,V12,1,16.0,34.06942,-116.09196,12.00,joshua_tree,1,1,6,0,0.301030,0.000000,0.698970,1.230449,1.146128,0.845098,1.000000,0.000000,2.000000,4.000000,3.605551
2388,106104512,Iron Resolution,Iron Resolution Block > Gunsmoke Area > Barker...,https://www.mountainproject.com/route/10610451...,3.5,-1,Boulder,V12+,1,25.0,34.02850,-116.14510,12.25,joshua_tree,2,2,20,0,0.477121,0.000000,0.653213,1.414973,1.153815,1.322219,1.414214,0.000000,1.870829,5.000000,3.640055
