In [1]:
import pandas as pd
import pandas.io.sql as pd_sql
import psycopg2 as pg

# psycopg2 connection settings for the PostgreSQL server on localhost.
# NOTE(review): password is None, so authentication presumably relies on the
# server's local configuration (e.g. trust auth or ~/.pgpass) -- confirm.
params = dict(user='postgres', host='localhost', port=5432, password=None)

# Open the connection to the `mountain_project` database; later cells pass it
# to pandas to run queries.
connection = pg.connect(dbname='mountain_project', **params)

In [2]:
# Load the CSV exports for every Bishop bouldering area.
def _load_area_frames(area, data_dir="data"):
    """Read one area's three CSV exports and join them side by side.

    Parameters
    ----------
    area : str
        File-name stem of the area, e.g. ``"buttermilks"``. The files read are
        ``<data_dir>/<area>.csv``, ``<data_dir>/<area>-scrape.csv`` and
        ``<data_dir>/<area>-transform.csv``.
    data_dir : str, default "data"
        Directory that holds the CSV files.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(clean, scrape, transform, joined)`` where ``joined`` is
        ``clean.join(scrape).join(transform)``.

    Notes
    -----
    ``DataFrame.join`` aligns on the row index (left join by default), so the
    three files are assumed to list the same routes in the same row order.
    """
    stem = data_dir + "/" + area
    clean = pd.read_csv(stem + ".csv")
    scrape = pd.read_csv(stem + "-scrape.csv")
    transform = pd.read_csv(stem + "-transform.csv")
    return clean, scrape, transform, clean.join(scrape).join(transform)


# The per-file frames keep their original names in case other cells use them.
(buttermilks_clean_df, buttermilks_scrape_df,
 buttermilks_transform_df, buttermilks_df) = _load_area_frames("buttermilks")

(druid_stones_clean_df, druid_stones_scrape_df,
 druid_stones_transform_df, druid_stones_df) = _load_area_frames("druid_stones")

(happy_boulders_clean_df, happy_boulders_scrape_df,
 happy_boulders_transform_df, happy_boulders_df) = _load_area_frames("happy_boulders")

(sad_boulders_clean_df, sad_boulders_scrape_df,
 sad_boulders_transform_df, sad_boulders_df) = _load_area_frames("sad_boulders")

In [3]:
# Stack the four area frames, drop incomplete rows, then select and rename the
# columns of interest.
#
# pd.concat replaces the chained DataFrame.append calls: append was deprecated
# in pandas 1.4 and removed in pandas 2.0. Like append, concat keeps each
# frame's own row labels by default, so the index is rebuilt after dropna.
bishop_df = (
    pd.concat([buttermilks_df, druid_stones_df, happy_boulders_df, sad_boulders_df])
    # dropna runs before the column selection, so a missing value in ANY
    # column (not only the six kept below) removes the row.
    .dropna()
    .reset_index(drop=True)
    .loc[:, ["avg_stars", "length_", "grade", "star_ratings", "on_to_do_lists", "ticks"]]
    .rename(columns={
        'avg_stars': 'Avg Stars',
        'grade': 'Grade',
        'length_': 'Length',
        'star_ratings': 'StarRatings',
        'on_to_do_lists': 'OnToDoLists',
        'ticks': 'Ticks',
    })
)
bishop_df.head()

Unnamed: 0,Avg Stars,Length,Grade,StarRatings,OnToDoLists,Ticks
0,1.3,10.0,-1.0,30,8,52
1,2.0,15.0,-1.0,1,0,1
2,2.2,18.0,-0.25,41,11,87
3,2.8,18.0,-0.25,84,49,168
4,1.8,12.0,-0.25,46,12,102


In [7]:
# Build the same table straight from PostgreSQL so it can be compared with the
# CSV-derived bishop_df.

# Areas in the order the CSV frames were concatenated, so rows line up.
_bishop_areas = ["buttermilks", "druid_stones", "happy_boulders", "sad_boulders"]

# One SELECT per area: match each route row with its scrape row on id_, and
# keep only routes whose row has no NULL column (`f.* IS NOT NULL`).
_area_select = """
(SELECT f.id_, f.avg_stars, f.length_, f.grade, s.star_ratings, s.on_to_do_lists, s.ticks
FROM {area} f, {area}_scrape s
WHERE f.id_=s.id_ AND f.* IS NOT NULL
ORDER BY id_)
"""

# NOTE(review): the final row order relies on PostgreSQL returning the
# UNION ALL branches in sequence, each in its own ORDER BY order. The SQL
# standard does not guarantee that without an outer ORDER BY -- confirm this
# is acceptable for the row-by-row comparison with bishop_df.
query = "\nUNION ALL\n".join(
    _area_select.format(area=area) for area in _bishop_areas
) + ";\n"

# read_sql already returns a fresh 0..n-1 index, so no reset_index is needed.
# id_ is only used for joining and ordering, so it is dropped before renaming
# the remaining columns.
bishop_db = (
    pd_sql.read_sql(query, connection)  # grab data as a dataframe
    .drop(columns='id_')
    .rename(columns={
        'avg_stars': 'Avg Stars',
        'grade': 'Grade',
        'length_': 'Length',
        'star_ratings': 'StarRatings',
        'on_to_do_lists': 'OnToDoLists',
        'ticks': 'Ticks',
    })
)

In [7]:
# Sanity check: element-wise comparison of the CSV-derived and database-derived
# tables. Only the first five rows of each are compared here.
csv_preview = bishop_df.head()
db_preview = bishop_db.head()
csv_preview == db_preview

Unnamed: 0,Avg Stars,Length,Grade,StarRatings,OnToDoLists,Ticks
0,True,True,True,True,True,True
1,True,True,True,True,True,True
2,True,True,True,True,True,True
3,True,True,True,True,True,True
4,True,True,True,True,True,True
