# Run this on an AMI as in the instructions.md file
Then the postgres tables will be set up and this code will pull directly from those. 

In [91]:
# Install vincenty
# https://pypi.python.org/pypi/vincenty/0.1.4
!pip install vincenty

[33mYou are using pip version 8.1.2, however version 9.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [146]:
# IMPORTS
from __future__ import absolute_import, print_function, unicode_literals
import psycopg2
from sqlalchemy import create_engine
import socket
import vincenty
import pandas as pd

Installing vincenty only needs to happen once in this notebook.  And it should not be in the final .py file.  The install will be part of the initial implementation instructions.

In [93]:
#import vincenty

Extract pandas dataframes from postgres tables

In [94]:
# Set up connection to postgres tables
db_loc = 'postgresql+psycopg2://postgres:pass@localhost:5432/solarenergy'
engine = create_engine(db_loc)

In [95]:
# Extract data and load dataframes
plants_df = pd.read_sql('SELECT * FROM plants', engine, index_col = "plant_id")
solar_df = pd.read_sql('SELECT * FROM solar_locations', engine, index_col = "loc_id")
generation_df = pd.read_sql('SELECT * FROM generation', engine, index_col = "index")
stations_df = pd.read_sql('SELECT * FROM weather_stations', engine, index_col = "wban_id")
uscrn_df = pd.read_sql('SELECT * FROM uscrn_monthly', engine)
plant_locations_df = pd.read_sql('SELECT loc_id, latitude, longitude FROM generation GROUP BY loc_id, latitude, longitude', engine, index_col = 'loc_id')
weather_locations_df = pd.read_sql('SELECT wban_id, latitude, longitude FROM weather_stations GROUP BY wban_id, latitude, longitude', engine, index_col = 'wban_id')

In [96]:
# Take a look at any of the dataframes as needed
generation_df

Unnamed: 0_level_0,loc_id,latitude,longitude,lat_lon,full_date,year,month,mwh
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,ELEC.PLANT.GEN.60058-SUN-ALL.M,34.831511,-87.842204,"34.831511,-87.842204",201609,2016,09,467.0
1,ELEC.PLANT.GEN.116-SUN-ALL.M,33.4225,-111.9122,"33.4225,-111.9122",201609,2016,09,56.0
2,ELEC.PLANT.GEN.116-SUN-ALL.M,33.4225,-111.9122,"33.4225,-111.9122",201608,2016,09,64.0
3,ELEC.PLANT.GEN.116-SUN-ALL.M,33.4225,-111.9122,"33.4225,-111.9122",201607,2016,09,70.0
4,ELEC.PLANT.GEN.116-SUN-ALL.M,33.4225,-111.9122,"33.4225,-111.9122",201606,2016,09,70.0
5,ELEC.PLANT.GEN.116-SUN-ALL.M,33.4225,-111.9122,"33.4225,-111.9122",201605,2016,09,82.0
6,ELEC.PLANT.GEN.116-SUN-ALL.M,33.4225,-111.9122,"33.4225,-111.9122",201604,2016,09,64.0
7,ELEC.PLANT.GEN.116-SUN-ALL.M,33.4225,-111.9122,"33.4225,-111.9122",201603,2016,09,57.0
8,ELEC.PLANT.GEN.116-SUN-ALL.M,33.4225,-111.9122,"33.4225,-111.9122",201602,2016,09,46.0
9,ELEC.PLANT.GEN.116-SUN-ALL.M,33.4225,-111.9122,"33.4225,-111.9122",201601,2016,09,33.0


In [97]:
plant_locations_df

Unnamed: 0_level_0,latitude,longitude
loc_id,Unnamed: 1_level_1,Unnamed: 2_level_1
ELEC.PLANT.GEN.57141-SUN-ALL.M,40.591099,-105.145904
ELEC.PLANT.GEN.58915-SUN-ALL.M,35.889419,-114.928333
ELEC.PLANT.GEN.57554-SUN-ALL.M,38.957901,-104.806627
ELEC.PLANT.GEN.58516-SUN-ALL.M,32.936944,-111.738611
ELEC.PLANT.GEN.59778-SUN-ALL.M,35.148503,-79.636372
ELEC.PLANT.GEN.58754-SUN-ALL.M,34.723056,-118.289722
ELEC.PLANT.GEN.58708-SUN-ALL.M,33.45,-111.98
ELEC.PLANT.GEN.58701-SUN-ALL.M,35.738889,-81.207778
ELEC.PLANT.GEN.58588-SUN-ALL.M,35.313333,-106.657778
ELEC.PLANT.GEN.57335-SUN-ALL.M,36.1053,-79.9692


In [98]:
weather_locations_df

Unnamed: 0_level_0,latitude,longitude
wban_id,Unnamed: 1_level_1,Unnamed: 2_level_1
23802,31.88,-87.73
53010,38.14,-109.61
63858,32.45,-87.24
04131,43.66,-110.71
94085,44.01,-100.35
03739,37.29,-75.92
03063,37.86,-103.82
73801,33.21,-87.59
94081,45.51,-103.3
53026,32.51,-105.33


In [99]:
weather_locations_df.itertuples(index=True)

plant_locations_df.itertuples(index=True)

<itertools.imap at 0x7f125268ed10>

In [100]:
for row in weather_locations_df.itertuples():
    print(row)


Pandas(Index=u'23802', latitude=u'31.88', longitude=u'-87.73')
Pandas(Index=u'53010', latitude=u'38.14', longitude=u'-109.61')
Pandas(Index=u'63858', latitude=u'32.45', longitude=u'-87.24')
Pandas(Index=u'04131', latitude=u'43.66', longitude=u'-110.71')
Pandas(Index=u'94085', latitude=u'44.01', longitude=u'-100.35')
Pandas(Index=u'03739', latitude=u'37.29', longitude=u'-75.92')
Pandas(Index=u'03063', latitude=u'37.86', longitude=u'-103.82')
Pandas(Index=u'73801', latitude=u'33.21', longitude=u'-87.59')
Pandas(Index=u'94081', latitude=u'45.51', longitude=u'-103.3')
Pandas(Index=u'53026', latitude=u'32.51', longitude=u'-105.33')
Pandas(Index=u'54903', latitude=u'44.06', longitude=u'-90.17')
Pandas(Index=u'63829', latitude=u'31.19', longitude=u'-84.44')
Pandas(Index=u'03077', latitude=u'34.82', longitude=u'-109.89')
Pandas(Index=u'53002', latitude=u'32.85', longitude=u'-104.45')
Pandas(Index=u'53132', latitude=u'31.59', longitude=u'-110.5')
Pandas(Index=u'63850', latitude=u'33.78', longit

In [159]:
for item in weather_locations_df.itertuples():
    weather_id = item[0]
    #solar_loc = (float(item[1]), float(item[2]))
    weather_loc = (float(item[1]), float(item[2]))
    #weather_lat = float(item[1].encode('ascii'))
    #weather_long = float(item[2].encode('ascii'))
    #weather_loc = (int(item[1].encode('utf-8')), int(item[2].encode('utf-8')))
    #vincenty(solar_loc, weather_loc, miles=True)


In [160]:
for item in plant_locations_df.itertuples():
    solar_id = item[0]
    solar_loc = (float(item[1]), float(item[2]))
    

In [161]:
vincenty(solar_loc, weather_loc)

TypeError: 'module' object is not callable

In [None]:
# PSEUDOCODE

# NOTE: IT MIGHT BE easier to try the vincenty package with one solar location and the weather station list first.
# But maybe not. Use your judgment, of course.  

# First figure out how to extract unique lat/lon rows from solar_locations dataframe.
# This is turning out to be more complicated than I thought.  
# I've done this before but it's not working like I thought
# First I was thinking of creating a new dataframe with just the loc_id and lat_lon colums
unique1 = pd.DataFrame(generation_df["loc_id"])
unique2 = pd.DataFrame(generation_df["lat_lon"])
unique_locations_df = pd.concat(unique1, generation_df["lat_lon"])
# And that wasn't working but I'm sure there is a better way to do that.

# And then it should be possible to remove all the duplicates.
# I think this is as simple as: 
df.drop_duplicates()



In [None]:
pd.concat([df1['c'], df2['c']], axis=1, keys=['df1', 'df2'])

In [None]:
# Then iterate through all those locations:
for item in unique_locations_df.itertuples():
    id = item[0]
    name = item[1]
    # could print at this point to see what's happening

In [None]:
# Then extract the latitude and longtitue for each weather stations (also as a tuple)
# also pull the wban_id as an identifier.
# create another for loop, like above, but to access weather stations

# for each solar power plant, cycle through all the weather stations:
    # keep track of the distance returned AND the weather station 
    # if a new distance returned is closer, keep track of that.
    # use vincenty package

# Return each solar power plant connected with a weather station
# Out put will a dictionary
# Where keys are solar stations and values are weather stations