# Grab historical weather data & elevation form addresses in California

In [5]:
#Import custom Google Maps and Weatherbit utlity functions
from utility import *

import pandas as pd
from tabulate import tabulate
import time
from datetime import timedelta

## Import csv of California vineyard locations
http://www.discovercaliforniawines.com provides a directory of vineyards in California. I used this directory to collect addresses of vineyards around California. 

Some of the addresses listed were showrooms, so I filtered those out of the list since we only want to collect information on where the grapes are grown.

## Adding in non-vineyards
I then added in addresses of locations in California which aren't vineyards. I realize that this isn't perfect because a **certain location might be great for a vineyard, but just doesn't happen to have one located there.** Because of this, I don't expect my model to reach extremley high accuracy results because the data will be a bit noisy. 

In [6]:
#Import the csv of addresses (shuffle the rows)
address_df = pd.read_csv('california_vineyards.csv', encoding='cp1252').sample(frac=1)

#Pretty print the address dataframe
print(tabulate(address_df.head(10), headers=['Address', 'IsVineyard', 'Name'], tablefmt= 'grid'))

#Split dataframe into a training & test set
train_df = address_df[0:-30]
test_df = address_df[-30:]

+-----+-----------+---------------------------------+-------------------------------------------------------+
|     |   Address | IsVineyard                      | Name                                                  |
| 842 |         1 | Talley Vineyards                | 3031 Lopez Drive, Arroyo Grande, CA 93420             |
+-----+-----------+---------------------------------+-------------------------------------------------------+
| 149 |         0 | nan                             | 16186 Candace Ln, Nevada City, CA 95959               |
+-----+-----------+---------------------------------+-------------------------------------------------------+
| 420 |         0 | nan                             | 40633 Little River Airport Rd, Little River, CA 95456 |
+-----+-----------+---------------------------------+-------------------------------------------------------+
| 571 |         1 | David Coffaro Vineyard & Winery | 7485 Dry Creek Road, Geyserville, CA 95441            |
+-----+---

# Use Google Maps & WeatherBit APIs to gather data on addresses
Note that both these APIs have rate limits. To run this yourself, you'll need to reduce the number of addresses passed to the land_data() function or upgrade from basic accounts with Google Maps & Weatherbit.

## Google Maps data collected
* Latitude & longitude
* Matrix of elevation points based on 1km area around latitude & longitude coordinates

## Weatherbit data collected
* Wind direction
* Wind speed
* Precipitation
* Average temperature
* Minimum temperature
* Max temperature
* Cloud coverage
* GHI (Global Horizontal Irradiance) - aka solar radiation
* RH (Relative humidity)

In [7]:
def land_data(df):
    import numpy as np
    year_offset = timedelta(days=364) #I know, I know, there aren't 364 days in a year, but this accounts for leapyears + my limited 1year of historical data
    end_date = pd.to_datetime('today')
    start_date = end_date - year_offset

    #1-year of dates as list
    d = pd.date_range(start=start_date, end=end_date, freq='D')

    #A variable to store the last date to use in the range of the weather api data
    last_date_str = 0

    #Initialize np.arrays variables which will eventually be fed into our keras model
    is_vineyard = np.array([])
    elevation = np.array([])
    map_coords = np.array([])
    wind_dir = np.array([])
    wind_spd = np.array([])
    precip = np.array([])
    temp = np.array([])
    min_temp = np.array([])
    max_temp = np.array([])
    clouds = np.array([])
    ghi = np.array([])
    rh = np.array([])

    for index, row in df.iterrows():
        address = row['Address']
        print('Collecting data for address: ' + str(address))

        #Get numerical latitute and longitude values
        lat, lng = lat_lng(address)

        #Create blank arrays to store weather data for each address
        address_wind_dir = np.array([])
        address_wind_spd = np.array([])
        address_precip = np.array([])
        address_temp = np.array([])
        address_min_temp = np.array([])
        address_max_temp = np.array([])
        address_clouds = np.array([])
        address_ghi = np.array([])
        address_rh = np.array([])

        #Collect weather data for all dates over the last year
        for date in d[:]:

            #format the date as a string - truncate to the first 10 characters
            date_str = str(date)[:10]  

            #Get day as int
            day = int(date_str[-2:])

            #Grab data every 5 days
            skip_days = 5

            #Only grab digits if they
            if ((last_date_str != 0) & (day % skip_days == 0)):

                #Get a dictionary of weather data based off a day
                weather_data = weather_hist(last_date_str, date_str, lat, lng)

                #Grab elements from the weather_data dictionary
                address_wind_dir = np.append(address_wind_dir, weather_data.get('wind_dir'))
                address_wind_spd = np.append(address_wind_spd, weather_data.get('wind_spd'))
                address_precip = np.append(address_precip, weather_data.get('precip'))
                address_temp = np.append(address_temp, weather_data.get('temp'))
                address_min_temp = np.append(address_min_temp, weather_data.get('min_temp'))
                address_max_temp = np.append(address_max_temp, weather_data.get('max_temp'))
                address_clouds = np.append(address_clouds, weather_data.get('clouds'))
                address_ghi = np.append(address_ghi, weather_data.get('ghi'))
                address_rh = np.append(address_rh, weather_data.get('rh'))

            #Save this date to be used as the start date for the next API call
            last_date_str = date_str

        last_date_str = 0

        #Append boolean is_vineyard value to an array which will be our dependant variable into our model 
        is_vineyard = np.append(is_vineyard, row['Vineyard'])   

        #Append matrix of elevation points for lat long values
        if(len(elevation) == 0):
            elevation = np.array([elevation_matrix(lat, lng)])
        else:
            elevation = np.concatenate([elevation, np.array([elevation_matrix(lat, lng)])], axis=0)

        #Append latitude & longitude values to an array which we'll feed into our model 
        if(len(map_coords) == 0):
            map_coords = (np.array([lat,lng]))
        else:
            map_coords = np.vstack([map_coords,np.array([lat,lng])])

        #Append each address's weather data to arrays which we'll feed into our model
        if(len(wind_dir) == 0):
            wind_dir = ([address_wind_dir])
        else:
            wind_dir = np.vstack([wind_dir,address_wind_dir])

        if(len(wind_spd) == 0):
            wind_spd = ([address_wind_spd])
        else:
            wind_spd = np.vstack([wind_spd,address_wind_spd]) 

        if(len(precip) == 0):
            precip = ([address_precip])        
        else:
            precip = np.vstack([precip,address_precip]) 

        if(len(temp) == 0):
            temp = ([address_temp])
        else:
            temp = np.vstack([temp,address_temp]) 

        if(len(min_temp) == 0):
            min_temp = ([address_min_temp])
        else:
            min_temp = np.vstack([min_temp,address_min_temp])

        if(len(max_temp) == 0):
            max_temp = ([address_max_temp])
        else:
            max_temp = np.vstack([max_temp,address_max_temp]) 

        if(len(clouds) == 0):
            clouds = ([address_clouds])
        else:
            clouds = np.vstack([clouds,address_clouds]) 

        if(len(ghi) == 0):
            ghi = ([address_ghi])
        else:
            ghi = np.vstack([ghi,address_ghi])

        if(len(rh) == 0):
            rh = ([address_rh])
        else:
            rh = np.vstack([rh,address_rh])
    
    #Return variables
    return is_vineyard, map_coords, elevation, wind_dir, wind_spd, precip, temp, min_temp, max_temp, clouds, ghi, rh

In [8]:
is_vineyard_train, map_coords_train, elevation_train, wind_dir_train, wind_spd_train, precip_train, temp_train, min_temp_train, max_temp_train, clouds_train, ghi_train, rh_train = land_data(train_df)

Collecting data for address: 3031 Lopez Drive, Arroyo Grande, CA 93420
Collecting data for address: 16186 Candace Ln, Nevada City, CA 95959
Collecting data for address: 40633 Little River Airport Rd, Little River, CA 95456


KeyboardInterrupt: 

## Save train & test variables

In [9]:
# Save variable objects:
with open('vineyard_train.pkl', 'wb') as f: 
    pickle.dump([is_vineyard_train, map_coords_train, elevation_train, wind_dir_train, wind_spd_train, precip_train, temp_train, min_temp_train, max_temp_train, clouds_train, ghi_train, rh_train], f)

In [10]:
is_vineyard_test, map_coords_test, elevation_test, wind_dir_test, wind_spd_test, precip_test, temp_test, min_temp_test, max_temp_test, clouds_test, ghi_test, rh_test = land_data(test_df)

Collecting data for address: 8419 Airola, Vallecito, CA 95251
Collecting data for address: 850 Rutherford Road, Rutherford, CA 94574
Collecting data for address: 8500 Dry Creek Road, Geyserville, CA 95441
Collecting data for address: 8533 Dry Creek Road, Healdsburg, CA 95448
Collecting data for address: 8585 Cross Canyons Road, San Miguel, CA 93451
Collecting data for address: 8599 Ocean View Rd, Ventura, CA 93001
Collecting data for address: 8605 State Highway 16, Brooks, CA 95606
Collecting data for address: 8644 Highway 128, Healdsburg, CA 95448
Collecting data for address: 8711 Silverado Trail, St. Helena, CA 94574
Collecting data for address: 8761 Dry Creek Road, Healdsburg, CA 96448
Collecting data for address: 8900 Sunset Rd, Joshua Tree, CA 92252
Collecting data for address: 8910 Adelaida Road, Paso Robles, CA 93446
Collecting data for address: 90 Grey Fox Lane, Oroville, CA 95966
Collecting data for address: 9010 E. Harney Lane Lodi, CA 95240
Collecting data for address: 91 Ed

In [11]:
# Save variable objects:
with open('vineyard_test.pkl', 'wb') as f: 
    pickle.dump([is_vineyard_test, map_coords_test, elevation_test, wind_dir_test, wind_spd_test, precip_test, temp_test, min_temp_test, max_temp_test, clouds_test, ghi_test, rh_test], f)