# Grab historical weather data & elevation form addresses in California

In [1]:
#Import custom Google Maps and Weatherbit utlity functions
from utility import *
import pandas as pd
from tabulate import tabulate

## Import csv of California vineyard locations
http://www.discovercaliforniawines.com provides a directory of vineyards in California. I used this directory to collect addresses of vineyards around California. 

Some of the addresses listed were showrooms, so I filtered those out of the list since we only want to collect information on where the grapes are grown.

## Adding in non-vineyards
I then added in addresses of locations in California which aren't vineyards. I realize that this isn't perfect because a **certain location might be great for a vineyard, but just doesn't happen to have one located there.** Because of this, I don't expect my model to reach extremley high accuracy results because the data will be a bit noisy. 

In [2]:
#Import the csv of addresses (shuffle the rows)
address_df = pd.read_csv('california_vineyards.csv', encoding='cp1252').sample(frac=1)

#Pretty print the address dataframe
print(tabulate(address_df.head(10), headers=['Address', 'Name', 'Address'], tablefmt= 'grid'))

+-----+-----------+---------------------------------+--------------------------------------------+
|     |   Address | Name                            | Address                                    |
|  45 |         0 | nan                             | 4971 Rocky Rd, San Andreas, CA 95249       |
+-----+-----------+---------------------------------+--------------------------------------------+
| 635 |         1 | Heritage Oak Winery             | 10112 E. Woodbridge Road, Acampo, CA 95220 |
+-----+-----------+---------------------------------+--------------------------------------------+
| 405 |         0 | nan                             | 0 Paseo Chaparro, Murrieta, CA 92562       |
+-----+-----------+---------------------------------+--------------------------------------------+
| 884 |         1 | William Hill Estate Winery      | 1761 Atlas Peak Road, Napa, CA 94558       |
+-----+-----------+---------------------------------+--------------------------------------------+
|  31 |   

# Use Google Maps & WeatherBit APIs to gather data on addresses
Note that both these APIs have rate limits. To run this yourself, you'll need to reduce the number of addresses passed to the land_data() function or upgrade from basic accounts with Google Maps & Weatherbit.

## Google Maps data collected
* Latitude & longitude
* Matrix of elevation points based on 1km area around latitude & longitude coordinates

## Weatherbit data collected
* Wind direction
* Wind speed
* Precipitation
* Average temperature
* Minimum temperature
* Max temperature
* Cloud coverage
* GHI (Global Horizontal Irradiance) - aka solar radiation
* RH (Relative humidity)

In [10]:
def land_data(df, new_df):
    import numpy as np
    import time
    from datetime import timedelta
    
    year_offset = timedelta(days=364) #I know, I know, there aren't 364 days in a year, but this accounts for leapyears + my limited 1year of historical data
    end_date = pd.to_datetime('today')
    start_date = end_date - year_offset

    #1-year of dates as list
    d = pd.date_range(start=start_date, end=end_date, freq='D')

    #A variable to store the last date to use in the range of the weather api data
    last_date_str = 0

    for index, row in df.iterrows():
        address = row['Address']
                
        #First check to see if this address already exists
        if(elevation_weather_df['Address'].str.contains(address).any() == False):

            #if(elevation_weather_df['Address'].str.contains(address).any() == False):
            print('Collecting data for address: ' + str(address))

            #Get numerical latitute and longitude values
            lat, lng = lat_lng(address)

            #Create blank arrays to store weather data for each address
            wind_dir = np.array([])
            wind_spd = np.array([])
            precip = np.array([])
            temp = np.array([])
            min_temp = np.array([])
            max_temp = np.array([])
            clouds = np.array([])
            ghi = np.array([])
            rh = np.array([])

            elevation = np.array([elevation_matrix(lat, lng)])

            #Collect weather data for all dates over the last year
            for date in d[:]:

                #format the date as a string - truncate to the first 10 characters
                date_str = str(date)[:10]  

                #Get day as int
                day = int(date_str[-2:])

                #Grab data every 5 days
                skip_days = 5

                if ((last_date_str != 0) & (day % skip_days == 0)):

                    #Get a dictionary of weather data based off a day
                    weather_data = weather_hist(last_date_str, date_str, lat, lng)

                    #Grab elements from the weather_data dictionary
                    #address_elevation = np.array([elevation_matrix(lat, lng)])
                    wind_dir = np.append(wind_dir, weather_data.get('wind_dir'))
                    wind_spd = np.append(wind_spd, weather_data.get('wind_spd'))
                    precip = np.append(precip, weather_data.get('precip'))
                    temp = np.append(temp, weather_data.get('temp'))
                    min_temp = np.append(min_temp, weather_data.get('min_temp'))
                    max_temp = np.append(max_temp, weather_data.get('max_temp'))
                    clouds = np.append(clouds, weather_data.get('clouds'))
                    ghi = np.append(ghi, weather_data.get('ghi'))
                    rh = np.append(rh, weather_data.get('rh'))

                #Save this date to be used as the start date for the next API call
                last_date_str = date_str

            #Update the new land details dataframe
            new_df.loc[len(new_df)] = row['Vineyard'], row['Address'], row['Name'], (lat, lng), elevation, wind_dir, wind_spd, precip, temp, min_temp, max_temp, clouds, ghi, rh 

        last_date_str = 0
        
        #The pkl file is used by build_vineyard_scout.ipynb to build our predictive model
        with open('california_vineyards_elevation_weather.pkl', 'wb') as f: 
            pickle.dump(elevation_weather_df, f)

        #The csv is exported as a friendly format in case we want to view the data outside of python
        new_df.to_csv('california_vineyards_elevation_weather.csv', index=False)
        

## Store elevation & weather data in a dataframe
Building this information takes a couple hours with multiple API calls. Because of this, I set the file up to allow for interuptions in that process. First, see if there's already a saved pkl file of the dataframe, skip any duplicated rows, and then build off of that same dataframe.

Update the pkl and csv file on each loop of an address

In [11]:
import pickle

#First see if there's a saved version of the dataframe
try:
    with open('california_vineyards_elevation_weather.pkl', 'rb') as f: 
        elevation_weather_df = pickle.load(f)
        
    print('Found saved pkl file of weather & elevation data to build off of...')
        
#If that file doesn't exist, create a new dataframe
except Exception:
    print('No existing saved pkl file of weather & elevation data to build off of. Starting from scratch')

    #Columns for blank dataframe
    land_data_cols = ['IsVineyard','Address','Name','MapCoords','ElevationMatrix','WindDir','WindSpd','Precip','Temp', 'MinTemp','MaxTemp','Clouds','GHI','RH']

    #Create blank train and test land dataframes
    elevation_weather_df = pd.DataFrame(columns=land_data_cols)


land_data(address_df, elevation_weather_df)

Found saved pkl file of weather & elevation data to build off of.
Collecting data for address: 1761 Atlas Peak Road, Napa, CA 94558
Collecting data for address: 6156 Shawnee Ln, Magalia, CA 95954
Collecting data for address: 4202 Dry Creek Road, Healdsburg, CA 95448
Collecting data for address: 1950 El Pomar Drive, Templeton, CA 93465
Collecting data for address: 3830 Skyhawk Ln, Vacaville, CA 95688
Collecting data for address: 74355 Copperhead Rd, Bradley, CA 93426
Collecting data for address: 8249 Millard Canyon Rd, Banning, CA 92220
Collecting data for address: 5251 Paddy Hill Rd, Mariposa, CA 95338
Collecting data for address: 26550 Pointe Ct, Pioneer, CA 95666
Collecting data for address: 2335 Geysers Road, Geyserville, CA 95441
Collecting data for address: 4680 Wallace Rd N, Santa Rosa, CA 95404
Collecting data for address: 8599 Ocean View Rd, Ventura, CA 93001
Collecting data for address: 19455 Cajalco Rd, Perris, CA 92570
Collecting data for address: 1099 Greenfield Road, St. H

Collecting data for address: 635 West Hollow Dr, Paso Robles, CA 93446
Collecting data for address: 22629 Country View Dr, San Jose, CA 95120
Collecting data for address: 1969 Joseph Dr. Moraga, CA 94556
Collecting data for address: 1277 Dwyer Road, Oakville, CA 94562
Collecting data for address: 2916 Laguna Rd, Forestville, CA 95436
Collecting data for address: 676 Newsom Springs Rd, Arroyo Grande, CA 93420
Collecting data for address: 350 Mayhews Rd, Fremont, CA 94536
Collecting data for address: 1 Carson, Lucerne Valley, CA 92356
Collecting data for address: 22001 Santa Susana Pass Rd, Chatsworth, CA 91311
Collecting data for address: 7388 Valaho Ln, Tujunga, CA 91042
Collecting data for address: 12500 Steiner Road, Plymouth, CA 95669
Collecting data for address: 16815 Oceanview Dr, Smith River, CA 95567
Collecting data for address: Lake Annie Rd, Fort Bidwell, CA 96112
Collecting data for address: 37320 De Portola, Temecula, CA 92590
Collecting data for address: 44 Colson Canyon Rd

Collecting data for address: 9339 Adelaida Road, Paso Robles, CA 93446
Collecting data for address: 4996 Harvest Moon, Portola, CA 96122
Collecting data for address: 22999 Perimeter Rd, Grass Valley, CA 95949
Collecting data for address: 0 Dry Canyon Cold Crk, Calabasas, CA 91302
Collecting data for address: 1311 Pine St, Calistoga, CA 94515
Collecting data for address: 91 Eden Canyon Rd, Berry Creek, CA 95916
Collecting data for address: 15000 Sonoma Highway, Glen Ellen, CA 95442
Collecting data for address: 15630 Buckeye Rd, Nevada City, CA 95959
Collecting data for address: 1150 Julian Orchards Dr, Julian, CA 92036
Collecting data for address: 849 Zinfandel Lane, St. Helena, CA 94574
Collecting data for address: 6905 Foxen Canyon Road, Los Olivos, CA 93441
Collecting data for address: 10628 Wilton Rd, Elk Grove, CA 95624
Collecting data for address: 3022 St. Helena Highway North, St. Helena, CA 94574
Collecting data for address: 4285 Blackhawk Dr, Willits, CA 95490
Collecting data f

Collecting data for address: 14300 North DeVries Road, Lodi, CA 95242
Collecting data for address: 17023 Voorhes, Ramona, CA 92065
Collecting data for address: 18596 Lomita Avenue, Sonoma, CA 95476
Collecting data for address: 3555 Roblar Ave, Santa Ynez, CA 93460
Collecting data for address: 3920 Hecker Pass Hwy, Gilroy, CA 95020
Collecting data for address: 1189 Green Valley Road, Napa, CA 94559
Collecting data for address: 000 Summit Crest Dr, Santee, CA 92071
Collecting data for address: 24805 Piuma Rd, Malibu, CA 90265
Collecting data for address: 0 Trailblazer Ln, Paso Robles, CA 93446
Collecting data for address: 11000 Shenandoah Road Plymouth, CA 95669
Collecting data for address: 6020 Foxen Canyon Road, Santa Maria, CA 93454
Collecting data for address: 33230 Wright Rd, Menifee, CA 92584
Collecting data for address: 3780 Spring Mountain Road, St. Helena, CA 94574
Collecting data for address: 4089 Silverado Trail, Napa, CA 94558
Collecting data for address: 5625 Cloverdale Rd, 

Collecting data for address: 7340 Drake Road, Paso Robles, CA 93446
Collecting data for address: 0 E Sulphur Mountain Rd, Ojai, CA 93023
Collecting data for address: 420 Old Mount Road, Felton, CA 95018
Collecting data for address: 5 Acres Cherry Acres Cir, Cool, CA 95614
Collecting data for address: 6097 Bennett Valley Road, Santa Rosa, CA 95404
Collecting data for address: 4242 Big Ranch Road, Napa, CA 94558
Collecting data for address: 12001 South Highway 99, Manteca, CA 95336
Collecting data for address: 3525 Adelaida Road, Paso Robles, CA 93446
Collecting data for address: 1 Hummingbird Lane, San Martin, CA 95046
Collecting data for address: 340 Kings Mountain Road, Woodside, CA 94062
Collecting data for address: 973 Old Topanga Canyon Rd, Topanga, CA 90290
Collecting data for address: 57332 Joshua Ln, Yucca Valley, CA 92284
Collecting data for address: 1001 Cedar Ave, Trinidad, CA 95570
Collecting data for address: 19550 Geyserville Avenue, Geyserville, CA 95441
Collecting data f

Collecting data for address: 0 Old Cazadero Rd, Guerneville, CA 95446
Collecting data for address: 14000 Tomki Rd, Redwood Valley, CA 95470
Collecting data for address: 20409 Symphony Dr, Riverside, CA 92507
Collecting data for address: 12225 Steiner Road, Plymouth, CA 95669
Collecting data for address: 170 Spring View Rd, La Selva Beach, CA 95076
Collecting data for address: 300 Via Archimedes, Geyserville, CA 95441
Collecting data for address: 620 Redwood Ave, Ukiah, CA 95482
Collecting data for address: 0 Old Coach Rd,Temecula, CA 92592
Collecting data for address: 1010 Willowcreek Ln, Fallbrook, CA 92028
Collecting data for address: 42041 20th St. West Lancaster, CA 93534
Collecting data for address: 5795 Silverado Trail, Napa, CA 94558
Collecting data for address: 7177 Nicki Trl, Mountain Ranch, CA 9524
Collecting data for address: 5443 Tesla Road, Livermore, CA 94550
Collecting data for address: 15 Cantera Run, Carmel, CA 93923
Collecting data for address: 24505 Chianti Road, Clo

Collecting data for address: 39870 De Portola Road, Temecula, CA 92592
Collecting data for address: 15913 Fay Rd, Grass Valley, CA 95949
Collecting data for address: 2401 Refugio Rd, Los Olivos, CA 93441
Collecting data for address: 1520 Kiler Canyon Rd, Paso Robles, CA 93446
Collecting data for address: 17 El Chaval Pl, Temecula, CA 92590
Collecting data for address: 00 Martins Ferry Rd, Hoopa, CA 95546
Collecting data for address: 24060 Summit Road, Los Gatos, CA 95033
Collecting data for address: 22645 Garrod Road, Saratoga, CA 95070
Collecting data for address: 120 Jacquier Ct, Placerville, CA 95667
Collecting data for address: 26600 Reiff Rd, Lower Lake, CA 95457
Collecting data for address: 12470 Yerba Buena Rd, Malibu, CA 90265
Collecting data for address: 11450 Brooks Road Windsor, CA 95492
Collecting data for address: 2121 Diamond Mountain Road, Calistoga, CA 94515
Collecting data for address: 8218 W August Rd, Turlock, CA 95380
Collecting data for address: 13500 S. Highway 10



Collecting data for address: 9592 Sonoma Hwy (Hwy 12), Kenwood, CA 95452
Collecting data for address: 19320 State Route 89, Markleeville, CA 96120
Collecting data for address: 2777 Hidden Mountain Road, Paso Robles, CA 93446
Collecting data for address: 0 Ridgeview Rd, Willits, CA 95490
Collecting data for address: 7801 St. Helena Hwy 29, Oakville, CA 94562
Collecting data for address: 4791 Dry Creek Road, Healdsburg, CA 95448
Collecting data for address: 1100 Larkmead Lane, Calistoga, CA 94515
Collecting data for address: 330 Stone Ridge Road, Angwin, CA 94508
Collecting data for address: 3810 E. Highway 46, Paso Robles, CA 93447
Collecting data for address: 7525 Orcutt Road, San Luis Obispo, CA 93401
Collecting data for address: 16186 Candace Ln, Nevada City, CA 95959
Collecting data for address: 0 Vine Hill Rd, Santa Cruz, CA 95065
Collecting data for address: 3360 River Road, Windsor, CA 95492
Collecting data for address: 2959 Gravenstein Hwy N, Sebastopol, CA 95472
Collecting data