In [1]:
# Dependencies
import pandas as pd 
import requests
import json
import random

# Google API Key
from config import gkey

# Path to the wine reviews CSV; defined once so the read below cannot drift from it
file_path = "Resources/winemag-data_first150k.csv"
data = pd.read_csv(file_path)

# Preview the first 50 rows of the loaded data
data.head(50)


Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude
5,5,Spain,"Deep, dense and pure from the opening bell, th...",Numanthia,95,73.0,Northern Spain,Toro,,Tinta de Toro,Numanthia
6,6,Spain,Slightly gritty black-fruit aromas include a s...,San Román,95,65.0,Northern Spain,Toro,,Tinta de Toro,Maurodos
7,7,Spain,Lush cedary black-fruit aromas are luxe and of...,Carodorum Único Crianza,95,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
8,8,US,This re-named vineyard was formerly bottled as...,Silice,95,65.0,Oregon,Chehalem Mountains,Willamette Valley,Pinot Noir,Bergström
9,9,US,The producer sources from two blocks of the vi...,Gap's Crown Vineyard,95,60.0,California,Sonoma Coast,Sonoma,Pinot Noir,Blue Farm


In [2]:
# Discard the columns that are not needed, then remove every row that still
# has a missing value
data = (
    data
    .drop(columns=["Unnamed: 0", "description", "region_2"])
    .dropna()
)

In [4]:
# Work on a small sample for the project; the fixed seed makes the draw repeatable
sample = (
    data
    .sample(n=10, axis="index", random_state=4)
    .reset_index(drop=True)
)

sample

Unnamed: 0,country,designation,points,price,province,region_1,variety,winery
0,Australia,Thorn,88,19.0,South Australia,Eden Valley,Riesling,Grant Burge
1,Italy,Doganella Il Tintorosso,87,15.0,Tuscany,Maremma,Red Blend,Tommasi
2,US,Reserve,87,28.0,California,Paso Robles,Cabernet Sauvignon,Castoro Cellars
3,US,Ellenbach Vineyard,90,60.0,California,Sonoma Coast,Pinot Noir,Etude
4,Italy,Il Monovitigno,88,30.0,Sicily & Sardinia,Sicilia,Nero d'Avola,Benanti
5,Italy,Centine,87,12.0,Tuscany,Toscana,Rosé,Banfi
6,Spain,Crianza,85,12.0,Central Spain,La Mancha,Red Blend,Finca Antigua
7,US,Gypsy,90,25.0,California,Santa Barbara County,Rhône-style Red Blend,Epiphany
8,US,Reserve,89,24.0,California,Livermore Valley,Syrah,Concannon
9,Italy,Le Vigne,86,118.0,Piedmont,Barolo,Nebbiolo,Sandrone


In [7]:
# Placeholder columns; a row whose lookup fails keeps the empty string
sample["latitud"] = ""
sample["longitud"] = ""
sample["elevation"] = ""

# Google Maps web-service endpoints
base_url = "https://maps.googleapis.com/maps/api/geocode/json"
elevation_url = "https://maps.googleapis.com/maps/api/elevation/json"

# Seconds to wait for each HTTP call, so a stalled connection cannot hang the cell
request_timeout = 10


def _first_result(url, params):
    """Return the first entry of ``results`` from a Google Maps JSON response.

    Args:
        url: Endpoint to query.
        params: Query parameters; passed to requests so they are URL-encoded.

    Returns:
        The first element of the response's ``results`` list.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        ValueError: If the response body is not valid JSON.
        LookupError: If ``results`` is missing or empty; the message is the
            response's ``status`` field when present.
    """
    response = requests.get(url, params=params, timeout=request_timeout)
    response.raise_for_status()
    payload = response.json()
    results = payload.get("results")
    if not results:
        raise LookupError(payload.get("status", "no results"))
    return results[0]


# Fill the latitud and longitud columns, one geocoding request per winery
for index, row in sample.iterrows():

    # Build winery location search
    winery_loc = row["winery"] + "," + row["region_1"] + "," + row["province"]

    try:
        location = _first_result(base_url, {"address": winery_loc, "key": gkey})["geometry"]["location"]
        lat, lng = location["lat"], location["lng"]
    except LookupError as err:
        # No usable result (KeyError/IndexError included); show the reported status
        print(f"Winery not found: {winery_loc} ({err})")
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: requests' error messages can embed
        # the full request URL, which contains the API key
        print(f"Winery not found: {winery_loc} ({type(err).__name__})")
    else:
        sample.loc[index, "latitud"] = lat
        sample.loc[index, "longitud"] = lng
        print(f"Winery location {winery_loc} {lat} {lng} found")

# Fill the elevation column, one elevation request per geocoded winery
for index, row in sample.iterrows():
    lat = row["latitud"]
    lng = row["longitud"]

    # Rows that were not geocoded still hold the "" placeholder; skip the
    # request instead of sending an empty coordinate pair
    if lat == "" or lng == "":
        print(f"Elevation not found: {row['winery']} (no coordinates)")
        continue

    try:
        elevation = _first_result(elevation_url, {"locations": f"{lat},{lng}", "key": gkey})["elevation"]
    except LookupError as err:
        print(f"Elevation not found: {row['winery']} ({err})")
    except (requests.RequestException, ValueError) as err:
        # See above: do not print the exception text, it may contain the API key
        print(f"Elevation not found: {row['winery']} ({type(err).__name__})")
    else:
        sample.loc[index, "elevation"] = elevation
        print(f"Winery elevation {elevation} found")

sample.to_csv("Resources/winery_data.csv", index=False)


Winery location Grant Burge,Eden Valley,South Australia -34.5615916 138.9754373 found
Winery location Tommasi,Maremma,Tuscany 42.6086111 11.6966667 found
Winery location Castoro Cellars,Paso Robles,California 35.5772607 -120.722921 found
Winery location Etude,Sonoma Coast,California 38.2458215 -122.3150228 found
Winery location Benanti,Sicilia,Sicily & Sardinia 37.5999938 14.0153557 found
Winery location Banfi,Toscana,Tuscany 42.980621 11.400659 found
Winery location Finca Antigua,La Mancha,Central Spain 39.6310835 -2.8920419 found
Winery location Epiphany,Santa Barbara County,California 34.6669689 -120.1147479 found
Winery location Concannon,Livermore Valley,California 37.666199 -121.7397388 found
Winery location Sandrone,Barolo,Piedmont 44.6208903 7.954625300000001 found
Winery elevation 323.2424926757812 found
Winery elevation 354.9888916015625 found
Winery elevation 280.32470703125 found
Winery elevation 16.87372779846191 found
Winery elevation 622.5406494140625 found
Winery elevat