In [53]:
#Import Modules 
import os 
import csv 
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as sts
import numpy as np
import gmaps
import requests
from citipy import citipy
import json

In [54]:
#Relative path to the wine review CSV (nothing is opened here; the file is read in the next cell)
csv_path = 'Resources/winemag-data.csv'

In [55]:
#Load the raw wine reviews into a DataFrame
wine_data_pd = pd.read_csv(csv_path)

#Cleaning: remove the two taster columns, then keep only the rows that have
#a value in every column listed in subset
clean_data_pd = (
    wine_data_pd
    .drop(columns=['taster_name', 'taster_twitter_handle'])
    .dropna(subset=['price', 'region_1', 'designation', 'points',
                    'title', 'description', 'variety'])
)
clean_data_pd.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,title,variety,winery
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
5,5,Spain,Blackberry and raspberry aromas show a typical...,Ars In Vitro,87,15.0,Northern Spain,Navarra,,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot,Tandem
6,6,Italy,"Here's a bright, informal red that opens with ...",Belsito,87,16.0,Sicily & Sardinia,Vittoria,,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato,Terre di Giurfo
9,9,France,This has great depth of flavor with its fresh ...,Les Natures,87,27.0,Alsace,Alsace,,Jean-Baptiste Adam 2012 Les Natures Pinot Gris...,Pinot Gris,Jean-Baptiste Adam


In [56]:
#Keep only the rows with no missing value in any column, so every remaining
#wine has a region_2 (its most exact location)
#NOTE(review): no country filter is applied here; the name us_data presumably
#relies on region_2 only being filled in for US wines - confirm
us_data = clean_data_pd.dropna(axis=0, how='any')

#Showing overview of data
us_data.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,title,variety,winery
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
10,10,US,"Soft, supple plum envelopes an oaky structure ...",Mountain Cuvée,87,19.0,California,Napa Valley,Napa,Kirkland Signature 2011 Mountain Cuvée Caberne...,Cabernet Sauvignon,Kirkland Signature
23,23,US,This wine from the Geneseo district offers aro...,Signature Selection,87,22.0,California,Paso Robles,Central Coast,Bianchi 2011 Signature Selection Merlot (Paso ...,Merlot,Bianchi
25,25,US,Oak and earth intermingle around robust aromas...,King Ridge Vineyard,87,69.0,California,Sonoma Coast,Sonoma,Castello di Amorosa 2011 King Ridge Vineyard P...,Pinot Noir,Castello di Amorosa
33,33,US,"Rustic and dry, this has flavors of berries, c...",Puma Springs Vineyard,86,50.0,California,Dry Creek Valley,Sonoma,Envolve 2010 Puma Springs Vineyard Red (Dry Cr...,Red Blend,Envolve


In [57]:
#Group the reviews by winery
grouped_wine = us_data.groupby('winery')

#Distinct region_2 values reviewed for each winery
province_wine = grouped_wine['region_2'].unique().to_frame()

#Distinct countries for each winery
country_wine = grouped_wine['country'].unique().to_frame()

#Average price for each winery
mean_wine = grouped_wine['price'].mean().to_frame()

#Join the three per-winery tables on winery, then turn winery into a regular column
wine_pd = (
    province_wine
    .merge(country_wine, on='winery')
    .merge(mean_wine, on='winery')
    .reset_index()
)

#Reduce each array of distinct values to its first entry so that
#region_2 and country hold plain strings
wine_pd = wine_pd.assign(
    region_2=wine_pd['region_2'].str[0],
    country=wine_pd['country'].str[0],
)

In [58]:
#Preview the first rows of the per-winery summary table
wine_pd.head(n=5)

Unnamed: 0,winery,region_2,country,price
0,10 Knots,Central Coast,US,28.0
1,100 Percent Wine,California Other,US,18.0
2,1000 Stories,California Other,US,19.0
3,12C Wines,Napa,US,92.6
4,14 Hands,Columbia Valley,US,20.897436


In [59]:
#"Worst" here means cheapest: the 200 wineries with the lowest average price
worst_us_wineries = wine_pd.nsmallest(n=200, columns='price', keep='last')

#Non-null count of every column in the selection
worst_us_wineries.count()

winery      200
region_2    200
country     200
price       200
dtype: int64

In [60]:
#"Best" here means most expensive: the 200 wineries with the highest average price
best_us_wineries = wine_pd.nlargest(n=200, columns='price', keep='last')

#Preview the first rows of the selection
best_us_wineries.head(n=5)

Unnamed: 0,winery,region_2,country,price
3655,Yao Ming,Napa,US,625.0
3440,Verité,Sonoma,US,373.333333
1737,Kapcsandy Family Winery,Napa,US,325.0
1577,Hundred Acre,Napa,US,300.0
164,Araujo,Napa,US,275.0


In [67]:
#Region and country of each of the best wineries
region_2 = best_us_wineries['region_2']
country = best_us_wineries['country']

#Pairing every region_2 with its country
location = zip(region_2, country)

#Empty lists to collect the latitudes, longitudes and cities
lat = []
lng = []
city_list = []

#OpenCage API key, read from the environment so the secret is not stored in the notebook
api_key = os.environ.get('OPENCAGE_API_KEY')
if not api_key:
    raise RuntimeError('Set the OPENCAGE_API_KEY environment variable before running this cell.')

region_2

3655               Napa
3440             Sonoma
1737               Napa
1577               Napa
164                Napa
             ...       
1020               Napa
1007      Central Coast
957     Columbia Valley
792              Sonoma
656     Columbia Valley
Name: region_2, Length: 200, dtype: object

In [62]:
#Look up the latitude and longitude of every (region_2, country) pair with the
#OpenCage geocoder. Each distinct pair is requested only once; repeats are
#answered from geocode_cache.
geocode_url = 'https://api.opencagedata.com/geocode/v1/json'
geocode_cache = {}

#The loop variables have their own names so the `location` iterable is not overwritten
for region_name, country_name in location:
    place = f'{region_name}, {country_name}'
    if place not in geocode_cache:
        #params= lets requests URL-encode the query text; timeout stops a hung call
        response = requests.get(geocode_url, params={'q': place, 'key': api_key}, timeout=10)
        cordinate_data = response.json()
        matches = cordinate_data.get('results') or []
        if not matches:
            #Name the place that failed; the request URL (which holds the key) is left out
            raise ValueError(f'OpenCage returned no result for {place!r} (HTTP {response.status_code})')
        geocode_cache[place] = matches[0]['geometry']
    #Appending the lat and long to their list
    lat.append(geocode_cache[place]['lat'])
    lng.append(geocode_cache[place]['lng'])

In [63]:
#Attach the geocoded coordinates to the best wineries as two new columns
best_us_wineries = best_us_wineries.assign(Latitude=lat, Longitude=lng)

#Pair each latitude with its longitude
cordinates = zip(lat, lng)

#Preview the table with the new columns
best_us_wineries.head(n=5)

In [64]:
#Nearest city (according to citipy) for every (latitude, longitude) pair.
#The comprehension uses its own loop names, so `cordinates` is not overwritten
#by one of its own elements.
city_list = [
    citipy.nearest_city(latitude, longitude).city_name
    for latitude, longitude in cordinates
]

#Finding the length of the list of cities
len(city_list)

In [65]:
#Store the nearest city of every winery in a new City column
best_us_wineries = best_us_wineries.assign(City=city_list)

In [70]:
#temp collects one temp_max reading per winery; hum is created but not filled in this cell
temp = []
hum = []

#OpenWeatherMap API key, read from the environment so the secret is not stored in the notebook
weather_api_key = os.environ.get('OPENWEATHER_API_KEY')
if not weather_api_key:
    raise RuntimeError('Set the OPENWEATHER_API_KEY environment variable before running this cell.')

#Url of open weather map (the query values are passed separately through params=)
url = 'https://api.openweathermap.org/data/2.5/weather'

#City of every best winery
city = best_us_wineries['City']

#Each distinct city is requested only once; repeats are answered from weather_cache
weather_cache = {}

#Finding the temperature for the corresponding city.
#The loop variable has its own name so the `city` Series is not overwritten.
for city_name in city:
    if city_name not in weather_cache:
        weather_response = requests.get(
            url,
            params={'appid': weather_api_key, 'q': city_name, 'units': 'imperial'},
            timeout=10,
        )
        weather_data = weather_response.json()
        if 'main' not in weather_data:
            #Name the city that failed; the request URL (which holds the key) is left out
            reason = weather_data.get('message', 'response has no main section')
            raise ValueError(f'OpenWeatherMap returned no weather for {city_name!r}: {reason}')
        weather_cache[city_name] = weather_data['main']['temp_max']

    temp.append(weather_cache[city_name])

In [None]:
#Store the temperature reading of every winery in a new Temperature column
best_us_wineries = best_us_wineries.assign(Temperature=temp)

#Preview the table with the new column
best_us_wineries.head(n=5)

In [None]:
#Temperature and average price of the best wineries, kept under their own names
#for the plots and summary statistics further down
best_temp, best_price = best_us_wineries['Temperature'], best_us_wineries['price']

In [None]:
#Region and country of each of the worst wineries
region_2 = worst_us_wineries['region_2']
country = worst_us_wineries['country']

#Pairing every region_2 with its country
location = zip(region_2, country)

#Empty lists to collect the lookup results (fips is created but not filled in this cell)
lat = []
lng = []
fips = []
city_list = []

#OpenCage API key, read from the environment so the secret is not stored in the notebook
api_key = os.environ.get('OPENCAGE_API_KEY')
if not api_key:
    raise RuntimeError('Set the OPENCAGE_API_KEY environment variable before running this cell.')

In [None]:
#Look up the latitude and longitude of every (region_2, country) pair with the
#OpenCage geocoder. Each distinct pair is requested only once; repeats are
#answered from geocode_cache.
geocode_url = 'https://api.opencagedata.com/geocode/v1/json'
geocode_cache = {}

#The loop variables have their own names so the `location` iterable is not overwritten
for region_name, country_name in location:
    place = f'{region_name}, {country_name}'
    if place not in geocode_cache:
        #params= lets requests URL-encode the query text; timeout stops a hung call
        response = requests.get(geocode_url, params={'q': place, 'key': api_key}, timeout=10)
        cordinate_data = response.json()
        matches = cordinate_data.get('results') or []
        if not matches:
            #Name the place that failed; the request URL (which holds the key) is left out
            raise ValueError(f'OpenCage returned no result for {place!r} (HTTP {response.status_code})')
        geocode_cache[place] = matches[0]['geometry']
    #Appending the lat and long to their list
    lat.append(geocode_cache[place]['lat'])
    lng.append(geocode_cache[place]['lng'])

In [31]:
#Check that geocoding produced exactly one coordinate per winery before attaching
#the columns, so a mismatch is reported with the actual counts
expected_rows = len(worst_us_wineries)
if len(lat) != expected_rows or len(lng) != expected_rows:
    raise ValueError(
        f'Length of values does not match length of index: expected {expected_rows} '
        f'coordinates, got {len(lat)} latitudes and {len(lng)} longitudes. '
        'Re-run the two geocoding cells for the worst wineries, in order, and try again.'
    )

#Adding Latitude and Longitude to the DataFrame
worst_us_wineries['Latitude'] = lat
worst_us_wineries['Longitude'] = lng

ValueError: Length of values does not match length of index

In [32]:
#Preview the first rows of the worst wineries
worst_us_wineries.head(n=5)

Unnamed: 0,winery,region_2,country,price
2415,Pam's Cuties,California Other,US,4.0
2040,Mancan,California Other,US,5.0
1068,Earth's Harvest,California Other,US,5.0
1260,Fox Brook,California Other,US,6.0
3694,flipflop,California Other,US,7.0


In [33]:
#Collect the geocoded coordinates of the worst wineries in their own DataFrame
weather_cord = pd.DataFrame({'latitude': lat, 'longitude': lng})

#From here on lat and lng refer to the DataFrame columns rather than the lists
lat, lng = weather_cord['latitude'], weather_cord['longitude']

#Pair each latitude with its longitude
cordinates = zip(lat, lng)

In [None]:
#Nearest city (according to citipy) for every (latitude, longitude) pair.
#The comprehension uses its own loop names, so `cordinates` is not overwritten
#by one of its own elements.
city_list = [
    citipy.nearest_city(latitude, longitude).city_name
    for latitude, longitude in cordinates
]

#Finding the length of the list of cities
len(city_list)

In [None]:
#Store the nearest city of every winery in a new City column, then preview the table
worst_us_wineries = worst_us_wineries.assign(City=city_list)
worst_us_wineries.head(n=5)

In [None]:
#temp collects one temp_max reading per winery; hum is created but not filled in this cell
temp = []
hum = []

#OpenWeatherMap API key, read from the environment so the secret is not stored in the notebook
weather_api_key = os.environ.get('OPENWEATHER_API_KEY')
if not weather_api_key:
    raise RuntimeError('Set the OPENWEATHER_API_KEY environment variable before running this cell.')

#City of every worst winery
city = worst_us_wineries['City']

#Url of open weather map (the query values are passed separately through params=)
url = 'https://api.openweathermap.org/data/2.5/weather'

#Each distinct city is requested only once; repeats are answered from weather_cache
weather_cache = {}

#Finding the temperature for the corresponding city.
#The loop variable has its own name so the `city` Series is not overwritten.
for city_name in city:
    if city_name not in weather_cache:
        weather_response = requests.get(
            url,
            params={'appid': weather_api_key, 'q': city_name, 'units': 'imperial'},
            timeout=10,
        )
        weather_data = weather_response.json()
        if 'main' not in weather_data:
            #Name the city that failed; the request URL (which holds the key) is left out
            reason = weather_data.get('message', 'response has no main section')
            raise ValueError(f'OpenWeatherMap returned no weather for {city_name!r}: {reason}')
        weather_cache[city_name] = weather_data['main']['temp_max']

    temp.append(weather_cache[city_name])

In [None]:
#Store the temperature reading of every winery in a new Temperature column
worst_us_wineries = worst_us_wineries.assign(Temperature=temp)

In [None]:
#Temperature and average price of the worst wineries
#(temp now refers to the DataFrame column instead of the list filled earlier)
temp, price = worst_us_wineries['Temperature'], worst_us_wineries['price']

In [None]:
#Combine the worst and best wineries into one table. Every column is a join key
#and the join is outer, so the rows of both tables are kept.
us_wineries_pd = worst_us_wineries.merge(
    best_us_wineries,
    how='outer',
    on=['winery', 'region_2', 'country', 'price',
        'Latitude', 'Longitude', 'City', 'Temperature'],
)


In [None]:
#Temperature (x) and average price (y) of the combined table
us_temp, us_price = us_wineries_pd['Temperature'], us_wineries_pd['price']

#Linear regression of price on temperature
slope, intercept, rvalue, pvalue, stderr = sts.linregress(us_temp, us_price)

#Equation of the fitted line as text, rounded to two decimals
lin_reg = f'y = {round(slope, 2)!s}x + {round(intercept, 2)!s}'
lin_reg


In [None]:
#Scatter of average winery price against temperature for both groups,
#with the regression line fitted on the combined data drawn on top
fig, ax1 = plt.subplots()

ax1.scatter(temp, price, s=10, c='b', marker='s', label='Worst 200 Wineries')
ax1.scatter(best_temp, best_price, s=10, c='r', marker='o', label='Best 200 Wineries')
ax1.legend(loc='upper left')

#Fixed axis ranges; anything outside 48-92 °F or -20-700 $ is not shown
ax1.set_xlim(48, 92)
ax1.set_ylim(-20, 700)

#Giving names to labels
ax1.set_title('Temperature(°F) v Average Winery Price($)')
ax1.set_xlabel('Temperature (°F)')
ax1.set_ylabel('Average Winery Price($)')

#Regression line evaluated at every observed temperature
x_values = us_temp
regress_values = x_values * slope + intercept
ax1.plot(x_values, regress_values, 'r-')
ax1.grid()
print(f'The r squared value is: {rvalue**2}')
plt.show()

In [None]:
#Temperatures of the two groups, worst wineries first
data = [temp, best_temp]

#Plotting the box and whisker plot (outliers drawn as red plus signs)
fig, ax = plt.subplots()
ax.set_title('Temperature of Wineries (°F)')
ax.boxplot(data, sym='r+')

#Tick labels follow the order of `data`; both groups hold 200 wineries
ax.set_xticks([1, 2])
ax.set_xticklabels(['Worst 200 Wineries', 'Best 200 Wineries'])
plt.show()

In [None]:
#Mean temperature of the worst wineries (missing readings are skipped, as by default)
temp_mean = temp.mean(skipna=True)

In [None]:
#Mean temperature of the best wineries (missing readings are skipped, as by default)
best_temp_mean = best_temp.mean(skipna=True)

In [None]:
#One-row summary of the mean temperature of each group.
#The labels say 200 because both groups were selected as 200 wineries.
mean_sum = {'Mean Temperature(°F) Best 200 Wineries' : [best_temp_mean],
           'Mean Temperature(°F) Worst 200 Wineries' : [temp_mean]}

In [None]:
#Turn the summary dictionary into a DataFrame and display it
mean_sum_pd = pd.DataFrame(data=mean_sum)
mean_sum_pd