In [88]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import hvplot.pandas
import warnings
warnings.filterwarnings("ignore")
from scipy import stats 
import numpy as np
import requests
import json


In [89]:
 #designate the csv file data path
data_load = Path ('../Resources/wine_data.csv')

# uniform language
wine_df = pd.read_csv(data_load,encoding="ISO-8859-1")

# removing duplicates
wine_df = wine_df.drop(["Unnamed: 7", "Unnamed: 8", "Unnamed: 9", "Unnamed: 10", "Unnamed: 11", "Unnamed: 12"], axis=1)
wine_df = wine_df.drop_duplicates()

# rennaming and capitalization of data
wine_df.rename(columns={'country':'Country'}, inplace=True)
wine_df.rename(columns={'description':'Description'}, inplace=True)
wine_df.rename(columns={'points':'Points'}, inplace=True)
wine_df.rename(columns={'price ($)':'Price ($)'}, inplace=True)
wine_df.rename(columns={'region':'Region'}, inplace=True)
wine_df.rename(columns={'title':'Title'}, inplace=True)
wine_df.rename(columns={'variety':'Variety'}, inplace=True)

#change types from objects to strings, anything that is a number to a float64/int
wine_df['Country']= wine_df['Country'].astype('string')
wine_df['Description']= wine_df['Description'].astype('string')
wine_df['Region']= wine_df['Region'].astype('string')
wine_df['Title']= wine_df['Title'].astype('string')
wine_df['Variety']= wine_df['Variety'].astype('string')
wine_df['Price ($)']=pd.to_numeric(wine_df['Price ($)'],errors='coerce')
wine_df['Points']=pd.to_numeric(wine_df['Points'],errors='coerce')

#drop unnecessary values
wine_df = wine_df.dropna(axis=0)

#print first five rows of data frame
wine_df.head()



Unnamed: 0,Country,Description,Points,Price ($),Region,Title,Variety
2,US,"Tart and snappy, the flavors of lime flesh and...",87.0,14.0,Willamette Valley,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris
3,US,"Pineapple rind, lemon pith and orange blossom ...",87.0,13.0,Lake Michigan Shore,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling
4,US,"Much like the regular bottling from 2012, this...",87.0,65.0,Willamette Valley,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir
5,Spain,Blackberry and raspberry aromas show a typical...,87.0,15.0,Navarra,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot
6,Italy,"Here's a bright, informal red that opens with ...",87.0,16.0,Vittoria,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato


In [90]:
# df types
wine_df.dtypes

Country        string[python]
Description    string[python]
Points                float64
Price ($)             float64
Region         string[python]
Title          string[python]
Variety        string[python]
dtype: object

In [91]:
# df count
wine_df.count()

Country        93576
Description    93576
Points         93576
Price ($)      93576
Region         93576
Title          93576
Variety        93576
dtype: int64

In [92]:
#find the different columns that are in data frame
wine_df.columns 

Index(['Country', 'Description', 'Points', 'Price ($)', 'Region', 'Title',
       'Variety'],
      dtype='object')

In [93]:
#isolate the top 10 wines based on variety data
df= wine_df['Variety'].value_counts()
threshold = 2500
drop_df = df[df > threshold].index

clean_wine_df = wine_df[wine_df['Variety'].isin(drop_df)==False].reset_index(drop=True)

clean_wine_df

Unnamed: 0,Country,Description,Points,Price ($),Region,Title,Variety
0,US,"Tart and snappy, the flavors of lime flesh and...",87.0,14.0,Willamette Valley,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris
1,US,"Pineapple rind, lemon pith and orange blossom ...",87.0,13.0,Lake Michigan Shore,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling
2,Spain,Blackberry and raspberry aromas show a typical...,87.0,15.0,Navarra,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot
3,Italy,"Here's a bright, informal red that opens with ...",87.0,16.0,Vittoria,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato
4,France,This dry and restrained wine offers spice in p...,87.0,24.0,Alsace,Trimbach 2012 Gewurztraminer (Alsace),Gewürztraminer
...,...,...,...,...,...,...,...
42650,Italy,"Blackberry, cassis, grilled herb and toasted a...",90.0,40.0,Sicilia,Cusumano 2012 Sàgana Tenuta San Giacomo Nero d...,Nero d'Avola
42651,France,"While it's rich, this beautiful dry wine also ...",90.0,28.0,Alsace,Domaine Rieflé-Landmann 2013 Seppi Landmann Va...,Pinot Gris
42652,France,Well-drained gravel soil gives this wine its c...,90.0,30.0,Alsace,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer
42653,France,"A dry style of Pinot Gris, this is crisp with ...",90.0,32.0,Alsace,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris


In [94]:
clean_wine_df.dtypes

Country        string[python]
Description    string[python]
Points                float64
Price ($)             float64
Region         string[python]
Title          string[python]
Variety        string[python]
dtype: object

In [95]:
#isolate the wines that have points of 90 or avbove
high_points = clean_wine_df.loc[(clean_wine_df['Points']>= 90)]

high_points['Points'].value_counts()

Points
90.0     4843
91.0     3348
92.0     2688
93.0     1734
94.0     1010
95.0      433
96.0      148
97.0       62
98.0       24
99.0       11
100.0       8
Name: count, dtype: int64

In [96]:
#find the countries that produced the 90 or above wines
high_points['Country'].value_counts()

Country
US           4823
Italy        4114
France       3074
Spain        1162
Australia     578
Argentina     487
Canada         71
Name: count, dtype: Int64

In [97]:
#create map for countries



coordinates = {'longitude': [-95.7129,12.496366,2.213749, -4.00000000, 135.0000,-63.616672,-95.00],
                'latitude': [37.0902,41.902782, 46.227638, 40.00000000,-25.0000,-38.416097,60.00], 
               'Country': ['US', 'Italy', 'France', 'Spain', 'Australia', 'Argentina', 'Canada']}

coordinates_df= pd.DataFrame(coordinates)

coordinates_df

Unnamed: 0,longitude,latitude,Country
0,-95.7129,37.0902,US
1,12.496366,41.902782,Italy
2,2.213749,46.227638,France
3,-4.0,40.0,Spain
4,135.0,-25.0,Australia
5,-63.616672,-38.416097,Argentina
6,-95.0,60.0,Canada


In [98]:
country_map= coordinates_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM"
)

country_map

In [99]:
#isolate top 10 regions for US 

#top_ten_regions = high_points.loc[(high_points['Country']=='US')]

#top_ten_regions.head(10)


In [100]:
#create coordinates df
#region_coordinates = {'longitude': [-121.326319, -121.898460,-120.6071139,-120.680008,-122.286865,-120.76875000,-122.966664,-123.43],
                #'latitude': [36.424689, 36.603954,35.2166418,35.640556,38.297539,39.30137000, 38.699997,39.06 ], 
               #'Region': ['Santa Lucia Highlands', 'Monterey', 'Edna Valley', 'Paso Robles', 'Napa Valley', 
                           #'Nevada County', 'Dry Creek Valley', 'Anderson Valley']}

#region_coordinates_df= pd.DataFrame(region_coordinates)

#region_coordinates_df

In [101]:
#create map of top regions for US
#region_map= region_coordinates_df.hvplot.points(
    #"longitude",
    #"latitude",
    #geo = True,
    #tiles = "OSM")

#region_map

In [102]:
highest_points_df = high_points.loc[(high_points['Points']==100)]

highest_points_df


Unnamed: 0,Country,Description,Points,Price ($),Region,Title,Variety
140,Australia,This wine contains some material over 100 year...,100.0,350.0,Rutherglen,Chambers Rosewood Vineyards NV Rare Muscat (Ru...,Muscat
2623,Italy,Thick as molasses and dark as caramelized brow...,100.0,210.0,Vin Santo di Montepulciano,Avignonesi 1995 Occhio di Pernice (Vin Santo ...,Prugnolo Gentile
12723,France,This is a fabulous wine from the greatest Cham...,100.0,259.0,Champagne,Krug 2002 Brut (Champagne),Champagne Blend
13706,Italy,"A perfect wine from a classic vintage, the 200...",100.0,460.0,Toscana,Tenuta dell'Ornellaia 2007 Masseto Merlot (Tos...,Merlot
15986,Italy,"This gorgeous, fragrant wine opens with classi...",100.0,550.0,Brunello di Montalcino,Biondi Santi 2010 Riserva (Brunello di Montal...,Sangiovese
30430,France,This latest incarnation of the famous brand is...,100.0,250.0,Champagne,Louis Roederer 2008 Cristal Vintage Brut (Cha...,Champagne Blend
37113,Italy,It takes only a few moments before you appreci...,100.0,270.0,Brunello di Montalcino,Casanova di Neri 2007 Cerretalto (Brunello di...,Sangiovese Grosso
40633,France,"Full of ripe fruit, opulent and concentrated, ...",100.0,848.0,Pessac-Léognan,Château Haut-Brion 2014 Pessac-Léognan,Bordeaux-style White Blend


In [103]:
#create coordinates df
high_region_coordinates = {'longitude': [146.448431, 11.933222, 4.365000, 10.611521, 11.502941, 3.981701, 11.520198, -0.585224],
                          'latitude': [ -36.044628, 43.173714, 48.957500, 43.212206, 43.045119, 49.085187, 43.057458, 44.731470],
                            'Region': ['Rutherglen', 'Vin', 'Champagne', 'Toscana', 'Brunello', 'Champagne', 'Brunello', 'Pessac']}

high_region_coordinates_df= pd.DataFrame(high_region_coordinates)

high_region_coordinates_df

Unnamed: 0,longitude,latitude,Region
0,146.448431,-36.044628,Rutherglen
1,11.933222,43.173714,Vin
2,4.365,48.9575,Champagne
3,10.611521,43.212206,Toscana
4,11.502941,43.045119,Brunello
5,3.981701,49.085187,Champagne
6,11.520198,43.057458,Brunello
7,-0.585224,44.73147,Pessac


In [None]:
italy_regions

In [131]:
list (high_region_coordinates_df['longitude'])

[146.448431,
 11.933222,
 4.365,
 10.611521,
 11.502941,
 3.981701,
 11.520198,
 -0.585224]

In [104]:
high_region_map= high_region_coordinates_df.hvplot.points(
    "longitude",
    "latitude",
    geo = True,
    tiles = "OSM"
)

high_region_map

In [121]:
#pull italy's regions weather since that is the country with most regions that produced wines rated at 100
#url ='https://history.openweathermap.org/data/2.5/history/city?lat=43.17&lon=11.93&type=hour&appid=e43774300d0259ed16731921dc4f1fa1'

#url = 'https://history.openweathermap.org/data/2.5/aggregated/year?lat=43.17&lon=11.93&appid=e43774300d0259ed16731921dc4f1fa1'

url= 'https://api.openweathermap.org/data/2.5/weather?lat=43.17&lon=11.93&appid=e43774300d0259ed16731921dc4f1fa1'



weather = requests.get(url).json()

weather

{'coord': {'lon': 11.93, 'lat': 43.17},
 'weather': [{'id': 802,
   'main': 'Clouds',
   'description': 'scattered clouds',
   'icon': '03n'}],
 'base': 'stations',
 'main': {'temp': 277,
  'feels_like': 275.45,
  'temp_min': 273.64,
  'temp_max': 279.42,
  'pressure': 1027,
  'humidity': 96,
  'sea_level': 1027,
  'grnd_level': 992},
 'visibility': 10000,
 'wind': {'speed': 1.76, 'deg': 12, 'gust': 1.7},
 'clouds': {'all': 27},
 'dt': 1706847989,
 'sys': {'type': 2,
  'id': 2089004,
  'country': 'IT',
  'sunrise': 1706855250,
  'sunset': 1706891053},
 'timezone': 3600,
 'id': 3179419,
 'name': 'Castiglione del Lago',
 'cod': 200}

In [127]:
temperature = weather["main"]["temp"]

print(f'The temperature for Vin Santo di Montepulciano is {temperature} in Kelvin.')

The temperature for Vin Santo di Montepulciano is 277 in Kelvin.


In [129]:
sea_level = weather["main"]["sea_level"]

print(f'The sea level for Vin Santo di Montepulciano is {sea_level}.')


The sea level for Vin Santo di Montepulciano is 1027.


In [130]:
max_temp = weather["main"]["temp_max"]

print(f'The maximum temperature for Vin Santo di Montepulciano is {max_temp} in Kelvin.')

The maximum temperature for Vin Santo di Montepulciano is 279.42 in Kelvin.
