# Wine - By the Numbers

#### Note
* Evaluate the quality of wines to determine if tester, region, or climate impacts the score and price of the wine. 

In [22]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests, gmaps, os, re, datetime

# Import API key
from api_keys import g_key

In [23]:
#Import the raw data file downloaded from https://www.kaggle.com/zynicide/wine-reviews
wine_file = "input/winemag-data-130k-v2.csv"
wine_df = pd.read_csv(wine_file)

#Print the header from the dataset
wine_df.head()

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks


In [24]:
#This section is written to clean the data and set up the dataframe for analysis

#Drop the index coulmn that was part of the original dataset
wine_df = wine_df.drop(['Unnamed: 0'], axis=1)
#Drop the region_2 column as the data isn't required and provides little benefit to our analysis
wine_df = wine_df.drop(['region_2'], axis=1)
#Remove designation field as it isn't needed for our analysis and is missing a quarter of the values
wine_df = wine_df.drop(['designation'], axis=1)

#Remove data when the country or province data is blank
wine_df = wine_df.dropna(subset=['country', 'province'])

#Add a Column for the year of the wine
wine_df['Wine_Year'] = ""

#Print the header from the dataset
wine_df.head()

Unnamed: 0,country,description,points,price,province,region_1,taster_name,taster_twitter_handle,title,variety,winery,Wine_Year
0,Italy,"Aromas include tropical fruit, broom, brimston...",87,,Sicily & Sardinia,Etna,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,
1,Portugal,"This is ripe and fruity, a wine that is smooth...",87,15.0,Douro,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,
2,US,"Tart and snappy, the flavors of lime flesh and...",87,14.0,Oregon,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,
3,US,"Pineapple rind, lemon pith and orange blossom ...",87,13.0,Michigan,Lake Michigan Shore,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,
4,US,"Much like the regular bottling from 2012, this...",87,65.0,Oregon,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,


In [25]:
# Read title string and populate with the year from the title string

for index, row in wine_df.iterrows():
    if index % 1000 == 0:
        print(f'Processing record number {index}')
                     
    title_parse = row['title']
    match = re.search('\d{4}', title_parse)
    Year = match.group(0) if match else '1900'
    
    if Year <= '1995' or Year >= '2020': 
        wine_df.loc[index, 'Wine_Year'] = 1900
    else:
        wine_df.loc[index, 'Wine_Year'] = Year

#Print the header from the dataset
wine_df.head()

Processing record number 0
Processing record number 1000
Processing record number 2000
Processing record number 3000
Processing record number 4000
Processing record number 5000
Processing record number 6000
Processing record number 7000
Processing record number 8000
Processing record number 9000
Processing record number 10000
Processing record number 11000
Processing record number 12000
Processing record number 13000
Processing record number 14000
Processing record number 15000
Processing record number 17000
Processing record number 18000
Processing record number 19000
Processing record number 20000
Processing record number 21000
Processing record number 22000
Processing record number 23000
Processing record number 24000
Processing record number 25000
Processing record number 26000
Processing record number 27000
Processing record number 28000
Processing record number 29000
Processing record number 30000
Processing record number 31000
Processing record number 32000
Processing record num

Unnamed: 0,country,description,points,price,province,region_1,taster_name,taster_twitter_handle,title,variety,winery,Wine_Year
0,Italy,"Aromas include tropical fruit, broom, brimston...",87,,Sicily & Sardinia,Etna,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,2013
1,Portugal,"This is ripe and fruity, a wine that is smooth...",87,15.0,Douro,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,2011
2,US,"Tart and snappy, the flavors of lime flesh and...",87,14.0,Oregon,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,2013
3,US,"Pineapple rind, lemon pith and orange blossom ...",87,13.0,Michigan,Lake Michigan Shore,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,2013
4,US,"Much like the regular bottling from 2012, this...",87,65.0,Oregon,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,2012


In [26]:
# Output File (CSV)
output_data_file = "Output/wine_cleaned.csv"
wine_df.to_csv(output_data_file, index=False, header=True)