In [1]:
import os
import pandas as pd
import numpy as np
import string
from operator import itemgetter
from collections import Counter, OrderedDict

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import SnowballStemmer
from nltk.corpus import stopwords

from gensim.models.phrases import Phrases, Phraser
from gensim.models import Word2Vec

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA
from matplotlib import pyplot as plt

In [3]:
wine = pd.read_csv('../raw_data/winemag-data-130k-v2.csv')
wine_list = list(wine['description'])
#wine_list_normalized = [clean_description(w) for w in wine_list]
#wine_list_preprocessed = list(set(wine_list_normalized))

In [5]:
full_wine_reviews_list = [str(r) for r in wine_list]
full_wine_corpus = ' '.join(wine_list)
wine_sentences_tokenized = sent_tokenize(full_wine_corpus)

In [7]:
stop_words = set(stopwords.words('english')) 

punctuation_table = str.maketrans({key: None for key in string.punctuation})
sno = SnowballStemmer('english')

def normalize_text(raw_text):
    try:
        word_list = word_tokenize(raw_text)
        normalized_sentence = []
        for w in word_list:
            try:
                w = str(w)
                lower_case_word = str.lower(w)
                stemmed_word = sno.stem(lower_case_word)
                no_punctuation = stemmed_word.translate(punctuation_table)
                if len(no_punctuation) > 1 and no_punctuation not in stop_words:
                    normalized_sentence.append(no_punctuation)
            except:
                continue
        return normalized_sentence
    except:
        return ''

# normalized_sentences = []
# for s in sentences_tokenized:
#     normalized_text = normalize_text(s)
#     normalized_sentences.append(normalized_text)

normalized_wine_sentences = []
for s in wine_sentences_tokenized:
    normalized_text = normalize_text(s)
    normalized_wine_sentences.append(normalized_text)

In [8]:
import pickle

In [9]:
with open("../notebooks/g_wine_preprocessing.txt", "rb") as fp:   # Unpickling
    wine_sentences = pickle.load(fp)

In [12]:
wine_word2vec_model = Word2Vec.load("../raw_data/food_word2vec_model.bin")

In [84]:
descriptor_mapping = pd.read_csv('../raw_data/descriptor_mapping.csv',delimiter=';', encoding='latin1').set_index('ï»¿raw descriptor')

In [14]:
wine_trigram_model = Phraser.load('wine_trigrams.pkl')

In [25]:
variety_mapping = {'Shiraz': 'Syrah', 'Pinot Gris': 'Pinot Grigio', 'Pinot Grigio/Gris': 'Pinot Grigio', 
                   'Garnacha, Grenache': 'Grenache', 'Garnacha': 'Grenache', 'CarmenÃ¨re': 'Carmenere',
                    'GrÃ¼ner Veltliner': 'Gruner Veltliner', 'TorrontÃ©s': 'Torrontes', 
                   'RhÃ´ne-style Red Blend': 'Rhone-style Red Blend', 'AlbariÃ±o': 'Albarino',
                  'GewÃ¼rztraminer': 'Gewurztraminer', 'RhÃ´ne-style White Blend': 'Rhone-style White Blend',
                  'SpÃƒÂ¤tburgunder, Pinot Noir': 'Pinot Noir', 'Sauvignon, Sauvignon Blanc': 'Sauvignon Blanc',
                  'Pinot Nero, Pinot Noir': 'Pinot Noir', 'Malbec-Merlot, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend',
                  'Meritage, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend', 'Garnacha, Grenache': 'Grenache',
                   'FumÃ© Blanc': 'Sauvignon Blanc', 'Cabernet Sauvignon-Cabernet Franc, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend',
                   'Cabernet Merlot, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend', 'Cabernet Sauvignon-Merlot, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend',
                   'Cabernet Blend, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend', 'Malbec-Cabernet Sauvignon, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend',
                   'Merlot-Cabernet Franc, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend', 'Merlot-Cabernet Sauvignon, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend',
                   'Cabernet Franc-Merlot, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend', 'Merlot-Malbec, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend',
                   'Cabernet, Bordeaux-style Red Blend': 'Bordeaux-style Red Blend', 'Primitivo, Zinfandel': 'Zinfandel',
                   'AragonÃªs, Tempranillo': 'Aragonez, Tempranillo'
                  }

def consolidate_varieties(variety_name):
    if variety_name in variety_mapping:
        return variety_mapping[variety_name]
    else:
        return variety_name

wine_df_clean = wine.copy()
wine_df_clean['variety'] = wine_df_clean['variety'].apply(consolidate_varieties)

In [41]:
order_of_geographies = ['region_1', 'province', 'country']

# replace any nan values in the geography columns with the word none
def replace_nan_for_zero(value):
    if str(value) == '0' or str(value) == 'nan':
        return 'none'
    else:
        return value

for o in order_of_geographies:
    wine_df_clean[o] = wine_df_clean[o].apply(replace_nan_for_zero)

wine_df_clean.loc[:, order_of_geographies].fillna('none', inplace=True)

In [34]:
wine.head(2)

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos


In [36]:
variety_geo = wine_df_clean.groupby(['variety', 'country', 'province', 'region_1']).size().reset_index().rename(columns={0:'count'})
variety_geo_sliced = variety_geo.loc[variety_geo['count'] > 1]

vgeos_df = pd.DataFrame(variety_geo_sliced, columns=['variety', 'country', 'province', 'region_1', 'count']) 
vgeos_df.to_csv('varieties_all_geos.csv')

In [107]:
variety_geo_df = pd.read_csv('../raw_data/varieties_all_geos_normalized.csv', index_col=0)
wine_df_merged = pd.merge(left=wine_df_clean, right=variety_geo_df, left_on=['variety', 'country', 'province', 'region_1', 'region_2'],
                         right_on=['Variety', 'Country', 'Province', 'Region', 'Subregion'])
wine_df_merged.drop(['Unnamed: 0', 'Country', 'taster_name', 'taster_twitter_handle',
                     'Region', 'Subregion', 'count'], 
                    axis=1, inplace=True)
wine_df_merged.shape

(4157, 14)

In [108]:
'''variety_geo_df = pd.read_csv('../raw_data/varieties_all_geos_normalized.csv', index_col=0)

wine_df_merged = pd.merge(left=wine_df_clean, right=variety_geo_df, left_on=['variety', 'country', 'province', 'region_1'],
                         right_on=['variety', 'country', 'province', 'region_1'])
wine_df_merged.drop(['Unnamed: 0', 'country', 
                     'designation', 'province', 
                     'region_1', 'taster_name', 'taster_twitter_handle', 'region_2', 'winery', 'count'], 
                    axis=1, inplace=True)
wine_df_merged.shape'''

"variety_geo_df = pd.read_csv('../raw_data/varieties_all_geos_normalized.csv', index_col=0)\n\nwine_df_merged = pd.merge(left=wine_df_clean, right=variety_geo_df, left_on=['variety', 'country', 'province', 'region_1'],\n                         right_on=['variety', 'country', 'province', 'region_1'])\nwine_df_merged.drop(['Unnamed: 0', 'country', \n                     'designation', 'province', \n                     'region_1', 'taster_name', 'taster_twitter_handle', 'region_2', 'winery', 'count'], \n                    axis=1, inplace=True)\nwine_df_merged.shape"

In [111]:
wine_df_merged.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,title,variety,winery,Variety,Province,geo_normalized
0,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Grigio,Rainstorm,Pinot Grigio,Oregon,"Willamette Valley, Oregon, USA"
1,US,"Quite tart, the fruit here touches (just barel...",Renovação Estate Vineyards,87,20.0,Oregon,Willamette Valley,Willamette Valley,Coelho 2015 Renovação Estate Vineyards Pinot G...,Pinot Grigio,Coelho,Pinot Grigio,Oregon,"Willamette Valley, Oregon, USA"
2,US,"Creamy and fresh, this is made from estate-gro...",,88,14.0,Oregon,Willamette Valley,Willamette Valley,Stanton Vineyard 2013 Pinot Gris (Willamette V...,Pinot Grigio,Stanton Vineyard,Pinot Grigio,Oregon,"Willamette Valley, Oregon, USA"
3,US,"This is a neutral, papery white wine, with a f...",Cloud Rest Vineyard,85,18.0,Oregon,Willamette Valley,Willamette Valley,Oak Knoll 2014 Cloud Rest Vineyard Pinot Gris ...,Pinot Grigio,Oak Knoll,Pinot Grigio,Oregon,"Willamette Valley, Oregon, USA"
4,US,"Fruit-driven, with pear and apple, but an inte...",,90,16.0,Oregon,Willamette Valley,Willamette Valley,Lange 2010 Pinot Gris (Willamette Valley),Pinot Grigio,Lange,Pinot Grigio,Oregon,"Willamette Valley, Oregon, USA"


In [114]:
variety_geos = wine_df_merged.groupby(['Variety', 'geo_normalized']).size()
at_least_n_types = variety_geos[variety_geos > 30].reset_index()
# at_least_n_types.head(10)
wine_df_merged_filtered = pd.merge(wine_df_merged, at_least_n_types, left_on=['Variety', 'geo_normalized'], right_on=['Variety', 'geo_normalized'])
wine_df_merged_filtered = wine_df_merged_filtered[['title', 'Variety', 'geo_normalized', 'description']]
print(wine_df_merged_filtered.shape)

(2960, 4)


In [115]:
wine_reviews = list(wine_df_merged['description'])

core_tastes = ['aroma', 'weight', 'sweet', 'acid', 'salt', 'piquant', 'fat', 'bitter']

descriptor_mappings = dict()
for c in core_tastes:
    descriptor_mapping_filtered=descriptor_mapping[descriptor_mapping['type']==c]
    descriptor_mappings[c] = descriptor_mapping_filtered                                                   
    

def return_descriptor_from_mapping(descriptor_mapping, word, core_tastes):
    if word in list(descriptor_mapping.index):
        descriptor_to_return = descriptor_mapping['level_3'][word]
        return descriptor_to_return
    else:
        return None

review_descriptors = []
for review in wine_reviews:
    taste_descriptors = []
    normalized_review = normalize_text(review)
    phrased_review = wine_trigram_model[normalized_review]
    #print(normalized_review)
    
    for c in core_tastes:                                                      
        descriptors_only = [return_descriptor_from_mapping(descriptor_mappings[c], word, c) for word in phrased_review]
        no_nones = [str(d).strip() for d in descriptors_only if d is not None]
        descriptorized_review = ' '.join(no_nones)
        taste_descriptors.append(descriptorized_review)
    review_descriptors.append(taste_descriptors)


In [82]:
review_descriptors

[['herb sage',
  '',
  'tropical_fruit fruit dry apple citrus dry',
  'brisk',
  '',
  '',
  '',
  ''],
 ['mineral jasmine jasmine', '', 'pear peach honeysuckle', '', '', '', '', ''],
 ['white_flower flint white white minerality',
  '',
  'stone_fruit unripe peach citrus',
  'crisp refreshing vibrant',
  '',
  '',
  '',
  ''],
 ['yellow minerality', '', 'apple citrus_peel', 'crisp', '', '', '', ''],
 ['white minerality', '', 'dry', 'crisp fresh', '', '', '', ''],
 ['acacia yeast bread_crust savory green meditteranean_herbs minerality almond',
  '',
  'peach apple lemon_peel',
  'tart bright',
  '',
  '',
  '',
  ''],
 ['acacia white gold minerality meditteranean_herbs',
  '',
  'fruit apple pear dry',
  'crisp',
  '',
  '',
  '',
  ''],
 ['flower white_flower hay meditteranean_herbs green minerality',
  '',
  'pear citrus beeswax apple citrus',
  'bright fresh',
  '',
  '',
  '',
  'polished'],
 ['green minerality rosemary',
  '',
  'honeysuckle stone_fruit apple citrus_peel',
  'brigh

In [116]:
taste_descriptors = []
taste_vectors = []

for n, taste in enumerate(core_tastes):
    print(taste)
    taste_words = [r[n] for r in review_descriptors]
    
    vectorizer = TfidfVectorizer()
    X = vectorizer.fit(taste_words)
    dict_of_tfidf_weightings = dict(zip(X.get_feature_names(), X.idf_))
        
    wine_review_descriptors = []
    wine_review_vectors = []
    
    for d in taste_words:
        descriptor_count = 0
        weighted_review_terms = []
        terms = d.split(' ')
        for term in terms:
            if term in dict_of_tfidf_weightings.keys():
                tfidf_weighting = dict_of_tfidf_weightings[term]
                try:
                    word_vector = wine_word2vec_model.wv.get_vector(term).reshape(1, 300)
                    weighted_word_vector = tfidf_weighting * word_vector
                    weighted_review_terms.append(weighted_word_vector)
                    descriptor_count += 1
                except:
                    continue
            else:
                continue
        try:
            review_vector = sum(weighted_review_terms)/len(weighted_review_terms)
            review_vector = review_vector[0]
        except:
            review_vector = np.nan
#         terms_and_vec = [terms, review_vector]
        wine_review_vectors.append(review_vector)
        wine_review_descriptors.append(terms)
    
    taste_vectors.append(wine_review_vectors)
    taste_descriptors.append(wine_review_descriptors)
    

taste_vectors_t = list(map(list, zip(*taste_vectors)))
taste_descriptors_t = list(map(list, zip(*taste_descriptors)))

review_vecs_df = pd.DataFrame(taste_vectors_t, columns=core_tastes)

columns_taste_descriptors = [a + '_descriptors' for a in core_tastes]
review_descriptors_df = pd.DataFrame(taste_descriptors_t, columns=columns_taste_descriptors)

wine_df_vecs = pd.concat([wine_df_merged, review_descriptors_df, review_vecs_df], axis=1)
wine_df_vecs.head(5)

aroma
weight
sweet
acid
salt
piquant
fat
bitter


Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,title,variety,...,fat_descriptors,bitter_descriptors,aroma,weight,sweet,acid,salt,piquant,fat,bitter
0,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Grigio,...,[],[],"[-2.3044057, 1.7656577, -1.8665085, -5.7635765...",,"[0.66069156, 1.1582786, -4.7787557, -5.734443,...","[0.5593994, 2.9896472, -1.1942301, -2.1565619,...",,,,
1,US,"Quite tart, the fruit here touches (just barel...",Renovação Estate Vineyards,87,20.0,Oregon,Willamette Valley,Willamette Valley,Coelho 2015 Renovação Estate Vineyards Pinot G...,Pinot Grigio,...,[],[],"[3.4671462, -0.65168643, 2.7401485, -1.7691413...",,"[-0.7474705, 2.661012, -1.3973554, -5.317113, ...","[-2.4111335, 0.49238068, -2.552854, -4.102157,...",,,,
2,US,"Creamy and fresh, this is made from estate-gro...",,88,14.0,Oregon,Willamette Valley,Willamette Valley,Stanton Vineyard 2013 Pinot Gris (Willamette V...,Pinot Grigio,...,[cream],[],"[-1.4443628, -1.4515445, 0.6221052, -2.2628999...",,"[0.040976427, 0.48068962, -1.0463705, -4.39017...","[3.874277, 1.713511, 0.7658327, -3.1345055, 5....",,,"[-3.4364169, 4.4776816, 3.0765462, -1.009486, ...",
3,US,"This is a neutral, papery white wine, with a f...",Cloud Rest Vineyard,85,18.0,Oregon,Willamette Valley,Willamette Valley,Oak Knoll 2014 Cloud Rest Vineyard Pinot Gris ...,Pinot Grigio,...,[],[],"[2.6959183, -0.6034039, -4.4925923, -2.9032035...",,"[-1.4620538, -0.22076248, -0.9846976, -1.64091...","[1.0365291, -3.7425401, -2.6575792, -2.730917,...",,"[-6.965675, 6.647032, 0.32544118, 8.876246, 4....",,
4,US,"Fruit-driven, with pear and apple, but an inte...",,90,16.0,Oregon,Willamette Valley,Willamette Valley,Lange 2010 Pinot Gris (Willamette Valley),Pinot Grigio,...,[],[],,"[-4.169179, 1.1868968, 0.30094403, -6.1035743,...","[-0.8910833, -0.31242326, -2.0574894, -3.88909...",,,"[-6.965675, 6.647032, 0.32544118, 8.876246, 4....",,


In [117]:
# pull the average embedding for the wine attribute across all wines. 
avg_taste_vecs = dict()
for t in core_tastes:
    # look at the average embedding for a taste, across all wines that have descriptors for that taste 
    review_arrays = wine_df_vecs[t].dropna()
    average_taste_vec = np.average(review_arrays)
    avg_taste_vecs[t] = average_taste_vec

In [118]:
normalized_geos = list(set(zip(wine_df_vecs['variety'], wine_df_vecs['geo_normalized'])))

def subset_wine_vectors(list_of_varieties, wine_attribute):
    wine_variety_vectors = []
    for v in list_of_varieties:

        one_var_only = wine_df_vecs.loc[(wine_df_vecs['Variety'] == v[0]) & 
                                                (wine_df_vecs['geo_normalized'] == v[1])]
        if len(list(one_var_only.index)) < 1 or str(v[1][-1]) == '0':
            continue
        else:
            taste_vecs = list(one_var_only[wine_attribute])
            taste_vecs = [avg_taste_vecs[wine_attribute] if 'numpy' not in str(type(x)) else x for x in taste_vecs]
            average_variety_vec = np.average(taste_vecs, axis=0)
            
            descriptor_colname = wine_attribute + '_descriptors'
            all_descriptors = [i[0] for i in list(one_var_only[descriptor_colname])]
            word_freqs = Counter(all_descriptors)
            most_common_words = word_freqs.most_common(50)
            top_n_words = [(i[0], "{:.2f}".format(i[1]/len(taste_vecs))) for i in most_common_words]
            top_n_words = [i for i in top_n_words if len(i[0])>2]
            wine_variety_vector = [v, average_variety_vec, top_n_words]
                
            wine_variety_vectors.append(wine_variety_vector)
            
    return wine_variety_vectors


def pca_wine_variety(list_of_varieties, wine_attribute, pca=True):
    wine_var_vectors = subset_wine_vectors(normalized_geos, wine_attribute)
    
    wine_varieties = [str(w[0]).replace('(', '').replace(')', '').replace("'", '').replace('"', '') for w in wine_var_vectors]
    wine_var_vec = [w[1] for w in wine_var_vectors]
    if pca:
        pca = PCA(1)
        wine_var_vec = pca.fit_transform(wine_var_vec)
        wine_var_vec = pd.DataFrame(wine_var_vec, index=wine_varieties)
    else:
        wine_var_vec = pd.Series(wine_var_vec, index=wine_varieties)
    wine_var_vec.sort_index(inplace=True)
    
    wine_descriptors = pd.DataFrame([w[2] for w in wine_var_vectors], index=wine_varieties)
    wine_descriptors = pd.melt(wine_descriptors.reset_index(), id_vars='index')
    wine_descriptors.sort_index(inplace=True)
    
    return wine_var_vec, wine_descriptors

taste_dataframes = []
# generate the dataframe of aromas vectors as output, 
aroma_vec, aroma_descriptors = pca_wine_variety(normalized_geos, 'aroma', pca=False)
taste_dataframes.append(aroma_vec)

# generate the dataframes of nonaroma scalars
for tw in core_tastes[1:]:
    pca_w_dataframe, nonaroma_descriptors = pca_wine_variety(normalized_geos, tw, pca=True)
    taste_dataframes.append(pca_w_dataframe)
    
# combine all the dataframes created above into one 
all_nonaromas = pd.concat(taste_dataframes, axis=1)
all_nonaromas.columns = core_tastes

In [119]:
all_nonaromas

Unnamed: 0,aroma,weight,sweet,acid,salt,piquant,fat,bitter
"Bordeaux-style Red Blend, Finger Lakes, New York, USA","[-3.226746, 1.7810324, 0.24879082, -0.5414489,...",-0.030542,-16.017256,-3.070442,-0.149744,2.258673,-0.835478,-5.074528
"Bordeaux-style Red Blend, Long Island, New York, USA","[0.06627969, -1.5383883, 1.3340853, 1.2963829,...",-2.164904,-18.015681,-7.464947,-0.149744,-5.160767,-0.835478,-5.240400
"Bordeaux-style Red Blend, Sierra Foothills, California, USA","[-3.3335855, -1.2797539, 1.4143531, -1.6117953...",-10.952851,-6.562218,-1.269620,-0.149745,-1.357033,11.643385,-13.837542
"Bordeaux-style Red Blend, South Coast, California, USA","[-3.9725761, 4.5112786, 1.0302643, 0.9029363, ...",-9.925279,-28.133741,-1.404779,-0.149744,-0.593666,-0.835478,0.865868
"Cabernet Franc, Finger Lakes, New York, USA","[-2.397506, 1.0332931, 0.472259, -0.4593114, -...",-1.704316,-14.265391,-4.267620,0.488892,-1.547599,-0.830551,-0.729579
...,...,...,...,...,...,...,...,...
"White Blend, Sierra Foothills, California, USA","[-1.4988766, 0.14053038, 0.92164135, 0.2661283...",-0.881381,17.292646,-0.320422,-0.149744,-3.068713,4.192974,-2.754886
"White Blend, Southern Oregon, Oregon, USA","[-2.0954926, 0.8583569, 0.16689874, -1.1808553...",-5.383383,18.265796,6.483460,-0.149744,-1.865767,-0.835478,0.865868
"White Blend, Willamette Valley, Oregon, USA","[-0.29822418, 0.046197098, -3.7454133, -4.2599...",15.473466,12.493213,2.018470,-0.149745,-2.498153,-0.835479,0.865868
"Zinfandel, Sierra Foothills, California, USA","[-3.6750522, 1.0363878, 1.1606226, -0.08287661...",-2.012935,-13.769359,-1.044039,-0.149744,3.013351,-2.068554,-4.271967


In [120]:
# save the 50 top descriptors for each wine variety as a CSV file. We will us this later to dig deeper into our proposed wine recommendations.

aroma_descriptors_copy = aroma_descriptors.copy()
aroma_descriptors_copy.set_index('index', inplace=True)
aroma_descriptors_copy.dropna(inplace=True)

aroma_descriptors_copy = pd.DataFrame(aroma_descriptors_copy['value'].tolist(), index=aroma_descriptors_copy.index)
aroma_descriptors_copy.columns = ['descriptors', 'relative_frequency']
aroma_descriptors_copy.to_csv('wine_variety_descriptors.csv')

In [121]:
def normalize(df, cols_to_normalize):
    for feature_name in cols_to_normalize:
        print(feature_name)
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()
        df[feature_name] = df[feature_name].apply(lambda x: (x- min_value)/(max_value-min_value))
#         (df[feature_name] - min_value) / (max_value - min_value)
    return df

all_nonaromas_normalized = normalize(all_nonaromas, cols_to_normalize=core_tastes[1:])
all_nonaromas_normalized.to_csv('wine_aromas_nonaromas.csv')



weight
sweet
acid
salt
piquant
fat
bitter
