# Learn to talk about wine using JavaScript
This is the notebook used to write the post published on Medium.

## Loading the Library

In [2]:
Collection = require('dstools').Collection;

{ [Function] registerFunction: [Function] }

## Loading the Data

In [3]:
//load the wine reviews CSV into a dstools Collection
//the path points at the author's home directory - change it to your local copy
//`data` is assigned without a declaration; the cells below read it
data = Collection().loadCSV('/home/elshor/data/winemag-data-130k-v2.csv')

In [5]:
//preview the first rows of the collection
data.head().show()

id,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimstone and dried herb. The palate isn't overly expressive, offering unripened apple, citrus and dried sage alongside brisk acidity.",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth while still structured. Firm tannins are filled out with juicy red berry fruits and freshened with acidity. It's already drinkable, although it will certainly be better from 2016.",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and rind dominate. Some green pineapple pokes through, with crisp acidity underscoring the flavors. The wine was all stainless-steel fermented.",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom start off the aromas. The palate is a bit more opulent, with notes of honey-drizzled guava and mango giving way to a slightly astringent, semidry finish.",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling (Lake Michigan Shore),Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this comes across as rather rough and tannic, with rustic, earthy, herbal characteristics. Nonetheless, if you think of it as a pleasantly unfussy country wine, it's a good companion to a hearty winter stew.",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child Block Pinot Noir (Willamette Valley),Pinot Noir,Sweet Cheeks


In [6]:
data.fields().data().join();//list the field names as one comma-separated string

'id,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery'

## Term Counts

In [7]:
//count the terms used in the descriptions and show the five most frequent
data
.terms({field:'description'}) //extract terms in field description
.dropStopwords('term')//remove stopwords
.sortDesc('count') //sort by count, highest first
.head(5)//keep the top 5 terms
.show();//show it

term,count
wine,80166
flavors,60322
fruit,49671
palate,38516
aromas,35450


## Word Cloud Visualization

In [8]:
//word cloud of the 50 most frequent non-stopword terms in the descriptions
data.terms({field:'description'}).dropStopwords('term')
.sortDesc('count').head(50)
.wordCloud('term','count')//arguments are label and measure
.show();

## Cabernet Sauvignon Term Count

In [9]:
//list the 30 most frequent terms in Cabernet Sauvignon descriptions
data
.terms({field:'description',groupBy:'variety'}) //group by variety
.dropStopwords('term')//remove stopwords
.filterEqual('variety','Cabernet Sauvignon')//only show cabernet sauvignon terms
.sortDesc('count') //sort by count, highest first
.head(30)//top 30 terms
.column('term')//get 'term' column
.data().join(', ');//show the terms as a comma-separated list

'flavors, wine, black, tannins, fruit, Cabernet, cherry, finish, oak, aromas, blackberry, palate, cassis, chocolate, currant, plum, ripe, red, notes, berry, dark, rich, dry, soft, sweet, full, years, bodied, spice, tannic'

## Using TFIDF

In [10]:
//rank Cabernet Sauvignon terms by tfidf instead of raw count
data
.terms({field:'description',groupBy:'variety',calc:'tfidf,idf'}) //calculate tfidf and idf
.dropStopwords('term')//remove stopwords
.filterEqual('variety','Cabernet Sauvignon')//only show cabernet sauvignon terms
.sortDesc('tfidf') //sort by tfidf, highest first
.head(30)//top 30 terms
.show()

term,variety,tfidf,idf
Cabernet,Cabernet Sauvignon,0.0234624405933873,3.028431172743317
black,Cabernet Sauvignon,0.0167331787158781,1.8877073987250803
tannins,Cabernet Sauvignon,0.016253677803851,1.8407188892891613
flavors,Cabernet Sauvignon,0.0161688951110702,1.2224365876933898
cherry,Cabernet Sauvignon,0.0142472654925299,1.9226759965628268
wine,Cabernet Sauvignon,0.0139053707062576,1.147571708729216
blackberry,Cabernet Sauvignon,0.0128366678329194,2.233154497106992
oak,Cabernet Sauvignon,0.0125397640943125,2.000349034881045
fruit,Cabernet Sauvignon,0.0116257568513991,1.3464225674743808
cassis,Cabernet Sauvignon,0.0108284103697791,2.670681537674819


## Filter using IDF

In [11]:
//word cloud of Cabernet Sauvignon terms, restricted to terms with idf above 2
data
.terms({field:'description',groupBy:'variety',calc:'tfidf,idf'})//calculate tfidf and idf
.dropStopwords('term')//remove stopwords
.filterEqual('variety','Cabernet Sauvignon')//only cabernet sauvignon terms
.filter((term)=>term.idf>2)//drops e.g. 'wine' and 'flavors', whose idf is below 2 in the table above
.sortDesc('tfidf')//sort by tfidf, highest first
.head(50)//top 50 terms
.wordCloud('term','tfidf',{title:'Word Cloud for Cabernet Sauvignon'})//label is term, measure is tfidf
.show()

In [12]:
//word cloud of Chardonnay terms, restricted to terms with idf above 2
data
.terms({field:'description',groupBy:'variety',calc:'tfidf,idf'})//calculate tfidf and idf
.dropStopwords('term')//remove stopwords
.filterEqual('variety','Chardonnay')//only chardonnay terms
.filter((term)=>term.idf>2)//keep only terms whose idf is above 2
.sortDesc('tfidf')//sort by tfidf, highest first
.head(50)//top 50 terms
.wordCloud('term','tfidf',{title:'Word Cloud for Chardonnay'})//label is term, measure is tfidf
.show()

## word2vec

In [4]:
//build the text file that word2vec is trained on in the next cell
//NOTE(review): punctuation does not appear to be stripped here - the similarity
//output at the end of the notebook contains tokens such as 'blackberry,'
data
.column('description')//get the vector of wine descriptions
.toLowerCase()//turn the descriptions into lower case
.merge()//merge all descriptions into one string
.save('wine-descriptions.txt')//save them into a file



[Wrapper object]

In [6]:
//train a word2vec model on the saved descriptions; the model goes to wine-model.txt
//`word2vec` is assigned without a declaration; the next cell reads it
word2vec = require( 'word2vec' );
word2vec.word2vec('wine-descriptions.txt','wine-model.txt');

[0m[2Jc[36mStarting training using file ../../../wine-descriptions.txt
[36m200K300K400K500K600K700K800K900K1000K1100K1200K1300K1400K1500K1600K1700K1800K1900K2000K2100K2200K2300K2400K2500K2600K2700K2800K2900K3000K3100K3200K3300K3400K3500K3600K3700K3800K3900K4000K4100K4200K4300K4400K4500K4600K4700K4800K4900K5000K5100K5200K5300KVocab size: 20422
Words in train file: 5298121
Alpha: 0.045573  Progress: 8.89%  Words/thread/sec: 295.22k  [36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m[36m

In [7]:
//load the model from file and print the 10 terms most similar to each base term
word2vec.loadModel('wine-model.txt', function( err, model ) {
  //the callback's first argument reports a failed load - stop here instead of
  //dereferencing a model that was never built
  if (err) {
    console.error('Could not load wine-model.txt:', err);
    return;
  }
  ['blackberry','chocolate','tropical','mineral','green']//terms
  .forEach((base)=>{
    //most similar function returns terms most similar to base
    const similar = model.mostSimilar(base,10);
    //NOTE(review): mostSimilar presumably yields no result for a term that is
    //not in the vocabulary - guard so one missing term does not abort the loop
    if (!similar) {
      console.log(base + ': (no similar terms found)');
      return;
    }
    console.log(base + ': ' + similar.map((term)=>term.word).join());//show terms in list
  });
});

blackberry: berry,raspberry,blueberry,blackberry,,boysenberry,black,dark-berry,strawberry,black-cherry,red-berry
chocolate: chocolate,,mocha,cocoa,licorice,coffee,carob,molasses,tobacco,cola,cedar
tropical: passion,kiwi,stone,pineapple,lychee,yellow,melon,mango,peachy,papaya
mineral: nervy,steely,minerality,flinty,minerally,mineral,,mineral-driven,lemon-zest,tangy,saline
green: bruised,sliced,cider,green,,fresh-cut,grassy,cucumber,gala,underripe,yellow
