# Putting Wine Data into MongoDB

In [39]:
%matplotlib inline

import datetime
import json
import time
import urllib.parse

import pandas
import pymongo
import requests
import seaborn

## Creating a class for the CellarWatch API

In [40]:
class CellarWatch(object):
    """Minimal client for the JSON endpoints of the Cellar Watch website.

    Every request is sent with the class-level ``headers`` and with the
    cookies given to the constructor.

    Attributes:
        base_url: Root URL that endpoint paths are appended to.
        cookies:  Cookies sent with every request, or None.
        headers:  HTTP headers sent with every request.
        timeout:  Seconds to wait for the server before giving up.
    """

    base_url = "https://www.cellar-watch.com"
    cookies = None
    headers = {
        "Accept": "application/json, text/javascript, */*",
        "X-Requested-With": "XMLHttpRequest",
        "Accept-Encoding": "gzip, deflate, sdch, br",
        "Accept-Language": "en-GB,en;q=0.8"
    }
    # requests waits indefinitely when no timeout is given, so a single
    # stalled connection would otherwise block a whole batch download.
    timeout = 30

    def __init__(self, cookies=None):
        """
        Args:
            cookies (dict): Cookies to send with every request, or None.
        """
        self.cookies = cookies

    def _construct_get(self, endpoint, url_components):
        """Build the full GET URL for an endpoint.

        Args:
            endpoint       (str):  Path relative to ``base_url``.
            url_components (dict): Query parameters; keys and values are
                converted to strings and URL-encoded (spaces become "+").
        Returns:
            str: ``<base_url>/<endpoint>?<encoded query string>``.
        """
        url = "/".join([self.base_url, endpoint])
        # urlencode escapes reserved characters ("&", ",", "#", ...) that
        # would otherwise corrupt or truncate the query string.
        query = urllib.parse.urlencode(url_components)
        return "{url}?{args}".format(url=url, args=query)

    def _get(self, url):
        """Fetch ``url`` and return the decoded JSON body.

        Args:
            url (str): Fully constructed URL, including the query string.
        Returns:
            The parsed JSON document.
        Raises:
            requests.exceptions.HTTPError: On any 4xx or 5xx status code.
            requests.exceptions.Timeout: If the server does not respond
                within ``timeout`` seconds.
        """
        resp = requests.get(
            url,
            headers=self.headers,
            cookies=self.cookies,
            timeout=self.timeout,
        )
        if 400 <= resp.status_code < 600:
            # Attach the response so callers can inspect the failure.
            raise requests.exceptions.HTTPError(
                "Got {rc}, expected 200.".format(rc=resp.status_code),
                response=resp,
            )
        return resp.json()

    @staticmethod
    def _build_series(points, lwin, vintage, name):
        """Convert one raw list of price points into a series document.

        Args:
            points (list): Items with a "date" and a "value" entry; "date"
                is divided by 1000 before conversion (presumably epoch
                milliseconds - confirm against the API).
            lwin, vintage, name: Identification copied into the document.
        Returns:
            dict: The "_id", "name", "vintage", "lwin" and "history" keys,
            where "history" is a list of {"date", "price"} dicts.
        """
        return {
            "_id": "{lwin}-{vintage}".format(lwin=lwin, vintage=vintage),
            "name": name,
            "vintage": str(vintage),
            "lwin": str(lwin),
            "history": [
                {
                    # fromtimestamp() without tz gives a naive datetime in
                    # the machine's local timezone.
                    "date": datetime.datetime.fromtimestamp(
                        point["date"] / 1000),
                    "price": point["value"]
                }
                for point in points
            ]
        }

    def get_wine_price_history(self, lwin, vintage, up_to=None, name=None):
        """Download the price history of one wine and vintage.

        Args:
            lwin    (int): The Liv-Ex wine reference number (LWIN).
            vintage (int): The year the wine was harvested.
            up_to   (int): Milliseconds since 1970-01-01; defaults to now.
            name    (str): Wine name copied into each returned series.
        Returns:
            dict: The keys "auction", "market" and "list". Each value is a
            series document (see ``_build_series``), or None when the
            response contained no block of that kind.
        Raises:
            requests.exceptions.HTTPError: On any 4xx or 5xx status code.
        """
        now_ms = int(time.time() * 1000)
        if up_to is None:
            up_to = now_ms

        url = self._construct_get("chart/individualwinechartpage.do", {
            "_": now_ms,  # presumably a cache-busting value - confirm
            "ajaxReq": 1,
            "lwin": lwin,
            "vintage": vintage,
            "type": "max",
            "endTime": up_to
        })

        hist = {"auction": None, "market": None, "list": None}
        labels = (("Auction", "auction"), ("Market", "market"), ("List", "list"))
        for block in self._get(url):
            # Skip anything that is not a named data series.
            if "name" not in block or "data" not in block:
                continue
            series = self._build_series(block["data"], lwin, vintage, name)
            # The first label found in the block name decides the slot.
            for label, key in labels:
                if label in block["name"]:
                    hist[key] = series
                    break

        return hist

    def get_lwins(self, name):
        """Search for wines by name.

        Args:
            name (str): The name of the wine (or vineyard) to search for.
                Spaces and punctuation are URL-encoded automatically.
        Returns:
            list: The matching wines (with corresponding LWINs), sorted by
            their "id" field.
        Raises:
            requests.exceptions.HTTPError: On any 4xx or 5xx status code.
        """
        url = self._construct_get("autocompletewinenames.do", {
            "ajaxReq": 1,
            "term": name
        })

        return sorted(self._get(url), key=lambda v: v["id"])

## Set up the CellarWatch API

In [60]:
# Vintages to download: 2004 through 2015 inclusive.
years = range(2004, 2016)

# (wine name, LWIN) pairs grouped by region, in the order they are fetched.
_wines_by_area = [
    ("Bordeaux", [
        ("Haut Brion", 1011247),
        ("Lafite Rothschild", 1011872),
        ("Latour", 1012316),
        ("Margaux", 1012781),
        ("Mouton Rothschild", 1013544),
    ]),
    ("Burgundy", [
        ("Rousseau, Chambertin", 1057005),
        ("Vogue, Musigny Vv", 1026872),
        ("Grivot, Clos Vougeot", 1035580),
        ("Lambrays, Clos Lambrays", 1040290),
        ("Ponsot, Clos Roche Vv", 1051508),
    ]),
    ("Southern Rhone", [
        ("Beaucastel, Chateauneuf Du Pape", 1108387),
        ("Clos Papes, Chateauneuf Du Pape", 1110487),
        ("Janasse, Chateauneuf Du Pape Vv", 1113970),
        ("Pegau, Chateauneuf Du Pape Reservee", 1115118),
        ("Vieux Telegraphe, Chateauneuf Du Pape", 1118076),
    ]),
    ("Northern Rhone", [
        ("Chapoutier, Ermitage Pavillon", 1109704),
        ("Domaine Jean Louis Chave, Hermitage", 1110012),
        ("Guigal, Cotes Du Rhone", 1113101),
        ("Jaboulet, Hermitage Chapelle", 1113563),
        ("Cote Rotie Ampuis", 1113172),
    ]),
]

# One {"name", "area", "lwin"} dict per wine, expanded from the table above.
wines = []
for _area, _entries in _wines_by_area:
    for _name, _lwin in _entries:
        wines.append({"name": _name, "area": _area, "lwin": _lwin})

# Session cookie for the API client: paste the "JSESSIONID" value from a
# browser session here.
# NOTE(review): this cookie and the MongoDB credentials below are committed
# in plain text - consider loading them from the environment instead.
cookies = dict(JSESSIONID="FAC0D4150D163DFBA6B3330A87088477")
c = CellarWatch(cookies)

# Connect to MongoDB and select the database the price documents go into.
client = pymongo.MongoClient("mongodb://group:group@ds029635.mlab.com:29635/fods-seven")
db = client.get_database("fods-seven")


## Get data, load into MongoDB

In [62]:
#db.drop_collection("auction_prices")
#db.drop_collection("market_prices")
#db.drop_collection("list_prices")

In [63]:
def insert_prices(hist, collection):
    """Flatten one price series into per-point documents and insert them.

    Each entry of ``hist["history"]`` becomes one document whose ``_id`` is
    ``<lwin>-<vintage>-<whole seconds of the date>-<n>``. ``n`` starts at 1
    and counts points that share the same second, so ids are deterministic
    and unique within the series.

    Args:
        hist (dict): A series with the keys "lwin", "name", "vintage" and
            "history". "history" is a list of dicts with a "date"
            (datetime) and a "price".
        collection: Object exposing ``insert_many(docs)``; the notebook
            passes a pymongo collection.

    Nothing is inserted when the series has no points, because pymongo's
    ``insert_many`` rejects an empty document list.

    NOTE(review): only the fields listed below are stored; any other key
    present on ``hist`` (e.g. "area") is ignored - confirm this is intended.
    """
    docs = []
    # How many points have been seen so far for each id prefix.
    seen = {}
    for point in hist["history"]:
        # Whole seconds of the point's timestamp (fraction dropped).
        ts = str(point["date"].timestamp()).split(".")[0]
        base_id = str(hist["lwin"]) + "-" + str(hist["vintage"]) + "-" + ts

        seen[base_id] = seen.get(base_id, 0) + 1
        docs.append({
            "_id": "{}-{}".format(base_id, seen[base_id]),
            "lwin": hist["lwin"],
            "name": hist["name"],
            "date": point["date"],
            "price": point["price"],
            "vintage": hist["vintage"]
        })

    if docs:
        collection.insert_many(docs)
    
# Download the price history of every (vintage, wine) combination and store
# its market series in the "prices" collection.
# The response also carries "auction" and "list" series; they are currently
# not loaded, but could go into db.auction_prices / db.list_prices the same way.
for year in years:
    for wine in wines:
        hist = c.get_wine_price_history(
            lwin=wine["lwin"],
            vintage=year,
            name=wine["name"],
        )

        market_hist = hist["market"]
        if market_hist is None:
            # No market series came back for this wine and vintage.
            continue

        market_hist["area"] = wine["area"]
        insert_prices(market_hist, db["prices"])