# Putting Wine Data into MongoDB

In [None]:
import datetime
import json
import time
import urllib.parse

import pymongo
import requests

## Creating a class for the CellarWatch API

In [None]:
class CellarWatch(object):
    """Small client for the JSON endpoints served by the CellarWatch web site.

    Every request is sent with the class-level ``headers`` and the session
    cookies supplied to the constructor.
    """

    # Root that every endpoint path is appended to.
    base_url = "https://www.cellar-watch.com"
    # Session cookies sent with each request; overridden per instance.
    cookies = None
    # Headers sent with every request (ask for JSON, flag the call as AJAX).
    headers = {
        "Accept": "application/json, text/javascript, */*",
        "X-Requested-With": "XMLHttpRequest",
        "Accept-Encoding": "gzip, deflate, sdch, br",
        "Accept-Language": "en-GB,en;q=0.8"
    }
    # Seconds to wait for the server. Without a timeout ``requests`` waits
    # forever on an unresponsive host. Override on the class or an instance.
    timeout = 30
    # Reference point for converting millisecond timestamps to UTC datetimes.
    _EPOCH = datetime.datetime(1970, 1, 1)

    def __init__(self, cookies=None):
        """
        Args:
            cookies (dict): Cookie name -> value mapping sent with each request.
        """
        self.cookies = cookies

    def _construct_get(self, endpoint, url_components):
        """
        Build the full GET URL for an endpoint.

        Args:
            endpoint       (str): Path below ``base_url``.
            url_components (dict): Query parameters; keys and values are
                URL-encoded (spaces become ``+``), in the mapping's order.
        Returns:
            str: ``<base_url>/<endpoint>?<encoded query string>``.
        """
        url = "/".join([self.base_url, endpoint])
        # urlencode escapes reserved characters (&, =, ', non-ASCII, ...) that
        # plain string concatenation would let corrupt the query string.
        query = urllib.parse.urlencode(url_components)
        return "{url}?{args}".format(url=url, args=query)

    def _get(self, url):
        """
        Fetch a URL and decode the JSON response body.

        Args:
            url (str): The complete URL to request.
        Returns:
            The decoded JSON payload.
        Raises:
            requests.exceptions.HTTPError: If the status code is 4xx or 5xx.
        """
        resp = requests.get(
            url,
            headers=self.headers,
            cookies=self.cookies,
            timeout=self.timeout
        )
        if resp.status_code >= 400:
            # Attach the response so callers can inspect the failure.
            raise requests.exceptions.HTTPError(
                "Got {rc}, expected 200.".format(rc=resp.status_code),
                response=resp
            )
        return resp.json()

    @staticmethod
    def _tidy_up_block(block):
        """
        Convert raw chart points into ``{"date", "price"}`` dictionaries.

        Args:
            block (list): Points with a ``"date"`` (milliseconds since
                1970-01-01) and a ``"value"`` entry.
        Returns:
            list: One dict per point, with ``"date"`` as a naive UTC datetime
                and ``"price"`` as the point's value.
        """
        # Naive UTC on purpose: PyMongo stores naive datetimes as UTC, so a
        # local-time conversion would shift every date by the UTC offset.
        epoch = CellarWatch._EPOCH
        return [
            {
                "date": epoch + datetime.timedelta(milliseconds=point["date"]),
                "price": point["value"]
            }
            for point in block
        ]

    @classmethod
    def _parse_history(cls, blocks):
        """
        Sort the chart series returned by the server into named categories.

        Args:
            blocks (list): Series dicts; those lacking ``"name"`` or ``"data"``
                are ignored.
        Returns:
            dict: Keys ``"auction"``, ``"market"`` and ``"list"``; each holds
                the tidied points of the matching series, or None if absent.
        """
        hist = {
            "auction": None,
            "market": None,
            "list": None
        }
        # Checked in this order; the first label found in the name wins.
        categories = (("Auction", "auction"), ("Market", "market"), ("List", "list"))
        for block in blocks:
            if "name" not in block or "data" not in block:
                continue

            points = cls._tidy_up_block(block["data"])
            for label, key in categories:
                if label in block["name"]:
                    hist[key] = points
                    break
        return hist

    def get_wine_price_history(self, lwin, vintage, up_to=None, name=None):
        """
        Args:
            lwin    (int): The Liv-Ex wine reference number (LWIN).
            vintage (int): The year the wine was harvested.
            up_to   (int): Milliseconds since 1970-01-01. Defaults to now.
            name    (str): Optional label copied into the result unchanged.
        Returns:
            dict: A dictionary containing price history and auction information,
                with ``_id`` set to ``"<lwin>-<vintage>"``.
        Raises:
            requests.exceptions.HTTPError: If the server answers 4xx or 5xx.
        """
        now_ms = int(time.time() * 1000)
        if up_to is None:
            up_to = now_ms

        url = self._construct_get("chart/individualwinechartpage.do", {
            "_": now_ms,
            "ajaxReq": 1,
            "lwin": lwin,
            "vintage": vintage,
            "type": "max",
            "endTime": up_to
        })

        return {
            "_id": "{lwin}-{vintage}".format(lwin=lwin, vintage=vintage),
            "name": name,
            "vintage": vintage,
            "lwin": lwin,
            "history": self._parse_history(self._get(url))
        }

    def get_lwins(self, name):
        """
        Args:
            name (str): The name of the wine (or vineyard) to search for.
                It is URL-encoded before being sent.
        Returns:
            list: A list of matching wines (with corresponding LWINs), sorted
                by their ``"id"`` entry.
        Raises:
            requests.exceptions.HTTPError: If the server answers 4xx or 5xx.
        """
        url = self._construct_get("autocompletewinenames.do",
            {
                "ajaxReq": 1,
                "term": name
            }
        )

        return sorted(self._get(url), key=lambda v: v["id"])

## Set up the wine list, the CellarWatch API client and the MongoDB connection

In [None]:
# Vintages to download: 2004 through 2015 inclusive.
years = range(2004, 2016)

# Wines to download, written as (name, LWIN) pairs and expanded into the
# {"name": ..., "lwin": ...} dictionaries that the loading loop reads.
wines = [
    {"name": wine_name, "lwin": wine_lwin}
    for wine_name, wine_lwin in [
        ("Haut Brion", 1011247),
        ("Lafite Rothschild", 1011872),
        ("Latour", 1012316),
        ("Margaux", 1012781),
        ("Mouton Rothschild", 1013544),
    ]
]

# Session cookies, to be copied from a session created in a browser.
# The None values are placeholders.
cookies = {
    "X-Mapping-fjhppofk": None,
    "cellar-user": None,
    "JSESSIONID": None,
}
c = CellarWatch(cookies)

# Connect to the local MongoDB server and select the target collection
# (database "wine_prices", collection "bordeaux").
client = pymongo.MongoClient("localhost:27017")
db = client["wine_prices"]
collection = db["bordeaux"]

## Get data, load into MongoDB

In [None]:
# Empty the collection, then fetch and store one document per
# (vintage, wine) combination, vintage by vintage.
collection.drop()
vintage_wine_pairs = ((y, w) for y in years for w in wines)
for year, wine in vintage_wine_pairs:
    lwin, label = wine["lwin"], wine["name"]
    price_history = c.get_wine_price_history(lwin, year, name=label)
    collection.insert_one(price_history)

## Explore data in MongoDB

In [None]:
# Fetch the stored document for LWIN 1013544 (Mouton Rothschild in the wine
# list), vintage 2012; document ids have the form "<lwin>-<vintage>".
mouton_2012_query = {"_id": "1013544-2012"}
collection.find_one(mouton_2012_query)