# Putting Wine Data into MongoDB

In [50]:
%matplotlib inline

import datetime
import json
import os
import time

import numpy
import pandas
import pymongo
import requests

In [2]:
# Years we want: 2008 through 2012 (the end of the range is exclusive)
years = range(2008, 2013)

# Wines we want, expanded from a compact per-area table of (name, lwin) pairs.
# Every entry becomes a dict with the keys "name", "area" and "lwin", and the
# resulting list keeps the order in which the table is written.
wines = [
    {"name": wine_name, "area": area_name, "lwin": lwin_code}
    for area_name, area_wines in [
        ("Bordeaux", [
            ("Haut Brion", 1011247),
            ("Lafite Rothschild", 1011872),
            ("Latour", 1012316),
            ("Margaux", 1012781),
            ("Mouton Rothschild", 1013544),
        ]),
        ("Burgundy", [
            ("Rousseau, Chambertin", 1057005),
            ("Vogue, Musigny Vv", 1026872),
            ("Grivot, Clos Vougeot", 1035580),
            ("Lambrays, Clos Lambrays", 1040290),
            ("Ponsot, Clos Roche Vv", 1051508),
        ]),
        ("Southern Rhone", [
            ("Beaucastel, Chateauneuf Du Pape", 1108387),
            ("Clos Papes, Chateauneuf Du Pape", 1110487),
            ("Janasse, Chateauneuf Du Pape Vv", 1113970),
            ("Pegau, Chateauneuf Du Pape Reservee", 1115118),
            ("Vieux Telegraphe, Chateauneuf Du Pape", 1118076),
        ]),
        ("Northern Rhone", [
            ("Chapoutier, Ermitage Pavillon", 1109704),
            ("Domaine Jean Louis Chave, Hermitage", 1110012),
            ("Guigal, Cotes Du Rhone", 1113101),
            ("Jaboulet, Hermitage Chapelle", 1113563),
            ("Cote Rotie Ampuis", 1113172),
        ]),
    ]
    for wine_name, lwin_code in area_wines
]

# Create MongoDB client, set up database.
# The connection string is read from the MONGODB_URI environment variable when
# it is set, so credentials can be supplied without editing the notebook.
# NOTE(review): the fallback below still embeds a username/password in the
# notebook; rotate that credential and remove the fallback once every user
# sets MONGODB_URI.
MONGODB_URI = os.environ.get(
    "MONGODB_URI",
    "mongodb://group:group@ds029635.mlab.com:29635/fods-seven",
)
client = pymongo.MongoClient(MONGODB_URI)
db = client["fods-seven"]

In [78]:
prices = db["prices"]
reviews = db["reviews"]

# Percentage price changes collected for every matched review; both lists are
# reset here so re-running the cell does not accumulate duplicates.
price_increases = []
price_losses = []

# Number of price points averaged on each side of a review date
NUM_AVG = 1


def _prices_near_review(collection, name, vintage_year, review_date, after, limit):
    """Return up to `limit` prices for one wine/vintage adjacent to a review date.

    Args:
        collection: the collection to query (documents are matched on their
            "name", "vintage" and "date" fields and read via their "price" field).
        name: wine name to match.
        vintage_year: vintage year; matched against "vintage" as a string.
        review_date: the review's date value, compared against "date".
        after: if True, take the earliest prices strictly after the review
            date; otherwise take the latest prices strictly before it.
        limit: maximum number of price documents to read.

    Returns:
        A list of floats, closest-to-the-review first; it may hold fewer than
        `limit` items (or none) when not enough price documents exist.
    """
    if after:
        operator, direction = "$gt", pymongo.ASCENDING
    else:
        operator, direction = "$lt", pymongo.DESCENDING

    cursor = collection.find({
        "name": name,
        "vintage": str(vintage_year),
        "date": {operator: review_date},
    }).sort("date", direction).limit(limit)
    return [float(doc["price"]) for doc in cursor]


for wine in wines:
    # Review documents for this wine with 2001 < year < 2013, oldest first
    wine_reviews = list(
        reviews.find({
            "name": wine["name"],
            "year": {
                "$gt": 2001,
                "$lt": 2013
            }
        }).sort("year", pymongo.ASCENDING))

    for vintage in wine_reviews:
        for review in vintage["reviews"]:
            # Only reviews whose reviewer name contains "robert parker" count
            if "robert parker" not in review["reviewer"]["name"].lower():
                continue

            before_prices = _prices_near_review(
                prices, wine["name"], vintage["year"], review["date"],
                after=False, limit=NUM_AVG)
            after_prices = _prices_near_review(
                prices, wine["name"], vintage["year"], review["date"],
                after=True, limit=NUM_AVG)

            # Need a full window of prices on both sides of the review
            if len(before_prices) != NUM_AVG or len(after_prices) != NUM_AVG:
                continue

            avg_before = numpy.mean(before_prices)
            avg_after = numpy.mean(after_prices)

            # numpy floats divide by zero into inf/nan with only a warning,
            # which would silently corrupt the later max(); skip such pairs.
            if avg_before == 0:
                continue

            increase = ((avg_after - avg_before) / avg_before) * 100
            loss = ((avg_before - avg_after) / avg_before) * 100

            print(vintage["name"], vintage["year"], review["rating"]["score"], increase)

            price_increases.append(increase)
            price_losses.append(loss)

Haut Brion 2005 100.0 13.8924731183
Haut Brion 2006 96.0 -1.85185185185
Haut Brion 2008 96.0 -5.90740740741
Haut Brion 2009 100.0 4.37037037037
Haut Brion 2010 100.0 0.0
Haut Brion 2011 95.0 7.27272727273
Haut Brion 2012 98.0 7.77777777778
Lafite Rothschild 2004 95.0 2.96836982968
Lafite Rothschild 2005 95.0 0.0
Lafite Rothschild 2006 97.0 -2.94117647059
Lafite Rothschild 2008 98.0 -8.89421382761
Lafite Rothschild 2009 99.0 -0.3879626717
Lafite Rothschild 2010 98.0 12.4347476911
Latour 2004 95.0 11.042047532
Latour 2005 98.0 2.20588235294
Latour 2006 95.0 -0.954653937947
Latour 2008 96.0 -3.15789473684
Latour 2009 100.0 -0.699851281603
Latour 2010 100.0 6.66666666667
Margaux 2005 98.0 1.85185185185
Margaux 2009 99.0 -6.17610062893
Margaux 2010 99.0 6.0922681241
Margaux 2012 95.0 -4.46841294299
Mouton Rothschild 2005 97.0 -0.533333333333
Mouton Rothschild 2006 96.0 3.125
Mouton Rothschild 2009 99.0 -4.68085106383
Mouton Rothschild 2010 97.0 -4.0
Rousseau, Chambertin 2005 98.0 -4.8611111

In [84]:
# Report the largest value in each list (increases first, then losses).
# str.format is kept deliberately so the numbers render exactly as before.
"Max increase: {}%, max loss: {}%".format(*map(max, (price_increases, price_losses)))

'Max increase: 48.54294478527607%, max loss: 25.0%'