In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import spearmanr
from scripts.utils import save_fig

# Reload imported modules automatically before each cell is executed, so that
# edits to local helper code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Default figure size (width, height) in inches; individual plotting cells may
# still create figures with their own size.
plt.rcParams["figure.figsize"] = (12, 4)

# WA Primeur Ratings

À noter : Clinet, La Conseillante, Ducru-Beaucaillou

In [None]:
# Most recent vintage kept in the ratings table.
LAST_RATED_VINTAGE = 2017

ratings_raw = pd.read_excel("data/ratings/ratings.xlsx")

# Collapse each rating range to a single score: the midpoint of the
# "Rating - LB" and "Rating - HB" columns (presumably the lower and upper
# bounds of the published range -- confirm against the spreadsheet).
rating_midpoint = (ratings_raw["Rating - LB"] + ratings_raw["Rating - HB"]) / 2

# Keep only the vintages up to and including LAST_RATED_VINTAGE.
ratings = ratings_raw.assign(Rating=rating_midpoint).loc[
    lambda frame: frame["Vintage"] <= LAST_RATED_VINTAGE
]

# RP Ratings vs Price

In [None]:
# Every distinct château present in the ratings table.
# NOTE(review): a later cell reassigns `wines_corpus` to a hand-picked list
# before any visible use of this value -- confirm whether this cell is still needed.
chateau_column = ratings["Chateau"]
wines_corpus = chateau_column.unique()

In [None]:
# Stage 1: read the wide price sheet (first spreadsheet column becomes the row
# index, remaining columns are read from the header row).
prices_wide = pd.read_excel(
    "data/prices/prices_feb_2022.xlsx", header=0, index_col=0
)

# Stage 2: discard the 2018-2020 columns.
prices_wide = prices_wide.drop(columns=[2018, 2019, 2020])

# Stage 3: keep the first non-null entry per (row label, Appellation) pair and
# name the two resulting index levels.
prices_by_vineyard = (
    prices_wide.reset_index().groupby(["index", "Appellation"]).first()
)
prices_by_vineyard.index.names = ["Vineyard", "Appellation"]

# Stage 4: go from wide to long -- one row per (Vintage, Vineyard, Appellation).
prices_long = prices_by_vineyard.transpose().reset_index().melt(id_vars="index")
prices = prices_long.rename(columns={"index": "Vintage", "value": "Price"})

# Stage 5: work with natural-log prices from here on.
prices["Price"] = np.log(prices["Price"])

In [None]:
# Hand-picked set of wines used for every correlation computed below.
# The order is kept as-is because it determines column order downstream.
wines_corpus = [
    "Château Angélus",
    "Château Ausone",
    "Château Cheval Blanc",
    "Château Figeac",
    "Château Haut-Brion",
    "Château L'Evangile",
    "Château La Mission Haut-Brion",
    "Château Lafite Rothschild",
    "Château Léoville Las Cases",
    "Château Margaux",
    "Château Palmer",
    "Château Pape Clément",
    "Château Pavie",
    "Château Trotanoy",
    "Château Mouton Rothschild",
    "Château Trotte Vieille",
    "Petrus",
    "Château Montrose",
    "Vieux Château Certan",
]

In [None]:
# Spearman rank correlation between rating and price for each wine over a
# fixed range of vintages (both bounds inclusive).
BEGIN_YEAR = 1994
END_YEAR = 2013

spearman_by_vineyard = {}
for vineyard in wines_corpus:
    # Mean price per vintage; gaps between observed vintages are filled by
    # interpolation.
    vineyard_prices = (
        prices[prices["Vineyard"] == vineyard]
        .groupby("Vintage")[["Price"]]
        .mean()
        .interpolate()
    )
    vineyard_ratings = ratings.loc[
        ratings["Chateau"] == vineyard, ["Vintage", "Rating"]
    ].set_index("Vintage")

    # Pair rating and price on the Vintage index before correlating. Slicing
    # the two tables independently and passing them positionally either fails
    # (different lengths) or silently pairs the wrong years whenever one table
    # lacks a vintage the other has. Rows still missing a value are dropped,
    # matching what the rolling-window analysis does.
    paired = (
        vineyard_prices.join(vineyard_ratings, how="inner")
        .sort_index()
        .loc[BEGIN_YEAR:END_YEAR]
        .dropna(how="any")
    )
    spearman_by_vineyard[vineyard] = spearmanr(paired["Rating"], paired["Price"])[0]

# One float column indexed by wine name, sorted alphabetically.
corrs = pd.DataFrame(
    {"Corr": pd.Series(spearman_by_vineyard, dtype="float64")}
).sort_index()
display(corrs)
corrs.to_excel("data/rp_ratings_prices_spearman_correlations.xlsx")

median_corr = round(corrs["Corr"].median(), 3)
mean_corr = round(corrs["Corr"].mean(), 3)
display(f"Median: {median_corr}, mean {mean_corr}")

## Evolution of correlations through time

In [None]:
# Rolling-window Spearman correlations between rating and price, one value per
# (window, wine).
FIRST_VINTAGE = 1994
LAST_VINTAGE = 2017
# Distance in years between the first and last vintage of a window. Label
# slicing with .loc is inclusive on both ends, so a window can hold up to
# width + 1 vintages; the labels below show the same inclusive bounds.
width = 10

# The rating/price pairing of a wine does not depend on the window, so it is
# built once per wine instead of once per (window, wine).
paired_by_vineyard = {}
for vineyard in wines_corpus:
    price = prices.loc[
        prices["Vineyard"] == vineyard, ["Vintage", "Price"]
    ].set_index("Vintage")
    rating = ratings.loc[
        ratings["Chateau"] == vineyard, ["Vintage", "Rating"]
    ].set_index("Vintage")
    # Align on Vintage and keep only vintages that have both values.
    paired_by_vineyard[vineyard] = pd.concat([price, rating], axis=1).dropna(
        how="any", axis=0
    )

interval_starts = list(range(LAST_VINTAGE - FIRST_VINTAGE - width + 1))
labels = []
window_begins = []
window_rows = []
for i in interval_starts:
    begin = FIRST_VINTAGE + i
    end = begin + width
    row = {}
    for vineyard, paired in paired_by_vineyard.items():
        window = paired.loc[begin:end]
        row[vineyard] = round(
            spearmanr(window.iloc[:, 0], window.iloc[:, 1])[0], 3
        )
    window_begins.append(begin)
    window_rows.append(row)
    labels.append(str(begin) + "-" + str(end))

# Rows are indexed by the first vintage of each window, columns follow
# wines_corpus. Built in one go rather than enlarged one cell at a time.
correlations = pd.DataFrame(window_rows, index=window_begins, columns=wines_corpus)

In [None]:
# Reshape the window-by-wine table into long format for plotting: one row per
# (window start, wine) with its correlation.
# NOTE: this cell overwrites `correlations`, so it must be run exactly once
# after the cell that builds the wide table.
numeric_correlations = correlations.apply(pd.to_numeric)
with_vintage_column = numeric_correlations.reset_index().rename(
    columns={"index": "Vintage"}
)
correlations = pd.melt(
    with_vintage_column,
    id_vars="Vintage",
    var_name="Chateau",
    value_name="Correlation",
)

In [None]:
# Box plot of the per-wine correlations for each rolling window.
fig = plt.figure(figsize=(6, 4.5))

# Text styling; applied through the global rc settings, so it also affects
# any figure drawn after this cell.
font = dict(weight="regular", family="serif", size=16)
plt.rc("font", **font)
plt.rc("xtick", labelsize="13")
plt.rc("ytick", labelsize="14")

# Create the axes only after the rc changes so tick label sizes take effect.
ax = fig.add_subplot(1, 1, 1)
ax.grid(True, which="major", alpha=0.5)

# Monochrome styling: light grey boxes with black outlines, medians, whiskers
# and caps.
PROPS = dict(
    boxprops=dict(facecolor="lightgrey", edgecolor="k"),
    medianprops=dict(color="k"),
    whiskerprops=dict(color="k"),
    capprops=dict(color="k"),
)
sns.boxplot(
    data=correlations, x="Vintage", y="Correlation", width=0.6, ax=ax, **PROPS
)

# Replace the window start years on the x axis with "begin-end" labels.
ax.set_xticklabels(labels, rotation=75)
ax.set_ylim((0, 1))
ax.set_ylabel("Spearman correlation", labelpad=10)
ax.set_xlabel("Vintage groups", labelpad=10)
ax.set_axisbelow(True)  # draw the grid behind the boxes

fig.tight_layout()
save_fig("views/ratings_corr_through_time", width_column="single")
plt.show()

### Mann-Kendall trend test (Kendall's tau)

In [None]:
from scipy.stats import kendalltau

# Kendall's tau between the window start year and the correlations of all
# wines pooled together (the long-format table has one row per window and wine).
trend_test = kendalltau(correlations["Vintage"], correlations["Correlation"])
trend_test

---
# End of notebook
