In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

## Import the data

In [None]:
# Load the wine review dataset; every later cell derives its frames from `reviews`.
reviews_csv = '../input/winemag-data-130k-v2.csv'
reviews = pd.read_csv(reviews_csv)

## View the data

In [None]:
# Preview the first five rows of the raw reviews.
reviews.head(5)

### View the distribution of review points

In [None]:
# Summary statistics of the numeric columns, transposed so each column is a row.
reviews.describe().transpose()

In [None]:
# Distribution of review scores: one bar per distinct `points` value.
# The font scale is applied before the figure is created so that a first run
# and a re-run of this cell render identically.
sns.set(font_scale=2)
fig, ax = plt.subplots(figsize=(20, 10))
# `x=` must be a keyword: recent seaborn releases reject positional data variables.
sns.countplot(x='points', data=reviews, ax=ax)
ax.set_title('Points Count')
ax.set_xlabel('Points')
ax.set_ylabel('Count');

In [None]:
# Kernel density estimate of review scores. The y axis of a KDE is a density,
# not a count, so the labels say so.
ax = sns.kdeplot(reviews['points'])
ax.set_title('Points Density')
ax.set_xlabel('Points')
ax.set_ylabel('Density');

### Visualizing price vs review points

In [None]:
# Scatter plot with a fitted regression line: price (y) against review points (x).
# `x`/`y` are passed by keyword because recent seaborn releases reject them positionally.
sns.lmplot(x='points', y='price', data=reviews);

### Getting review count per country

In [None]:
# Number of reviews per country.
# size() counts every row in the group; counting the `taster_name` column
# instead would silently skip reviews whose taster is missing.
# The column keeps the name 'taster_name' because later cells sort and plot by it.
rev_count_df = reviews.groupby('country').size().to_frame(name='taster_name')

In [None]:
# First ten rows of the per-country counts (not yet sorted by count).
rev_count_df.iloc[:10]

In [None]:
# Summary statistics of the per-country counts.
rev_count_df.describe()

In [None]:
# Order countries from most to fewest counted reviews.
rev_count_df = rev_count_df.sort_values(by='taster_name', ascending=False)

In [None]:
# Ten countries with the highest counts after sorting.
rev_count_df.head(n=10)

In [None]:
# Keep only the first ten rows of the sorted counts for plotting.
top_reviewed = rev_count_df.head(10)

In [None]:
# Display the ten-row subset that feeds the bar chart below.
top_reviewed

### Visualizing review totals for top 10 countries

In [None]:
# Horizontal bar chart of review counts for the ten most-reviewed countries.
sns.set(font_scale=2)
fig, ax = plt.subplots(figsize=(20, 10))
# `x`/`y` are passed by keyword: recent seaborn releases reject positional data variables.
r_count = sns.barplot(
    x=top_reviewed['taster_name'],
    y=top_reviewed.index,
    palette='winter',
    ax=ax,
)
r_count.set_title('Review Count by Country (Top 10)', fontsize=30)
r_count.set_xlabel('Review Count', fontsize=30)
r_count.set_ylabel('Country', fontsize=30);


In [None]:
# Re-check the raw columns before building the price subset.
reviews.head(5)

In [None]:
# Two-column subset used for the price-per-country analysis.
prices_df = reviews.loc[:, ['country', 'price']]

In [None]:
# Drop rows missing either country or price and record how many rows that removes.
# The subset is rebuilt from `reviews` here so re-running this cell gives the same
# numbers (rebinding prices_df from itself would report 0 dropped on a second run).
prices_raw = reviews[['country', 'price']]
# len() counts rows; Series.count() would skip NaN prices and hide the rows
# that are dropped precisely because their price is missing.
prices_before = len(prices_raw)
prices_df = prices_raw.dropna()
prices_after = len(prices_df)

In [None]:
# Difference between the counts captured before and after the dropna step.
prices_before - prices_after

In [None]:
# Preview the cleaned country/price subset.
prices_df.head(5)

### Average price of wine per country

In [None]:
# Mean price per country as a one-column frame, most expensive first.
avg_price = (
    prices_df
    .groupby('country')['price']
    .mean()
    .to_frame()
    .sort_values(by='price', ascending=False)
)

In [None]:
# Ten countries with the highest mean price.
avg_price.head(n=10)

In [None]:
# First ten rows of the sorted averages, kept for plotting.
top_avg = avg_price.head(10)

### Visualizing avg price of wine for top 10 countries

In [None]:
# Horizontal bar chart of mean wine price for the ten most expensive countries.
sns.set(font_scale=2)
fig, ax = plt.subplots(figsize=(20, 10))
# `x`/`y` are passed by keyword: recent seaborn releases reject positional data variables.
wine_avg = sns.barplot(
    x=top_avg['price'],
    y=top_avg.index,
    palette='summer',
    ax=ax,
)
wine_avg.set_title('Avg Price by Country (Top 10)', fontsize=30)
wine_avg.set_xlabel('Avg Price', fontsize=30)
wine_avg.set_ylabel('Country', fontsize=30);

In [None]:
# Re-check the raw columns before building the variety subset.
reviews.head(5)

In [None]:
# Two-column subset used for the variety popularity analysis.
wine_variety = reviews.loc[:, ['country', 'variety']]

In [None]:
# Discard rows where either country or variety is missing.
wine_variety = wine_variety.dropna(how='any')

In [None]:
# Preview the cleaned country/variety subset.
wine_variety.head(5)

In [None]:
# Rows per variety; the resulting single column is still named 'country'.
variety_count = wine_variety.groupby('variety')['country'].count().to_frame()

In [None]:
# Preview the per-variety counts before sorting.
variety_count.head(5)

In [None]:
# Order varieties from most to least frequent.
variety_count = variety_count.sort_values(by='country', ascending=False)

In [None]:
# First ten rows of the sorted variety counts, kept for plotting.
top_variety = variety_count.head(10)

In [None]:
# Give the count column a descriptive name (it is called 'country' after the groupby).
top_variety = top_variety.rename(columns={'country': 'Count'})
top_variety.head(n=10)

In [None]:
# Horizontal bar chart of the ten most frequently reviewed wine varieties.
# The font scale is set before the figure is created so the first run and a
# re-run of this cell render identically.
sns.set(font_scale=2)
fig, ax = plt.subplots(figsize=(20, 10))
# `x`/`y` are passed by keyword: recent seaborn releases reject positional data variables.
var_count = sns.barplot(
    x=top_variety.Count,
    y=top_variety.index,
    palette='cubehelix',
    ax=ax,
)
var_count.set_title('Popular Wine Varieties (Top 10)', fontsize=30)
var_count.set_xlabel('Variety Count', fontsize=30)
var_count.set_ylabel('Varieties', fontsize=30);