I love red wine, so I decided to use this data set to build a new dataframe containing only red wines that are rated 92 points or higher and cost less than $20.

In [None]:
#import appropriate libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [None]:
# Load the wine reviews CSV into the working dataframe
wine_df = pd.read_csv(filepath_or_buffer="../input/winemag-data_first150k.csv")

In [None]:
# Peek at the first five rows to see which columns are available
wine_df.head(n=5)

In [None]:
# Histogram of the 'points' column over the whole data set
ax = plt.gca()
ax.set(xlabel='Point Values', ylabel='Count', title='Overview of Wine DF')
ax.hist(wine_df['points'], bins=15, edgecolor='white')

In [None]:
# Heatmap of the null mask: one column per field, one row per record
sns.heatmap(data=wine_df.isna(), cmap='viridis', cbar=False, yticklabels=False)

In [None]:
#drop columns that are missing data
# Reassign instead of using inplace=True, and tolerate an already-removed column,
# so that re-running this cell does not raise KeyError.
wine_df = wine_df.drop(columns='region_1', errors='ignore')

In [None]:
# Remove the 'region_2' column; reassigning with errors='ignore' keeps the cell safe to re-run.
wine_df = wine_df.drop(columns='region_2', errors='ignore')

In [None]:
# Remove the 'description' column; reassigning with errors='ignore' keeps the cell safe to re-run.
wine_df = wine_df.drop(columns='description', errors='ignore')

In [None]:
# Remove the 'designation' column; reassigning with errors='ignore' keeps the cell safe to re-run.
wine_df = wine_df.drop(columns='designation', errors='ignore')

In [None]:
# Confirm which columns remain after the drops
wine_df.head(n=5)

In [None]:
#create new columns to identify which are the best rated and less expensive wines
# Use .loc[mask, column] so the assignment writes into wine_df itself. The chained
# form wine_df[col][mask] = ... assigns through an intermediate Series, which pandas
# reports as SettingWithCopyWarning and which leaves wine_df unchanged under
# Copy-on-Write.
# Rows whose price is missing compare False against the threshold and stay 'no'.
wine_df["cheaper"] = 'no'
wine_df.loc[wine_df["price"] < 20.0, "cheaper"] = 'yes'
wine_df["quality"] = 'no'
wine_df.loc[wine_df["points"] > 91, "quality"] = 'yes'

In [None]:
# Keep only the rows flagged both inexpensive ('cheaper') and highly rated ('quality')
newdf = wine_df[wine_df['quality'].eq("yes") & wine_df['cheaper'].eq("yes")]

In [None]:
# First five rows of the inexpensive, highly rated selection
newdf.head(n=5)

In [None]:
# Number of inexpensive, highly rated wines per country (white wines included);
# the author reads this chart as the US producing the most of them.
ax = sns.countplot(data=newdf, x="country")
# Re-apply the existing tick labels, rotated and right-aligned
ax.set_xticklabels(ax.get_xticklabels(), ha="right", rotation=40)
plt.tight_layout()

In [None]:
#this graph shows that Sauv Blanc is the varietal that has the most value for your $
# The bars come from value_counts() on 'variety', so the title names varieties
# rather than countries.
plt.figure(figsize=(10,6))
newdf['variety'].value_counts().plot(kind='bar', title="Varieties with the most highly rated, low-cost wines (all varietals)")

In [None]:
#create new column that identifies which are red wines
# Substrings that mark a variety name as a red wine. They are joined into one
# regular expression with '|', so every entry is interpreted as a regex pattern;
# none of them contains a character that changes its meaning as a regex.
RED_VARIETY_KEYWORDS = [
    "Red",
    "Cabernet",
    "Pinot Noir",
    "Syrah",
    "Malbec",
    "Sangiovese",
    "Merlot",
    "Grenache",
    "Shiraz",
    "Pinotage",
    "Monastrell",
    "Tempranillo",
    "Claret",
    "Mourvèdre",
    "Verdot",
    "Dolcetto",
    "Carmenère",
    "G-S-M",
]

# case=False keeps the match case-insensitive. na=False makes a missing variety
# count as "not red" instead of putting NaN into the boolean mask, which
# boolean indexing would reject.
is_red = wine_df["variety"].str.contains("|".join(RED_VARIETY_KEYWORDS), case=False, na=False)

# .loc[mask, column] assigns into wine_df directly, so no SettingWithCopyWarning
# is raised and the result does not depend on chained-assignment behaviour.
wine_df["reds"] = 'no'
wine_df.loc[is_red, "reds"] = 'yes'


In [None]:
# Red wines that are also flagged inexpensive and highly rated
red_df = wine_df[
    wine_df['reds'].eq("yes")
    & wine_df['quality'].eq("yes")
    & wine_df['cheaper'].eq("yes")
]

In [None]:
# First five rows of the red-only selection
red_df.head(n=5)

In [None]:
# Number of qualifying red wines per country; the author reads this chart as
# Portugal being the main source of the best inexpensive reds.
red_df['country'].value_counts().plot.bar(title='Countries with the highest rated, low cost red wines')

In [None]:
# Number of qualifying red wines per variety
red_df['variety'].value_counts().plot.bar(title='Number of Red Varietals 92+ Rating')

In [None]:
#here is the complete dataframe that lists the best red wines that are under $20
#(no extra column cleanup is needed here: 'designation' was already dropped from wine_df earlier)

In [None]:
# Show the table of highly rated, inexpensive red wines as this cell's output
red_df

If you also like white wines, display `newdf` instead: it contains both red and white wines that cost less than $20 and are rated 92 or above.