# In [None]:
import pandas as pd
import numpy as np

# Load the wine-review dataset; the first CSV column becomes the row index.
reviews = pd.read_csv(
    "../input/wine-reviews/winemag-data-130k-v2.csv",
    index_col=0,
)
# Cap the number of rows pandas displays so printed tables stay short.
pd.options.display.max_rows = 5

# Count how many reviews were given each points score.
reviews.groupby('points')['points'].count()
# output:
# points
# 80     397
# 81     692
#       ...
# 99      33
# 100     19
# Name: points, Length: 21, dtype: int64

# groupby() -> builds one group of reviews per distinct points value
# the points column is then selected from each group and count() reports
# how many times that score appears


# Cheapest wine within each points score.
reviews.groupby('points')['price'].min()
# output: table with one row per points value (80-100) holding the minimum
# price among the wines in that group
# e.g. if the prices at 80 points are [20, 5, 13, 25], min() returns 5 for that group


# Name of the first wine reviewed from each winery:
# split the reviews into one group per winery, then apply a function that
# returns the title found in the first row of each group.
# remember: iloc is positional -> iloc[row, col]
reviews.groupby('winery').apply(lambda group: group['title'].iloc[0])


# Best wine for every country/province pair:
# rows are grouped by country and then province (shown alphabetically);
# idxmax() yields the index label of the highest-scoring row in the group
# and loc pulls that whole row out.
reviews.groupby(['country', 'province']).apply(
    lambda group: group.loc[group['points'].idxmax()]
)


# agg() -> runs several different functions on each group in a single call
# and returns one column per function (e.g. len, min, max).
# It accepts pandas function names given as strings as well as custom callables.
# 'min' and 'max' are passed by name here: handing the Python builtins
# min/max to agg() is deprecated in recent pandas and emits a FutureWarning.
# len stays a callable so the count column is still labelled 'len'.
reviews.groupby(['country']).price.agg([len, 'min', 'max'])
# output: table indexed by country, with the number of prices (len),
# the minimum price (min) and the maximum price (max)



# Multi-indexes

# A multi-index differs from a regular index by having multiple levels;
# grouping by more than one column produces one.
# NOTE: the second key must be 'province' (the misspelling 'privince' raised a KeyError).
countries_reviewed = reviews.groupby(['country', 'province']).description.agg([len])
countries_reviewed
# output: table grouped alphabetically by country/province, with a single
# column (len) showing the number of wines reviewed in that place

# Attribute access uses a dot; "countries_reviewed,index" would build a tuple instead.
mi = countries_reviewed.index
type(mi)
# output: pandas.core.indexes.multi.MultiIndex
# A MultiIndex needs a label for each of its two levels to retrieve a value.
# The multi-index method used most often is reset_index(), which converts
# back to a regular index.

countries_reviewed.reset_index()
# back to a normal 0..n index; however, rows are still ordered by country/province



# Sorting

# After a groupby, the order of the rows depends on the index values, not the data.
# To order rows by the data itself, use sort_values().

countries_reviewed = countries_reviewed.reset_index()
countries_reviewed.sort_values('len')

# sort_values() defaults to ascending order - the lowest value comes first
# output: table whose rows are ordered by len (number of wines reviewed in an area)

# for descending order:
countries_reviewed.sort_values('len', ascending=False)

# to sort by index values instead, use sort_index()

countries_reviewed.sort_index()

# output: table with rows ordered by index

# several columns can be given at once; earlier columns take priority
countries_reviewed.sort_values(['country', 'len'])

# output: table sorted by country first, then by len within each country
# e.g. argentina - other - len=536 | argentina - mendoza - len=3264 ...
