In [None]:
import pandas as pd
# Load the wine reviews, using the CSV's first column as the row index.
reviews = pd.read_csv(
    "../input/wine-reviews/winemag-data-130k-v2.csv",
    index_col=0,
)
# Cap displayed output at five rows so cell results stay short.
pd.set_option("display.max_rows", 5)

In [None]:
# Dimensions of the loaded frame as (number of rows, number of columns).
reviews.shape

In [None]:
# Columns can be accessed like attributes of the DataFrame object
# (this only works when the column name is a valid Python identifier).
reviews.country

In [None]:
# ...or with the indexing operator, which also handles column names that
# contain spaces or other characters not allowed in an attribute name.
reviews['country']

In [None]:
# Drill down to a single value: the `country` entry whose index label is 0.

reviews.country[0]


## Index-based selection

`iloc` selects data based on its numerical position in the table. Selection is row-first, column-second.


In [None]:
# First row (position 0), returned as a Series.
reviews.iloc[0]

In [None]:
# Every row of the first column (position 0); `:` means "all rows".
reviews.iloc[:, 0]

In [None]:
# iloc fully supports the slicing operator: rows at positions 0, 1 and 2
# of the first column (the end position 3 is excluded).
reviews.iloc[:3,0]

In [None]:
# Rows at positions 1 and 2 of the first column (end position excluded).
reviews.iloc[1:3, 0]

In [None]:
# Last five rows of the data set: negative positions count from the end.
reviews.iloc[-5:]

## Label-based selection

`loc` uses label-based selection: what matters is the value of the data's index, not its position.

In [None]:
# Value in the `country` column for the row whose index label is 0.
reviews.loc[0, 'country']

In [None]:
# All rows, restricted to three columns selected by name.
reviews.loc[:, ['taster_name', 'taster_twitter_handle', 'points']]

## Differences between loc and iloc

Gotcha: a `loc` slice includes the last label in the specified range, whereas an `iloc` slice excludes the end position. So `loc[0:10]` gives you 11 records, while `iloc[0:10]` gives you 10:

In [None]:
# Compare how many rows each slicing style returns for the same bounds 0:3.
# `loc` slices by label and includes the end label, while plain `[]` row
# slicing and `iloc` slice by position and exclude the end. With index labels
# running 0, 1, 2, ... the shapes therefore differ and both entries are False.
#
# The two comparisons are returned together as a tuple because a notebook cell
# only displays its last expression; as separate statements the first result
# would be computed and silently discarded.
(
    reviews.loc[0:3].shape == reviews[0:3].shape,
    reviews.loc[0:3].shape == reviews.iloc[0:3].shape,
)

In [None]:
# The `title` column, selected by name.
reviews['title']

In [None]:
# The index is not fixed: set_index builds a frame indexed by another column.
# The result is not assigned here, so `reviews` itself keeps its original index.
reviews.set_index('title')

In [None]:
# Element-wise comparison: a boolean Series that is True where country is 'Italy'.
reviews.country == 'Italy'

In [None]:
# The underlying country column, for comparison with the boolean result of
# the `== 'Italy'` test.
reviews.country

In [None]:
# Passing the boolean Series to loc keeps only the rows where it is True.
reviews.loc[reviews.country == 'Italy']

In [None]:
# How many reviews per country.
# `size()` counts the rows in each group and returns a single Series.
# `count()` would instead return, for every other column, the number of
# non-null values per group, which under-counts wherever a field is missing
# and does not answer "how many reviews" directly.
reviews.groupby('country').size()

In [None]:
# Italian wines scoring 90 points or more.
# Conditions are combined with `&`; each comparison needs its own parentheses
# because `&` binds more tightly than `==` and `>=`.
reviews.loc[
    (reviews.country == 'Italy')
    & (reviews.points >= 90)
]

In [None]:
# isin tests membership in a list of values: rows whose country is Italy or France.
reviews.loc[reviews.country.isin(['Italy', 'France'])]

In [None]:
# Wines that have a price: notnull() is True for non-missing values, so this
# filters OUT the wines lacking a price (use isnull() to select those instead).
reviews.loc[reviews['price'].notnull()]

## Assigning Data

In [None]:
# Shape before a new column is assigned (compare with the shape shown after).
reviews.shape

In [None]:
# Add a countdown column: len(reviews) - 1 in the first row down to 0 in the
# last. Reversing range(n) with [::-1] yields range(n - 1, -1, -1).
reviews['index_backwards'] = range(len(reviews))[::-1]

In [None]:
# Shape after the assignment: the column count reflects `index_backwards`.
reviews.shape

In [None]:
# Sanity check: the last row should carry 0 in the countdown column.
reviews.iloc[-1].index_backwards == 0

In [None]:
# Peek at the first rows of the frame, including the newly added column.
reviews.head()

In [None]:
# Select the `description` column on its own.
desc = reviews.description

In [None]:
# A single column selected this way is a pandas Series.
type(desc)

In [None]:
# Selecting with a list of column names keeps the result two-dimensional.
a_smaller_df = reviews.loc[:, ['description', 'country']]

In [None]:
# A list-of-columns selection is a pandas DataFrame, not a Series.
type(a_smaller_df)

In [None]:
# The description stored under index label 0.
first_description = reviews.description[0]

In [None]:
# Display the selected description value.
first_description

In [None]:
# First row by position, kept as a Series whose labels are the column names.
first_row = reviews.iloc[0]

In [None]:
# Display the first row.
first_row

In [None]:
# Rows with index labels 1, 2, 3, 5 and 8 (all columns).
# NOTE(review): despite the name, this holds whole rows rather than
# descriptions, and the name is rebound to a different selection further down.
first_descriptions = reviews.loc[[1,2,3,5,8]]

In [None]:
# Display the current value of `first_descriptions`.
first_descriptions

In [None]:
# First ten descriptions, selected explicitly by position. An integer slice
# in plain `[]` is also positional, but `.iloc` states the intent outright.
first_descriptions = reviews.description.iloc[:10]

In [None]:
# Display the first ten descriptions.
first_descriptions

In [None]:
# Passing a list of labels to loc picks exactly those rows, in the given
# order, with all columns.
sample_reviews = reviews.loc[[1, 2, 3, 5, 8]]

In [None]:
# Display the hand-picked sample rows.
sample_reviews

In [None]:
# Rows labelled 0, 1, 10 and 100, restricted to the location-related columns.
df = reviews.loc[
    [0, 1, 10, 100],
    ['country', 'province', 'region_1', 'region_2'],
]

In [None]:
# Display the selected rows and columns.
df

In [None]:
# Country and variety for every label up to and including 99. A loc slice
# includes its end label, so this is the first 100 rows when the index runs
# 0, 1, 2, ...
df = reviews.loc[:99, ['country', 'variety']]

In [None]:
# Keep only the rows whose country is exactly 'Italy'.
italian_wines = reviews.loc[reviews.country == 'Italy']

In [None]:
# Display the Italian wines.
italian_wines

In [None]:
# Wines from Australia or New Zealand.
oceania_wines = reviews.loc[reviews.country.isin(['Australia', 'New Zealand'])]
# Of those, keep the ones scoring 95 points or more. The mask is built from
# `oceania_wines` itself so it has exactly the same index as the frame being
# filtered. Building it from `reviews` only works through implicit index
# alignment and fails if the two indexes ever stop matching.
top_oceania_wines = oceania_wines.loc[oceania_wines.points >= 95]

In [None]:
# Display the top-scoring Australian and New Zealand wines.
top_oceania_wines