![Königsweg Logo](../img/koenigsweg_150.png)

<span style="font-size: small;float: right;">&copy; 2015-2017 Alexander C.S. Hendorf, <a href="http://koenigsweg.com">Königsweg GmbH</a>, Mannheim </span>

In [None]:
import numpy as np
import pandas as pd
import random

In [None]:
%run 'helpers.py'

# Data selection & Indexing

## Series

In [None]:
# Ten sample integers; no index is passed, so pandas labels them 0..9.
series = pd.Series(data=[3, 62, 75, 83, 47, 43, 39, 16, 19, 2])

In [None]:
series

### Access by Position / Slice

In [None]:
# With the default integer index, [0] looks up the *label* 0, which here
# coincides with the first position; series.iloc[0] is the explicit
# positional form.
series[0]

In [None]:
# integer slice: positions 3, 4 and 5 - the end is excluded, as with a Python list
series[3:6]

In [None]:
# Same selection as series[3:6], but explicitly positional via .iloc
series.iloc[3:6]
# note: .iloc is indexed with square brackets [], it is not called with ()

### Access by label

In [None]:
# Relabel the series with capital letters, one per element, starting at "A".
series.index = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ"[:len(series)])

In [None]:
series

In [None]:
# An integer slice stays positional even though the index now holds letters;
# the end position is excluded (pythonic).
series[3:6]

In [None]:
# Slicing by label: unlike positional slices, the end label is included.
series['D':'F']

In [None]:
# A list cannot contain slices, so the "obvious" way to combine several label
# ranges, series[['D':'F', 'I':'J']], is not valid Python at all.
# Compiling the expression from a string shows the error without turning the
# whole cell into a syntax error, so "Restart & Run All" keeps working.
try:
    compile("series[['D':'F', 'I':'J']]", '<multi-range demo>', 'eval')
except SyntaxError as err:
    print('SyntaxError:', err.msg)
# cannot combine multiple ranges

In [None]:
# To combine several label ranges, select each range on its own and
# concatenate the pieces into one Series.
pd.concat([series.loc['D':'F'], series.loc['I':'J']])

In [None]:
# Relabel with the letters of "GATTACAXYZ": "A" and "T" occur more than once,
# so the index is no longer unique.
series.index = list("GATTACAXYZ"[:len(series)])

In [None]:
series

In [None]:
# 'G' occurs only once in the index, so a single value is returned
series.loc['G']

In [None]:
# 'A' occurs several times in the index and its occurrences are not adjacent,
# so pandas cannot decide where the slice should end and raises a KeyError.
# The error is caught and printed so the notebook still runs top to bottom.
try:
    display(series.loc['G':'A'])
except KeyError as err:
    print('KeyError:', err)
# non-unique labels break slicing

In [None]:
# Labels that occur only once can still be used as slice bounds,
# even though the index as a whole is non-unique.
series.loc['X':'Z']

## DataFrames, 2D Data

In [None]:
# Load the sample DataFrame from a JSON file (path is relative to this notebook).
df = pd.read_json('../data/sampledf.json')

In [None]:
df

In [None]:
# Presentation aid: render the frame with the cells selected below styled.
# `highlight` is not defined in this notebook; presumably it comes from
# helpers.py, which is run near the top - TODO confirm.
display(df.style.apply(highlight, subset=pd.IndexSlice[:, 2]))

# a single key inside [] selects the column with that label (here: 2)
df[2]

In [None]:
# Presentation aid: style the rows that the selection below returns.
display(df.style.apply(highlight, subset=pd.IndexSlice[range(2, 4), :]))

# rows, not a column: a slice inside [] selects rows (here positions 2 and 3,
# end excluded)
df[2:4]

In [None]:
# Presentation aid: style the cells that the selection below returns.
display(df.style.apply(highlight, subset=pd.IndexSlice[range(2, 4), range(2, 4)]))


# segment: rows 2-3 and columns 2-3 by position (end excluded on both axes)
df.iloc[2:4, 2:4]

In [None]:
# Presentation aid: style the cells that the selection below returns.
display(df.style.apply(highlight, subset=pd.IndexSlice[:, range(2, 4)]))

# column slice: every row, columns at positions 2 and 3
df.iloc[:, 2:4]

In [None]:
df

In [None]:
# Replace the row labels with zero-padded names: R00, R01, ...
df.index = list(map("R{:02d}".format, range(len(df))))

In [None]:
# Replace the column labels with zero-padded names: C00, C01, ...
df.columns = list(map("C{:02d}".format, range(df.shape[1])))

In [None]:
df

In [None]:
# Presentation aid: style the cells that the selection below returns.
display(df.style.apply(highlight, subset=pd.IndexSlice[:, 'C05']))

# a single label inside [] selects that column
df['C05']

In [None]:
# Presentation aid: style the rows that the selection below returns.
display(df.style.apply(highlight, subset=pd.IndexSlice['R02':'R05', :]))


# a label slice inside [] selects rows; the end label (R05) is included
df['R02':'R05']

In [None]:
# Presentation aid: style the cells that the selection below returns.
display(df.style.apply(highlight, subset=pd.IndexSlice['R02':'R05', 'C04':'C05']))


# segment: label slices on both axes, end labels included
df.loc['R02':'R05', 'C04':'C05']

### Exercise

In [None]:
# Read the cleaned sales workbook and preview its first five rows.
sales_data = pd.read_excel('../data/blooth_sales_data_clean.xlsx')
sales_data.head()

In [None]:
# overview of the columns: names, dtypes and non-null counts
sales_data.info()

Select columns two to four (three columns in total)

In [None]:
# Your code here


In [None]:
# load solution
%load ../solutions/201.py

Select the columns *birthday and name* (together)

In [None]:
# Your code here


In [None]:
# load solution
%load ../solutions/202.py

Select the rows 2 to 4 (three rows)

In [None]:
# Your code here


In [None]:
# load solution
%load ../solutions/203.py

Select the rows 55, 77

In [None]:
# Your code here


In [None]:
# load solution
%load ../solutions/204.py

## Boolean Index

A boolean index is an array of true/false values, one per element: `[True, False, True, True, False, False, True, …]`

Note: despite its name, a boolean index is not one of the pandas Index types.

In [None]:
# the column the conditions below are built on
df['C04']

In [None]:
# element-wise comparison: a boolean Series, True where the value exceeds 60
df['C04'] > 60

In [None]:
# a boolean Series inside [] keeps only the rows where it is True
df[df['C04'] > 60]

In [None]:
# OR of two conditions: use | (not `or`); the parentheses are required
# because | binds more tightly than the comparison operators
df[(df['C04'] < 60) | (df['C04'] > 80)]  # multiple OR

In [None]:
# AND of two conditions: use & (not `and`); here: below 60 and even
df[(df['C04'] < 60) & (df['C04'] % 2 == 0)]  # multiple AND

### Exercise

In [None]:
# Re-reads the same workbook as the earlier exercise, giving a fresh copy
# of the data, then previews the first five rows.
sales_data = pd.read_excel('../data/blooth_sales_data_clean.xlsx')
sales_data.head(5)

In [None]:
# Overview of the columns: names, dtypes and non-null counts.
# info() takes no row count - its first positional parameter is `verbose` -
# so it is called without arguments, as in the earlier exercise.
sales_data.info()

Find all rows with exactly 50 units

In [None]:
# Your code here


In [None]:
# load solution
%load ../solutions/205.py

Find all rows with exactly 50 playstations

In [None]:
# Your code here


## filter

Filter rows or columns by their labels (index or column names), not by their values.

In [58]:
# keep the columns whose label contains the substring '1';
# for a DataFrame, filter() works on the columns unless axis is given
df.filter(like='1')  # , axis=1 per default

Unnamed: 0,C01
R00,19
R01,39
R02,64
R03,61
R04,60
R05,26
R06,29
R07,32
R08,53
R09,74


In [58]:
# axis=0: match the regex against the row labels instead of the columns.
# '.0[2-4]' = any character, then '0', then one of 2, 3, 4 - with the
# R00..R09 labels this should keep rows R02, R03 and R04 (all columns).
# NOTE(review): the stored output under this cell shows one column for all
# rows, i.e. the previous cell's result - looks stale, re-run to refresh.
df.filter(regex='.0[2-4]', axis=0)

Unnamed: 0,C01
R00,19
R01,39
R02,64
R03,61
R04,60
R05,26
R06,29
R07,32
R08,53
R09,74


### Transpose with .T

In [None]:
# a one-row slice (position 2 only) - still a DataFrame, not a Series
df.iloc[2:3]

In [None]:
# .T swaps rows and columns: the one-row frame becomes a one-column frame
df.iloc[2:3].T

### Formatting with Styler

In [None]:
# Re-read the sample file: this replaces the frame whose index and columns
# were relabelled earlier with a freshly loaded one.
df = pd.read_json('../data/sampledf.json')
df

In [None]:
# highlight the minimum of each column (highlight_min works column-wise by default)
df.style.highlight_min()

In [None]:
def odd_or_even(data):
    """Return one CSS declaration string per value in ``data``.

    Even values get a green background with white text, odd values an
    orange background. The result has the same length and order as
    ``data``, which is the shape ``Styler.apply`` expects back.
    """
    even_css = 'background-color: green; color:white;'
    odd_css = 'background-color: orange'
    styles = []
    for value in data:
        is_even = value % 2 == 0
        styles.append(even_css if is_even else odd_css)
    return styles
# Styler.apply calls the function once per column (default axis=0) and uses
# the returned strings as the CSS for that column's cells
df.style.apply(odd_or_even)