## Setup

In [1]:
import pandas as pd
import numpy as np
import datetime

# turn of data table rendering (of the 90s)
pd.set_option('display.notebook_repr_html', False)   

## Creating a DataFrame

In [2]:
# Collection of data columns
s1 = np.random.randn(5)
s2 = [True, True, False, True, False]
s3 = ['Apple', 'Banana', 'Tomato', 'Bean', 'Rice']

# Dict with added column names
data = {'Randnum': s1, 'IsBool': s2, 'Name': s3}
df = pd.DataFrame(data)
df

    Randnum  IsBool    Name
0  0.177730    True   Apple
1  0.136812    True  Banana
2  0.330615   False  Tomato
3 -1.694660    True    Bean
4 -0.471962   False    Rice

In [4]:
print(df.index)
print(df.columns)

RangeIndex(start=0, stop=5, step=1)
Index(['Randnum', 'IsBool', 'Name'], dtype='object')


In [5]:
# Create a new column and assign it all 127
df['New'] = 127
df

    Randnum  IsBool    Name  New
0  0.177730    True   Apple  127
1  0.136812    True  Banana  127
2  0.330615   False  Tomato  127
3 -1.694660    True    Bean  127
4 -0.471962   False    Rice  127

## Data retrieval

In [6]:
# Select the Randnum column
df.Randnum

0    0.177730
1    0.136812
2    0.330615
3   -1.694660
4   -0.471962
Name: Randnum, dtype: float64

In [7]:
# Select the first three rows
df[:3]

    Randnum  IsBool    Name  New
0  0.177730    True   Apple  127
1  0.136812    True  Banana  127
2  0.330615   False  Tomato  127

In [9]:
# Select the second row
df.iloc[1]

Randnum    0.136812
IsBool         True
Name         Banana
New             127
Name: 1, dtype: object

In [15]:
# Select the Randnum value of the second row
print(df.iloc[1, 2]) # or
print(df.iloc[1, [2,3]])

Banana
Name    Banana
New        127
Name: 1, dtype: object


In [19]:
# Selecting specific rows and columns
df.loc[[0, 2, 6], ['Name', 'Randnum', 'Unknown']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  


     Name   Randnum  Unknown
0   Apple  0.177730      NaN
2  Tomato  0.330615      NaN
6     NaN       NaN      NaN

## Conditions

In [20]:
# Retrieve boolean Series, True if Randnum is smaller than zero
belowzero = df.Randnum < 0
belowzero

0    False
1    False
2    False
3     True
4     True
Name: Randnum, dtype: bool

In [21]:
# Selects all rows meeting the belowzero condition
df[belowzero]

    Randnum  IsBool  Name  New
3 -1.694660    True  Bean  127
4 -0.471962   False  Rice  127

In [22]:
# Retrieve boolean Series, True if Randnum is smaller than zero
isapple = df['Name'] == 'Apple'
isapple

0     True
1    False
2    False
3    False
4    False
Name: Name, dtype: bool

In [23]:
# Select belowzero AND isapple conditions
df[belowzero & isapple]

Empty DataFrame
Columns: [Randnum, IsBool, Name, New]
Index: []

In [24]:
# Select belowzero OR isapple conditions
df[belowzero | isapple]

    Randnum  IsBool   Name  New
0  0.177730    True  Apple  127
3 -1.694660    True   Bean  127
4 -0.471962   False   Rice  127

## Date range as an index

In [25]:
# Set the index to a date range
df.index = pd.date_range('1-1-2015', periods=5, freq='d')
df.index.name = 'Date'
df

             Randnum  IsBool    Name  New
Date                                     
2015-01-01  0.177730    True   Apple  127
2015-01-02  0.136812    True  Banana  127
2015-01-03  0.330615   False  Tomato  127
2015-01-04 -1.694660    True    Bean  127
2015-01-05 -0.471962   False    Rice  127

## Nested dictionary to DataFrame

In [26]:
# Create a nested dictionary of equal inner value-count
data = {'Paris': {'N': 1.2, 'E': 4, 'S': 2.9, 'W': 0.8},
        'Amsterdam': {'N': 2.3, 'E': 1.7, 'S': 2.1, 'W': 7.2},
        'London': {'N': 9.7, 'E': 3.1, 'S': 7.2, 'W': 2}}

df2 = pd.DataFrame(data)
df2

   Paris  Amsterdam  London
E    4.0        1.7     3.1
N    1.2        2.3     9.7
S    2.9        2.1     7.2
W    0.8        7.2     2.0