In [2]:
import pandas as pd
import numpy as np

class color:
    """ANSI escape sequences for styling text written to a terminal.

    Emit one of the style codes before the text and ``END`` after it so the
    styling does not carry over into whatever is printed next.
    """
    BOLD = '\033[1m'   # switch bold on
    CYAN = '\033[96m'  # bright-cyan foreground
    END = '\033[0m'    # reset every attribute back to the terminal default

In [3]:
# Month-end timestamps for January through June 2018.
# NOTE: pandas >= 2.2 deprecates the 'M' alias in favour of 'ME'; 'M' is kept
# here so the cell still runs on older pandas releases.
dates = pd.date_range(start='1/1/2018', periods=6, freq='M')

# DataFrame of standard-normal samples with one row per date (6 rows) and
# 4 columns. The shape is derived from the index and the column labels so the
# three can never disagree. No seed is set, so the values differ on every run.
column_labels = ['A', 'B', 'C', 'D']
df = pd.DataFrame(np.random.randn(len(dates), len(column_labels)),
                  index=dates,
                  columns=column_labels)

# color.BOLD is a separate print argument, so a space follows it in the output
print(color.BOLD, df)

[1m                    A         B         C         D
2018-01-31  0.758382 -0.246323 -1.019489 -0.425718
2018-02-28  0.486131  0.831800 -0.563784 -0.335895
2018-03-31 -0.456077 -0.083808 -0.197743 -0.209686
2018-04-30 -0.144154  1.254693  1.997075  1.704234
2018-05-31 -0.971981  0.622208 -0.017082  0.350343
2018-06-30  0.025314  0.329782 -0.643223  0.491544


### Indexing using <code>[]</code> :

In [4]:
# Pull column A out of the frame as a Series using []
column_a = df['A']
# Look up the entry labelled dates[2] (the third row) and show it
third_label = dates[2]
print(column_a[third_label])

-0.4560766296272373


In [5]:
# Swap the contents of columns A and B.
# Caution: this mutates df, so running the cell a second time swaps them back.
a_then_b = df[['A', 'B']]
df[['B', 'A']] = a_then_b

# Print 2 columns selected by label
print(df[['A', 'B']])

# Print the same 2 columns selected by integer position
print(df.iloc[:, :2])

                   A         B
2018-01-31 -0.246323  0.758382
2018-02-28  0.831800  0.486131
2018-03-31 -0.083808 -0.456077
2018-04-30  1.254693 -0.144154
2018-05-31  0.622208 -0.971981
2018-06-30  0.329782  0.025314
                   A         B
2018-01-31 -0.246323  0.758382
2018-02-28  0.831800  0.486131
2018-03-31 -0.083808 -0.456077
2018-04-30  1.254693 -0.144154
2018-05-31  0.622208 -0.971981
2018-06-30  0.329782  0.025314


### Indexing using attribute access <code>.</code> :

In [6]:
# Build a Series labelled 'a', 'b', 'c'
sa = pd.Series([1, 2, 3], index=['a', 'b', 'c'])

# Set the value at label 'a' to 5 through attribute access
sa.a = 5

# Read label 'b' back through attribute access and report it under the Series
b_report = "Value on index 'b': {}".format(sa.b)
print(sa, b_report, sep="\n\n")

a    5
b    2
c    3
dtype: int64

Value on index 'b': 2


In [7]:
# Work on a copy so df itself is left as it was
df1 = df.copy()

# Overwrite column A with 0..n-1 through attribute assignment
row_count = len(df1.index)
df1.A = list(range(row_count))

# Print columns A and B from df1
print(df1.A, df1.B, sep="\n\n")

2018-01-31    0
2018-02-28    1
2018-03-31    2
2018-04-30    3
2018-05-31    4
2018-06-30    5
Freq: M, Name: A, dtype: int64

2018-01-31    0.758382
2018-02-28    0.486131
2018-03-31   -0.456077
2018-04-30   -0.144154
2018-05-31   -0.971981
2018-06-30    0.025314
Freq: M, Name: B, dtype: float64


### <code>.loc</code> & <code>.iloc</code> for indexing

.loc is for selecting things by label

<code>df.loc[row_index, column_index]</code> — with label slices, both the start and the end label are included

In [8]:
# Rows up to and including the label '2018-03-31', restricted to columns A, B and C
last_row_label = '2018-03-31'
df.loc[:last_row_label, ['A', 'B', 'C']]

Unnamed: 0,A,B,C
2018-01-31,-0.246323,0.758382,-1.019489
2018-02-28,0.8318,0.486131,-0.563784
2018-03-31,-0.083808,-0.456077,-0.197743


.iloc is for selecting things by number

<code>df.iloc[row_index, column_index]</code> — with positional slices the start is inclusive and the stop is exclusive (e.g. in 2:4, position 2 is included and position 4 is not)

In [9]:
# First three rows (positions 0-2) and the columns at positions 1 and 2
df.iloc[0:3, 1:3]

Unnamed: 0,B,C
2018-01-31,0.758382,-1.019489
2018-02-28,0.486131,-0.563784
2018-03-31,-0.456077,-0.197743


A comparison returns a boolean DataFrame (a mask), which can then be used for getting values:

In [16]:
# Element-wise test on the top-left 3x3 corner: True where the value is positive
top_left = df.iloc[:3, :3]
top_left > 0

Unnamed: 0,A,B,C
2018-01-31,False,True,False
2018-02-28,True,True,False
2018-03-31,False,False,False


### Indexing with <code>isin</code>

In [18]:
# Small frame with one integer column and two string columns
df2 = pd.DataFrame({
    'vals': [1, 2, 3, 4],
    'ids': list('abfn'),
    'ids2': list('ancn'),
})
print(df2)

   vals ids ids2
0     1   a    a
1     2   b    n
2     3   f    c
3     4   n    n


In [20]:
# Candidate values to look for: two strings and two integers
values = ['a', 'b'] + [1, 3]

# Boolean frame shaped like df2: True wherever a cell equals one of the candidates
df2.isin(values)

Unnamed: 0,vals,ids,ids2
0,True,True,True
1,False,True,False
2,True,False,False
3,False,False,False
