In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the pair-trading results from a CSV file located in the notebook's working directory.
stock_data = pd.read_csv("pair_ETF_0302.csv")

In [3]:
# Preview the first rows of the loaded frame (head() shows five rows by default).
stock_data.head()

Unnamed: 0,pairs,ave_return,total_return,volatility,sharp_ratio
0,"('NYT', 'BANC')",-1683.602874,-15.167593,139.6606,-0.108603
1,"('EEA', 'ESS')",7.915179,0.121772,123.131991,0.000989
2,"('BXP', 'LFC')",1194.149695,11.593686,88.070115,0.131642
3,"('PCF', 'ANIK')",2.441353,0.021605,13.484533,0.001602
4,"('NDSN', 'POL')",-293.861701,-3.719768,36.593718,-0.10165


# Column Names

Column names are one of the two indexes of a DataFrame (the other is the row index). `stock_data.columns` returns them as an `Index` object.

In [8]:
# List the column labels; the result is a pandas Index object, not a plain list.
stock_data.columns

Index([u'pairs', u'ave_return', u'total_return', u'volatility',
       u'sharp_ratio'],
      dtype='object')

In [25]:
# The column Index supports positional access: position 0 is the first column label.
stock_data.columns[0]

'pairs'

# Dataframe Index

In [39]:
# The row index (here a RangeIndex). It is an iterable, so it can be looped over
# or converted with list(), as the next cell does.
stock_data.index

RangeIndex(start=0, stop=58592, step=1)

In [20]:
# Materialize the row index as a list and keep only its first five labels.
list(stock_data.index)[:5]

[0, 1, 2, 3, 4]

You can use a column as the index with `set_index`, which returns a new DataFrame.

In [33]:
# Build a new frame whose row index is the "pairs" column and show its first two rows.
# The result is not assigned, so stock_data keeps its original integer index.
stock_data.set_index("pairs").head(2)

Unnamed: 0_level_0,ave_return,total_return,volatility,sharp_ratio
pairs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"('NYT', 'BANC')",-1683.602874,-15.167593,139.6606,-0.108603
"('EEA', 'ESS')",7.915179,0.121772,123.131991,0.000989


# Groupby & Aggregation 

`.groupby` and `.aggregate` are useful tools for organizing and summarizing data. Let's look at some examples.

In [43]:
# Note: groupby() does not return a DataFrame. It returns a DataFrameGroupBy object,
# which must be iterated or aggregated before any data can be displayed.
stock_data.groupby("ave_return")

<pandas.core.groupby.groupby.DataFrameGroupBy object at 0x000000000C6209B0>

In [44]:
# Keep the groupby object so the following cells can iterate over it and aggregate it.
# NOTE(review): "ave_return" holds continuous floats, so most groups presumably
# contain a single row — confirm this is the intended grouping key.
stock_group = stock_data.groupby("ave_return")

In [47]:
# A groupby object is iterable and yields (group_key, sub-DataFrame) pairs.
# next(iter(...)) fetches only the first pair instead of building a list of
# every group just to read element 0.
next(iter(stock_group))

(-inf,
                   pairs  ave_return  total_return  volatility  sharp_ratio
 671      ('AOI', 'JNP')        -inf          -inf         NaN          NaN
 15754    ('HRG', 'BLH')        -inf          -inf         NaN          NaN
 18802  ('HRZN', 'ISTR')        -inf          -inf         NaN          NaN
 20746    ('PAH', 'BAC')        -inf          -inf         NaN          NaN
 27999   ('NWL', 'BCOR')        -inf          -inf         NaN          NaN
 35942   ('TPC', 'BPFH')        -inf          -inf         NaN          NaN
 40324     ('BAK', 'KW')        -inf          -inf         NaN          NaN
 42970    ('CTS', 'CCA')        -inf          -inf         NaN          NaN
 45654  ('DATA', 'WSTG')        -inf          -inf         NaN          NaN
 47904  ('DVCR', 'PCYO')        -inf          -inf         NaN          NaN
 48735   ('WSTG', 'FSB')        -inf          -inf         NaN          NaN)

Aggregation is applied to a groupby object.

In [53]:
# Sum every column within each group and show the first two groups.
# The reducer is named by string: passing the Python builtin relies on pandas
# silently substituting its own implementation, which newer releases deprecate.
stock_group.aggregate("sum").head(2)

Unnamed: 0_level_0,total_return,volatility,sharp_ratio
ave_return,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-inf,-inf,0.0,0.0
-1958925000.0,-15797780.0,174118600.0,-0.09073


In [69]:
# Take the per-group maximum of every column and show the first two groups.
# The reducer is named by string: passing the Python builtin relies on pandas
# silently substituting its own implementation, which newer releases deprecate.
stock_group.aggregate("max").head(2)

Unnamed: 0_level_0,pairs,total_return,volatility,sharp_ratio
ave_return,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-inf,"('WSTG', 'FSB')",-inf,,
-1958925000.0,"('KS', 'CTX')",-15797780.0,174118600.0,-0.09073


In [70]:
# Take the per-group minimum of every column and show the first two groups.
# The reducer is named by string: passing the Python builtin relies on pandas
# silently substituting its own implementation, which newer releases deprecate.
stock_group.aggregate("min").head(2)

Unnamed: 0_level_0,pairs,total_return,volatility,sharp_ratio
ave_return,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-inf,"('AOI', 'JNP')",-inf,,
-1958925000.0,"('KS', 'CTX')",-15797780.0,174118600.0,-0.09073


You can pass a NumPy function, as long as it takes a sequence of values and returns a single aggregated value.

In [54]:
# Use a NumPy reducer as the aggregation function and show the first two groups.
# NOTE(review): recent pandas releases reportedly emit a FutureWarning when a NumPy
# reducer such as np.mean is passed here — confirm against the installed version.
stock_group.aggregate(np.mean).head(2)

Unnamed: 0_level_0,total_return,volatility,sharp_ratio
ave_return,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-inf,-inf,,
-1958925000.0,-15797780.0,174118600.0,-0.09073


If no existing function meets your needs, you can pass in your own function.

In [72]:
def my_mean(x):
    """Return the arithmetic mean of the values in ``x``.

    Parameters
    ----------
    x : iterable of numbers
        The values to average. Any iterable works (list, pandas Series,
        generator, ...); it does not need to support ``len()``.

    Returns
    -------
    float
        The sum of the values divided by how many there are, or NaN when
        ``x`` yields no values.

    Raises
    ------
    TypeError
        If a value cannot be added to a float (for example a string).
    """
    # Start from a float so the final division is never integer division.
    total = 0.0
    count = 0
    for value in x:
        total = total + value
        count += 1
    # An empty input has no mean; return NaN instead of dividing by zero.
    if count == 0:
        return float("nan")
    return total / count

In [73]:
# Aggregate with the custom function; the two groups shown match the np.mean output above.
stock_group.aggregate(my_mean).head(2)

Unnamed: 0_level_0,total_return,volatility,sharp_ratio
ave_return,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-inf,-inf,,
-1958925000.0,-15797780.0,174118600.0,-0.09073
