In [1]:
# source:https://github.com/fonnesbeck/statistical-analysis-python-tutorial/blob/master/1.%20Introduction%20to%20Pandas.ipynb

In [2]:
from IPython.core.display import HTML
HTML("<iframe src=http://pandas.pydata.org width=800 height=350></iframe>")



In [3]:
%matplotlib inline
import pandas as pd
import numpy as np



In [4]:
counts = pd.Series([632,1638,569,115])
print(counts)

0     632
1    1638
2     569
3     115
dtype: int64


In [5]:
counts.values

array([ 632, 1638,  569,  115], dtype=int64)

In [6]:
counts.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
bacteria = pd.Series([632,1638,569,115],
                    index=['Firmicutes', 'Proteobacteria','Actinobacteria','Bacteroidetes'])
bacteria

Firmicutes         632
Proteobacteria    1638
Actinobacteria     569
Bacteroidetes      115
dtype: int64

In [8]:
bacteria['Actinobacteria']

569

In [9]:
bacteria[[name.endswith('bacteria') for name in bacteria.index]]

Proteobacteria    1638
Actinobacteria     569
dtype: int64

In [10]:
[name.endswith('bacteria') for name in bacteria.index]

[False, True, True, False]

In [11]:
bacteria[0:2]

Firmicutes         632
Proteobacteria    1638
dtype: int64

In [12]:
bacteria.name = 'counts'
bacteria.index.name='phylum'
bacteria

phylum
Firmicutes         632
Proteobacteria    1638
Actinobacteria     569
Bacteroidetes      115
Name: counts, dtype: int64

In [13]:
np.log(bacteria)

phylum
Firmicutes        6.448889
Proteobacteria    7.401231
Actinobacteria    6.343880
Bacteroidetes     4.744932
Name: counts, dtype: float64

In [14]:
bacteria[bacteria>1000]

phylum
Proteobacteria    1638
Name: counts, dtype: int64

In [15]:
bacteria_dict = {'Firmicutes':632, 'Proteobacteria': 1638, 'Actinobacteria':569,'Bacteroidetes':115}
pd.Series(bacteria_dict)

Firmicutes         632
Proteobacteria    1638
Actinobacteria     569
Bacteroidetes      115
dtype: int64

In [16]:
bacteria2 = pd.Series(bacteria_dict, index=['Cyanobacteria','Firmicutes','Proteobacteria','Actinobacteria'])
bacteria2

Cyanobacteria        NaN
Firmicutes         632.0
Proteobacteria    1638.0
Actinobacteria     569.0
dtype: float64

In [17]:
bacteria2.isnull()

Cyanobacteria      True
Firmicutes        False
Proteobacteria    False
Actinobacteria    False
dtype: bool

In [18]:
bacteria+bacteria2

Actinobacteria    1138.0
Bacteroidetes        NaN
Cyanobacteria        NaN
Firmicutes        1264.0
Proteobacteria    3276.0
dtype: float64

In [19]:
data = pd.DataFrame({'value':[632,1638,569,115,433,1130,754,555],
                     'patient':[1,1,1,1,2,2,2,2],
                     'phylum':['Firmicutes','Proteobacteria','Actinobacteria',
                               'Bacteroidetes','Firmicutes','Proteobacteria','Actinobacteria','Bacteroidetes']})
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
2,569,1,Actinobacteria
3,115,1,Bacteroidetes
4,433,2,Firmicutes
5,1130,2,Proteobacteria
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [20]:
data[['phylum','value','patient']]

Unnamed: 0,phylum,value,patient
0,Firmicutes,632,1
1,Proteobacteria,1638,1
2,Actinobacteria,569,1
3,Bacteroidetes,115,1
4,Firmicutes,433,2
5,Proteobacteria,1130,2
6,Actinobacteria,754,2
7,Bacteroidetes,555,2


In [21]:
data.columns

Index(['value', 'patient', 'phylum'], dtype='object')

In [22]:
data['value']

0     632
1    1638
2     569
3     115
4     433
5    1130
6     754
7     555
Name: value, dtype: int64

In [23]:
data.value

0     632
1    1638
2     569
3     115
4     433
5    1130
6     754
7     555
Name: value, dtype: int64

In [24]:
type(data[['value']])

pandas.core.frame.DataFrame

In [25]:
data.loc[3]

value                115
patient                1
phylum     Bacteroidetes
Name: 3, dtype: object

In [26]:
data = pd.DataFrame({0:{'patient':1,'phylum':'Firmicutes', 'value':632},
                     1:{'patient':1,'phylum':'Proteobacteria', 'value':1638},
                     2:{'patient':1,'phylum':'Actinobacteria', 'value':569},
                     3:{'patient':1,'phylum':'Bacteroidetes', 'value':115},
                     4:{'patient':2,'phylum':'Firmicutes', 'value':433},
                     5:{'patient':2,'phylum':'Proteobacteria', 'value':1130},
                     6:{'patient':2,'phylum':'Actinobacteria', 'value':754},
                     7:{'patient':2,'phylum':'Bacteroidetes', 'value':555}})

In [30]:
data

Unnamed: 0,0,1,2,3,4,5,6,7
patient,1,1,1,1,2,2,2,2
phylum,Firmicutes,Proteobacteria,Actinobacteria,Bacteroidetes,Firmicutes,Proteobacteria,Actinobacteria,Bacteroidetes
value,632,1638,569,115,433,1130,754,555


In [31]:
data=data.T
data

Unnamed: 0,patient,phylum,value
0,1,Firmicutes,632
1,1,Proteobacteria,1638
2,1,Actinobacteria,569
3,1,Bacteroidetes,115
4,2,Firmicutes,433
5,2,Proteobacteria,1130
6,2,Actinobacteria,754
7,2,Bacteroidetes,555


In [32]:
vals = data.value
vals

0     632
1    1638
2     569
3     115
4     433
5    1130
6     754
7     555
Name: value, dtype: object

In [33]:
vals[5]=0
vals

0     632
1    1638
2     569
3     115
4     433
5       0
6     754
7     555
Name: value, dtype: object

In [34]:
data

Unnamed: 0,patient,phylum,value
0,1,Firmicutes,632
1,1,Proteobacteria,1638
2,1,Actinobacteria,569
3,1,Bacteroidetes,115
4,2,Firmicutes,433
5,2,Proteobacteria,0
6,2,Actinobacteria,754
7,2,Bacteroidetes,555


In [35]:
vals = data.value.copy()
vals[5] = 1000
data

Unnamed: 0,patient,phylum,value
0,1,Firmicutes,632
1,1,Proteobacteria,1638
2,1,Actinobacteria,569
3,1,Bacteroidetes,115
4,2,Firmicutes,433
5,2,Proteobacteria,0
6,2,Actinobacteria,754
7,2,Bacteroidetes,555


In [36]:
data.value[3] = 14
data

Unnamed: 0,patient,phylum,value
0,1,Firmicutes,632
1,1,Proteobacteria,1638
2,1,Actinobacteria,569
3,1,Bacteroidetes,14
4,2,Firmicutes,433
5,2,Proteobacteria,0
6,2,Actinobacteria,754
7,2,Bacteroidetes,555


In [37]:
data['year'] = 2013
data

Unnamed: 0,patient,phylum,value,year
0,1,Firmicutes,632,2013
1,1,Proteobacteria,1638,2013
2,1,Actinobacteria,569,2013
3,1,Bacteroidetes,14,2013
4,2,Firmicutes,433,2013
5,2,Proteobacteria,0,2013
6,2,Actinobacteria,754,2013
7,2,Bacteroidetes,555,2013


In [38]:
data.treatment = 1
data

Unnamed: 0,patient,phylum,value,year
0,1,Firmicutes,632,2013
1,1,Proteobacteria,1638,2013
2,1,Actinobacteria,569,2013
3,1,Bacteroidetes,14,2013
4,2,Firmicutes,433,2013
5,2,Proteobacteria,0,2013
6,2,Actinobacteria,754,2013
7,2,Bacteroidetes,555,2013


In [39]:
data.treatment

1

In [40]:
treatment = pd.Series([0]*4+[1]*2)
treatment

0    0
1    0
2    0
3    0
4    1
5    1
dtype: int64

In [41]:
data['tereatment'] = treatment
data

Unnamed: 0,patient,phylum,value,year,tereatment
0,1,Firmicutes,632,2013,0.0
1,1,Proteobacteria,1638,2013,0.0
2,1,Actinobacteria,569,2013,0.0
3,1,Bacteroidetes,14,2013,0.0
4,2,Firmicutes,433,2013,1.0
5,2,Proteobacteria,0,2013,1.0
6,2,Actinobacteria,754,2013,
7,2,Bacteroidetes,555,2013,


In [45]:
#month = ['Jan','Feb','Mar','Apr']
#data['month'] = month

data['month']=['Jan']*len(data)
data

Unnamed: 0,patient,phylum,value,year,tereatment,month
0,1,Firmicutes,632,2013,0.0,Jan
1,1,Proteobacteria,1638,2013,0.0,Jan
2,1,Actinobacteria,569,2013,0.0,Jan
3,1,Bacteroidetes,14,2013,0.0,Jan
4,2,Firmicutes,433,2013,1.0,Jan
5,2,Proteobacteria,0,2013,1.0,Jan
6,2,Actinobacteria,754,2013,,Jan
7,2,Bacteroidetes,555,2013,,Jan
