In [1]:
import pandas as pd
import numpy as np

In [2]:
# Wrap a plain list in a Series. No index is passed, so pandas supplies
# default integer labels starting at 0.
counts = pd.Series(data=[632, 1638, 569, 115])
counts

0     632
1    1638
2     569
3     115
dtype: int64

In [3]:
# The data held by the Series, exposed as a NumPy array (labels are dropped).
counts.values

array([ 632, 1638,  569,  115])

In [4]:
# The index object holding the labels; here it is the default integer range.
counts.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
# Same counts as before, but labelled by phylum name instead of by position.
bacteria = pd.Series(
    data=[632, 1638, 569, 115],
    index=['Firmicutes', 'Proteobacteria', 'Actinobacteria', 'Bacteroidetes'],
)

bacteria

Firmicutes         632
Proteobacteria    1638
Actinobacteria     569
Bacteroidetes      115
dtype: int64

In [6]:
# Look up a single value by its index label.
bacteria['Actinobacteria']

569

In [7]:
# Keep only the entries whose label ends in 'bacteria', using a list of
# booleans (one per label) as the selector.
bacteria[[label.endswith('bacteria') for label in bacteria.index]]

Proteobacteria    1638
Actinobacteria     569
dtype: int64

In [8]:
# The boolean list itself: one True/False per index label, in index order.
[label.endswith('bacteria') for label in bacteria.index]

[False, True, True, False]

In [9]:
# NumPy functions accept a Series directly; the result keeps the index labels.
np.log(bacteria)

Firmicutes        6.448889
Proteobacteria    7.401231
Actinobacteria    6.343880
Bacteroidetes     4.744932
dtype: float64

In [10]:
# Comparing a Series against a scalar yields another Series, not a bare list.
type(bacteria > 1000)

pandas.core.series.Series

In [11]:
# A dict maps naturally onto a Series: keys become labels, values become data.
bacteria_dict = {
    'Proteobacteria': 1638,
    'Actinobacteria': 569,
    'Firmicutes': 632,
    'Bacteroidetes': 115,
}
pd.Series(bacteria_dict)

Proteobacteria    1638
Actinobacteria     569
Firmicutes         632
Bacteroidetes      115
dtype: int64

In [12]:
# Passing an explicit index alongside a dict selects and orders by those
# labels; a requested label with no dict entry ('Cyanobacteria') comes out
# as NaN.
bacteria2 = pd.Series(
    data=bacteria_dict,
    index=['Cyanobacteria', 'Firmicutes', 'Proteobacteria', 'Actinobacteria'],
)
bacteria2

Cyanobacteria        NaN
Firmicutes         632.0
Proteobacteria    1638.0
Actinobacteria     569.0
dtype: float64

In [13]:
# Flag missing entries: True wherever the value is NaN.
bacteria2.isnull()

Cyanobacteria      True
Firmicutes        False
Proteobacteria    False
Actinobacteria    False
dtype: bool

In [14]:
# Arithmetic aligns the operands on their index labels; any label that is
# absent (or NaN) on either side yields NaN in the result.
bacteria + bacteria2

Actinobacteria    1138.0
Bacteroidetes        NaN
Cyanobacteria        NaN
Firmicutes        1264.0
Proteobacteria    3276.0
dtype: float64

In [15]:
# Build a small table with one row per (patient, phylum) pair.
# Each dict key becomes a column; all lists have the same length (8).
data = pd.DataFrame({
    'value': [632, 1638, 569, 115, 433, 1130, 754, 555],
    'patient': [1] * 4 + [2] * 4,
    'phylum': ['Firmicutes', 'Proteobacteria', 'Actinobacteria', 'Bacteroidetes'] * 2,
})
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
2,569,1,Actinobacteria
3,115,1,Bacteroidetes
4,433,2,Firmicutes
5,1130,2,Proteobacteria
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [16]:
# Per-column dtypes; the string column 'phylum' is reported as object.
data.dtypes

value       int64
patient     int64
phylum     object
dtype: object

In [17]:
# Attribute-style access returns the 'value' column as a Series.
# No copy is requested here; the cells that follow show that writing through
# `vals` also changes `data`.
vals = data.value
vals

0     632
1    1638
2     569
3     115
4     433
5    1130
6     754
7     555
Name: value, dtype: int64

In [18]:
# Deliberate demonstration: `vals` was taken from `data` without copying, so
# this write is also visible in `data` (see the next cell's output). pandas
# warns that a value is being set on a copy of a slice from a DataFrame.
# NOTE(review): whether the write reaches `data` depends on the pandas
# version and its copy-on-write setting; confirm before relying on it.
vals[5] = 0
vals

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


0     632
1    1638
2     569
3     115
4     433
5       0
6     754
7     555
Name: value, dtype: int64

In [19]:
# Row 5 of 'value' is now 0: the assignment made through `vals` changed the
# original DataFrame.
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
2,569,1,Actinobacteria
3,115,1,Bacteroidetes
4,433,2,Firmicutes
5,0,2,Proteobacteria
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [20]:
# Take an explicit copy first so that later edits stay out of `data`.
vals = data['value'].copy()
vals.loc[5] = 1000
# `data` is displayed to confirm that row 5 was not touched by the edit above.
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
2,569,1,Actinobacteria
3,115,1,Bacteroidetes
4,433,2,Firmicutes
5,0,2,Proteobacteria
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [21]:
# Overwrite 'value' for the rows labelled 3, 4 and 6.
# A single .loc call selects rows and column in one step, so the write is
# guaranteed to land in `data` itself. The chained form
# `data.value[[...]] = ...` assigns into an intermediate object, triggers
# SettingWithCopyWarning, and is not guaranteed to modify `data`.
data.loc[[3, 4, 6], 'value'] = [14, 21, 5]
data


Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
2,569,1,Actinobacteria
3,14,1,Bacteroidetes
4,21,2,Firmicutes
5,0,2,Proteobacteria
6,5,2,Actinobacteria
7,555,2,Bacteroidetes


In [22]:
# Assigning a scalar to a new column label creates the column and fills
# every row with that value.
data['year'] = 2013
data

Unnamed: 0,value,patient,phylum,year
0,632,1,Firmicutes,2013
1,1638,1,Proteobacteria,2013
2,569,1,Actinobacteria,2013
3,14,1,Bacteroidetes,2013
4,21,2,Firmicutes,2013
5,0,2,Proteobacteria,2013
6,5,2,Actinobacteria,2013
7,555,2,Bacteroidetes,2013


In [23]:
# Deliberate demonstration: attribute assignment does NOT add a column.
# The frame displayed below still has only value/patient/phylum/year.
# Bracket assignment (data['treatment'] = ...) is what creates a column.
data.treatment = 1
data

Unnamed: 0,value,patient,phylum,year
0,632,1,Firmicutes,2013
1,1638,1,Proteobacteria,2013
2,569,1,Actinobacteria,2013
3,14,1,Bacteroidetes,2013
4,21,2,Firmicutes,2013
5,0,2,Proteobacteria,2013
6,5,2,Actinobacteria,2013
7,555,2,Bacteroidetes,2013


In [24]:
# Reads back the plain attribute set in the previous cell (the scalar 1),
# not a column of the DataFrame.
data.treatment

1

In [25]:
# Rows whose phylum name ends in 'bacteria' AND whose value exceeds 100.
data.loc[
    data['phylum'].str.endswith('bacteria') & data['value'].gt(100)
]

Unnamed: 0,value,patient,phylum,year
1,1638,1,Proteobacteria,2013
2,569,1,Actinobacteria,2013


In [26]:
# Six treatment flags (four 0s, then two 1s) on a default 0..5 index;
# intentionally shorter than the 8-row `data` frame.
treatment = pd.Series([0, 0, 0, 0, 1, 1])
treatment

0    0
1    0
2    0
3    0
4    1
5    1
dtype: int64

In [27]:
# Assigning a Series as a column aligns it to data's index by label.
# `treatment` has no entries for labels 6 and 7, so those rows are left
# missing, and the column is shown as floats (0.0 / 1.0) rather than ints.
data['treatment'] = treatment
data

Unnamed: 0,value,patient,phylum,year,treatment
0,632,1,Firmicutes,2013,0.0
1,1638,1,Proteobacteria,2013,0.0
2,569,1,Actinobacteria,2013,0.0
3,14,1,Bacteroidetes,2013,0.0
4,21,2,Firmicutes,2013,1.0
5,0,2,Proteobacteria,2013,1.0
6,5,2,Actinobacteria,2013,
7,555,2,Bacteroidetes,2013,
