In [1]:
import pandas as pd
counts = pd.Series([632, 1638, 569, 115])
counts

0     632
1    1638
2     569
3     115
dtype: int64

In [2]:
import pandas as pd
bacteria = pd.Series([632, 1638, 569, 115], 
    index=['Firmicutes', 'Proteobacteria', 'Actinobacteria', 'Bacteroidetes'])

bacteria

Firmicutes         632
Proteobacteria    1638
Actinobacteria     569
Bacteroidetes      115
dtype: int64

In [3]:
bacteria.name = 'counts'
bacteria.index.name = 'phylum'
bacteria

phylum
Firmicutes         632
Proteobacteria    1638
Actinobacteria     569
Bacteroidetes      115
Name: counts, dtype: int64

In [4]:
bacteria_dict = {'Firmicutes': 632, 'Proteobacteria': 1500, 'Actinobacteria': 569, 'Bacteroidetes': 115}
print(bacteria_dict)
pd.Series(bacteria_dict)

{'Firmicutes': 632, 'Proteobacteria': 1500, 'Actinobacteria': 569, 'Bacteroidetes': 115}


Firmicutes         632
Proteobacteria    1500
Actinobacteria     569
Bacteroidetes      115
dtype: int64

In [5]:
import pandas as pd
data = {"Province": ["FL", "FL", "NH", "NH", "ZH"],
        "Year": [2013, 2014, 2013, 2014, 2014],
        "Literacy": [0.2, 0.1, 0.5, 0.3, 0.5]}
print(data)
data = pd.DataFrame(data)
data

{'Province': ['FL', 'FL', 'NH', 'NH', 'ZH'], 'Year': [2013, 2014, 2013, 2014, 2014], 'Literacy': [0.2, 0.1, 0.5, 0.3, 0.5]}


Unnamed: 0,Province,Year,Literacy
0,FL,2013,0.2
1,FL,2014,0.1
2,NH,2013,0.5
3,NH,2014,0.3
4,ZH,2014,0.5


In [7]:
df = pd.DataFrame(data, columns=["Province", "Literacy","Year"])
df

Unnamed: 0,Province,Literacy,Year
0,FL,0.2,2013
1,FL,0.1,2014
2,NH,0.5,2013
3,NH,0.3,2014
4,ZH,0.5,2014


In [8]:
df['nonsense'] = df.Year / df.Literacy
df

Unnamed: 0,Province,Literacy,Year,nonsense
0,FL,0.2,2013,10065.0
1,FL,0.1,2014,20140.0
2,NH,0.5,2013,4026.0
3,NH,0.3,2014,6713.333333
4,ZH,0.5,2014,4028.0


In [9]:
df['Serie_aligned'] = pd.Series(range(5), index=[0,1,2, 3, 4])
df

Unnamed: 0,Province,Literacy,Year,nonsense,Serie_aligned
0,FL,0.2,2013,10065.0,0
1,FL,0.1,2014,20140.0,1
2,NH,0.5,2013,4026.0,2
3,NH,0.3,2014,6713.333333,3
4,ZH,0.5,2014,4028.0,4


In [10]:
pd.DataFrame(df.to_dict())

Unnamed: 0,Province,Literacy,Year,nonsense,Serie_aligned
0,FL,0.2,2013,10065.0,0
1,FL,0.1,2014,20140.0,1
2,NH,0.5,2013,4026.0,2
3,NH,0.3,2014,6713.333333,3
4,ZH,0.5,2014,4028.0,4


In [11]:
data = [{'a': i, 'b':10* i}for i in range(6)]
print(data)
pd.DataFrame(data)

[{'a': 0, 'b': 0}, {'a': 1, 'b': 10}, {'a': 2, 'b': 20}, {'a': 3, 'b': 30}, {'a': 4, 'b': 40}, {'a': 5, 'b': 50}]


Unnamed: 0,a,b
0,0,0
1,1,10
2,2,20
3,3,30
4,4,40
5,5,50


In [13]:
import pandas as pd 
pd.DataFrame([{'aa': 1, 'bb': 2}, {'bb': 3, 'cc': 6}])

Unnamed: 0,aa,bb,cc
0,1.0,2,
1,,3,6.0


In [16]:
import pandas as pd
import numpy as np
pd.DataFrame(np.random.randint(2, 12),
             columns=['foo', 'bar'],
             index=['a', 'b', 'c'])

Unnamed: 0,foo,bar
a,6,6
b,6,6
c,6,6


In [17]:
rng = np.random.RandomState(15)
ser = pd.Series(rng.randint(0, 10, 4))
ser

0    8
1    5
2    5
3    7
dtype: int64

In [18]:
dfr = pd.DataFrame(rng.randint(0, 10, (5, 4)),
                  columns=['A', 'B', 'C', 'D'])
dfr

Unnamed: 0,A,B,C,D
0,0,7,5,6
1,1,7,0,4
2,9,7,5,3
3,6,8,2,1
4,1,0,5,2


In [19]:
np.exp(ser)

0    2980.957987
1     148.413159
2     148.413159
3    1096.633158
dtype: float64

In [20]:
np.sin(dfr * np.pi / 4)

Unnamed: 0,A,B,C,D
0,0.0,-0.7071068,-0.707107,-1.0
1,0.707107,-0.7071068,0.0,1.224647e-16
2,0.707107,-0.7071068,-0.707107,0.7071068
3,-1.0,-2.449294e-16,1.0,0.7071068
4,0.707107,0.0,-0.707107,1.0


In [21]:
area = pd.Series({'Alaska': 1723337, 'Texas': 695662,
                  'California': 423967}, name='area')
population = pd.Series({'California': 38332521, 'Texas': 26448193,
                        'New York': 19651127}, name='population')
print(area)
population


Alaska        1723337
Texas          695662
California     423967
Name: area, dtype: int64


California    38332521
Texas         26448193
New York      19651127
Name: population, dtype: int64

In [22]:
area.index | population.index

Index(['Alaska', 'California', 'New York', 'Texas'], dtype='object')

In [23]:
A = pd.Series([2, 4, 6], index=[0, 1, 2])
B = pd.Series([1, 3, 5], index=[1, 2, 3])
print(A)
print(B)
B
A + B

0    2
1    4
2    6
dtype: int64
1    1
2    3
3    5
dtype: int64


0    NaN
1    5.0
2    9.0
3    NaN
dtype: float64

In [24]:
A.add(B, fill_value=0)

0    2.0
1    5.0
2    9.0
3    5.0
dtype: float64

In [25]:
df

Unnamed: 0,Province,Literacy,Year,nonsense,Serie_aligned
0,FL,0.2,2013,10065.0,0
1,FL,0.1,2014,20140.0,1
2,NH,0.5,2013,4026.0,2
3,NH,0.3,2014,6713.333333,3
4,ZH,0.5,2014,4028.0,4


In [26]:
df2 = pd.DataFrame({"Province": ["FL", "NH", "ZH"], "Population": ["100000", "200000", "300000"]})
df2

Unnamed: 0,Province,Population
0,FL,100000
1,NH,200000
2,ZH,300000


In [27]:
df.merge(df2)

Unnamed: 0,Province,Literacy,Year,nonsense,Serie_aligned,Population
0,FL,0.2,2013,10065.0,0,100000
1,FL,0.1,2014,20140.0,1,100000
2,NH,0.5,2013,4026.0,2,200000
3,NH,0.3,2014,6713.333333,3,200000
4,ZH,0.5,2014,4028.0,4,300000


In [28]:
df3 = pd.DataFrame({"province": ["FL", "NH"], "Population": ["100000", "200000"]})
df3
df.merge(df3, right_on='province', left_on='Province')

Unnamed: 0,Province,Literacy,Year,nonsense,Serie_aligned,province,Population
0,FL,0.2,2013,10065.0,0,FL,100000
1,FL,0.1,2014,20140.0,1,FL,100000
2,NH,0.5,2013,4026.0,2,NH,200000
3,NH,0.3,2014,6713.333333,3,NH,200000


In [29]:
df4 = pd.DataFrame({"Province": ["FL", "NH", "UT"], "Population": ["100000", "200000", "50000"]})
df.merge(df4, how='outer')

Unnamed: 0,Province,Literacy,Year,nonsense,Serie_aligned,Population
0,FL,0.2,2013.0,10065.0,0.0,100000.0
1,FL,0.1,2014.0,20140.0,1.0,100000.0
2,NH,0.5,2013.0,4026.0,2.0,200000.0
3,NH,0.3,2014.0,6713.333333,3.0,200000.0
4,ZH,0.5,2014.0,4028.0,4.0,
5,UT,,,,,50000.0


In [30]:
df5 = pd.DataFrame({"Province": ["FL", "NH", "FL"], "Population": ["100000", "200000", "50000"]})
print(df)
df.merge(df5, how='outer')

  Province  Literacy  Year      nonsense  Serie_aligned
0       FL       0.2  2013  10065.000000              0
1       FL       0.1  2014  20140.000000              1
2       NH       0.5  2013   4026.000000              2
3       NH       0.3  2014   6713.333333              3
4       ZH       0.5  2014   4028.000000              4


Unnamed: 0,Province,Literacy,Year,nonsense,Serie_aligned,Population
0,FL,0.2,2013,10065.0,0,100000.0
1,FL,0.2,2013,10065.0,0,50000.0
2,FL,0.1,2014,20140.0,1,100000.0
3,FL,0.1,2014,20140.0,1,50000.0
4,NH,0.5,2013,4026.0,2,200000.0
5,NH,0.3,2014,6713.333333,3,200000.0
6,ZH,0.5,2014,4028.0,4,


In [34]:
serie_a = pd.Series([np.nan, 2.5, np.nan, 3.5, 4.5, np.nan],
                     index=['f', 'e', 'd', 'c', 'b', 'a'])
serie_a

f    NaN
e    2.5
d    NaN
c    3.5
b    4.5
a    NaN
dtype: float64

In [35]:
serie_b = pd.Series(np.arange(len(serie_a), dtype=np.float64),
                 index=['f', 'e', 'd', 'c', 'b', 'a'])
serie_b


f    0.0
e    1.0
d    2.0
c    3.0
b    4.0
a    5.0
dtype: float64

In [36]:
pd.Series(np.where(pd.isnull(serie_a), serie_b, serie_a), index=serie_a.index)

f    0.0
e    2.5
d    2.0
c    3.5
b    4.5
a    5.0
dtype: float64

In [37]:
serie_a.combine_first(serie_b)

f    0.0
e    2.5
d    2.0
c    3.5
b    4.5
a    5.0
dtype: float64