In [2]:
import numpy as np
import pandas as pd

In [3]:
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 10)

In [4]:
s1 = pd.Series(2)
s1

0    2
dtype: int64

In [5]:
s1[0]

2

In [6]:
s2 = pd.Series([1,2,3,4,5])
s2

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [7]:
s2.values

array([1, 2, 3, 4, 5])

In [8]:
s2.index

RangeIndex(start=0, stop=5, step=1)

In [9]:
s3 = pd.Series([1,2,3], index=['a', 'b', 'c'])
s3

a    1
b    2
c    3
dtype: int64

In [10]:
s3.index

Index(['a', 'b', 'c'], dtype='object')

In [11]:
s3['c']

3

In [13]:
s4 = pd.Series(2, index=s2.index)
s4

0    2
1    2
2    2
3    2
4    2
dtype: int64

In [14]:
np.random.seed(12356)
pd.Series(np.random.rand(5))

0    0.398245
1    0.929489
2    0.557382
3    0.423091
4    0.104493
dtype: float64

In [15]:
pd.Series(np.linspace(0, 9, 10))

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
5    5.0
6    6.0
7    7.0
8    8.0
9    9.0
dtype: float64

In [16]:
pd.Series(np.arange(0,9))

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
dtype: int64

In [17]:
s6 = pd.Series({'a': 1, 'b': 2, 'c': 3})
s6

a    1
b    2
c    3
dtype: int64

In [18]:
s = pd.Series([0, 1, 1, 2, 3, 4, 5, 6, 7, np.nan])
s

0    0.0
1    1.0
2    1.0
3    2.0
4    3.0
5    4.0
6    5.0
7    6.0
8    7.0
9    NaN
dtype: float64

In [19]:
len(s)

10

In [20]:
s.size

10

In [21]:
s.shape

(10,)

In [22]:
s.count()

9

In [23]:
s.unique()

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7., nan])

In [24]:
s.value_counts()

1.0    2
7.0    1
6.0    1
5.0    1
4.0    1
3.0    1
2.0    1
0.0    1
dtype: int64

In [25]:
s.head()

0    0.0
1    1.0
2    1.0
3    2.0
4    3.0
dtype: float64

In [26]:
s.head(n=2)

0    0.0
1    1.0
dtype: float64

In [27]:
s.tail()

5    4.0
6    5.0
7    6.0
8    7.0
9    NaN
dtype: float64

In [28]:
s.tail(3)

7    6.0
8    7.0
9    NaN
dtype: float64

In [29]:
s.take([0, 3, 9])

0    0.0
3    2.0
9    NaN
dtype: float64

In [30]:
s3['a']

1

In [31]:
s3[1]

2

In [35]:
s3.take([0, 2])

a    1
c    3
dtype: int64

In [36]:
s3[['a', 'c']]

a    1
c    3
dtype: int64

In [38]:
s5 = pd.Series([1,2,3], index=[10,11,12])
s5

10    1
11    2
12    3
dtype: int64

In [39]:
s5[11]

2

In [40]:
s5.loc[12]

3

In [42]:
s5.iloc[2]

3

In [44]:
s5.loc[[12, 10]]

12    3
10    1
dtype: int64

In [45]:
s5.iloc[[2, 0]]

12    3
10    1
dtype: int64

In [46]:
s5.loc[[12, -1, 15]]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


 12    3.0
-1     NaN
 15    NaN
dtype: float64

In [47]:
s3.ix[['a', 'c']]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


a    1
c    3
dtype: int64

In [49]:
s6 = pd.Series([4,3,2,1], index=['a', 'b', 'c', 'd'])
s6

a    4
b    3
c    2
d    1
dtype: int64

In [52]:
s7 = pd.Series([4,3,2,1], index=['d', 'c', 'b', 'a'])
s7

d    4
c    3
b    2
a    1
dtype: int64

In [53]:
s6 + s7

d    8
c    6
b    4
a    2
dtype: int64

In [54]:
a1 = np.array([1,2,3,4])
a2 = np.array([4,3,2,1])
a1 + a2

array([5, 5, 5, 5])

In [55]:
s3 * 2

a    2
b    4
c    6
dtype: int64

In [56]:
t = pd.Series(2, s3.index)
t

a    2
b    2
c    2
dtype: int64

In [57]:
s3 * t

a    2
b    4
c    6
dtype: int64

In [58]:
s8 = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 5})
s8

a    1
b    2
c    3
d    5
dtype: int64

In [60]:
s9 = pd.Series({'b': 6, 'c': 7, 'd': 9, 'e': 10})
s9

b     6
c     7
d     9
e    10
dtype: int64

In [61]:
s8 + s9

a     NaN
b     8.0
c    10.0
d    14.0
e     NaN
dtype: float64

In [62]:
s10 = pd.Series([1.0, 2.0, 3.0], index=['a', 'a', 'b'])
s10

a    1.0
a    2.0
b    3.0
dtype: float64

In [63]:
s11 = pd.Series([4.0, 5.0, 6.0], index=['a', 'a', 'c'])
s11

a    4.0
a    5.0
c    6.0
dtype: float64

In [64]:
s10 + s11

a    5.0
a    6.0
a    6.0
a    7.0
b    NaN
c    NaN
dtype: float64

In [65]:
nda = np.array([1,2,3,4,5])
nda.mean()

3.0

In [66]:
nda = np.array([1,2,3,4,np.NaN])
nda.mean()

nan

In [67]:
s = pd.Series(nda)
s.mean()

2.5

In [68]:
s.mean(skipna=False)

nan

In [69]:
s = pd.Series(np.arange(0, 10))
s > 5

0    False
1    False
2    False
3    False
4    False
5    False
6     True
7     True
8     True
9     True
dtype: bool

In [70]:
logicalResults = s > 5
s[logicalResults]

6    6
7    7
8    8
9    9
dtype: int64

In [71]:
s[s > 5]

6    6
7    7
8    8
9    9
dtype: int64

In [74]:
s[(s > 5) & (s < 7)]

6    6
dtype: int64

In [75]:
(s > 0).all()

False

In [76]:
(s > 10).any()

False

In [79]:
((s > 2) & (s <6)).sum()

3

In [82]:
s[(s > 2) & (s <6)].sum()

12

In [84]:
s = pd.Series(np.random.randn(5))
s

0    0.419997
1    0.718337
2    0.229271
3   -0.628105
4   -0.080847
dtype: float64

In [88]:
s.index = ['a', 'b', 'c', 'd', 'e']

In [89]:
s1 = pd.Series(np.random.randn(3))
s2 = pd.Series(np.random.randn(3))
combined = pd.concat([s1, s2])
combined

0    0.649459
1    0.714291
2   -0.993328
0   -0.586361
1   -0.350350
2   -0.704669
dtype: float64

In [90]:
combined.index = np.arange(0, len(combined))
combined

0    0.649459
1    0.714291
2   -0.993328
3   -0.586361
4   -0.350350
5   -0.704669
dtype: float64

In [91]:
np.random.seed(654321)
s1 = pd.Series(np.random.randn(4), ['a', 'b', 'c', 'd'])
s2 = s1.reindex(['a', 'c', 'g'])
s2

a    0.767619
c    0.151836
g         NaN
dtype: float64

In [92]:
s1[['a', 'c', 'g']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  return self.loc[key]


a    0.767619
c    0.151836
g         NaN
dtype: float64

In [93]:
s2['a'] = 0
s2

a    0.000000
c    0.151836
g         NaN
dtype: float64

In [94]:
s1

a    0.767619
b   -0.884729
c    0.151836
d    0.161152
dtype: float64

In [96]:
s1 = pd.Series([0,1,2], index=[0,1,2])
s2 = pd.Series([3,4,5], index=['0','1','2'])
s1 + s2

0   NaN
1   NaN
2   NaN
0   NaN
1   NaN
2   NaN
dtype: float64

In [97]:
s2.index = s2.index.values.astype(int)
s1 + s2

0    3
1    5
2    7
dtype: int64

In [98]:
s2 = s.copy()
s2.reindex(['a', 'f'], fill_value=0)

a    0.419997
f    0.000000
dtype: float64