In [1]:
import pandas as pd
import numpy as np

# Series with explicit string labels. The values are random draws and no
# seed is set in this cell, so the numbers differ between fresh runs.
s = pd.Series(np.random.randn(5), index=list("abcde"))
# Show the Series, then its index, then a second Series built without
# labels (it gets the default integer index).
print(s, s.index, pd.Series(np.random.rand(5)), sep="\n")

a   -0.025397
b    1.358461
c   -0.661626
d    2.221528
e   -0.293933
dtype: float64
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
0    0.606684
1    0.259021
2    0.989537
3    0.710528
4    0.264271
dtype: float64


In [2]:
# Build a Series from a dict: the keys become the index labels and keep
# the dict's insertion order.
d = dict(b=1, a=0, c=2)
pd.Series(d)

b    1
a    0
c    2
dtype: int64

In [3]:
d = dict(a=0.0, b=1.0, c=2.0)
# First the Series as built from the dict, then the same data reindexed by
# an explicit label list; the label "d" has no entry in the dict and shows
# up as NaN.
print(
    pd.Series(d),
    pd.Series(d, index=list("bcda")),
    sep="\n",
)

a    0.0
b    1.0
c    2.0
dtype: float64
b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64


In [4]:
# A scalar value is repeated for every label in the given index.
pd.Series(5.0, index=["a","b","c","d","e"])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [5]:
# Positional access on a label-indexed Series goes through .iloc.
# Plain s[0] relied on pandas falling back to positional lookup for
# integer keys on a non-integer index, which is deprecated.
print(s.iloc[0])     # first element
print(s.iloc[:3])    # first three elements
print(s.iloc[-3:])   # last three elements

-0.02539743575767313
a   -0.025397
b    1.358461
c   -0.661626
dtype: float64
c   -0.661626
d    2.221528
e   -0.293933
dtype: float64


In [6]:
# Boolean mask: keep only the entries strictly greater than the median of s.
s[s>s.median()]

b    1.358461
d    2.221528
dtype: float64

In [7]:
print(s)
# Pick elements by position, in the listed order. .iloc makes the
# positional intent explicit; s[[4, 3, 1]] depended on the deprecated
# integer-key fallback for a string-labelled index.
print(s.iloc[[4, 3, 1]])

a   -0.025397
b    1.358461
c   -0.661626
d    2.221528
e   -0.293933
dtype: float64
e   -0.293933
d    2.221528
b    1.358461
dtype: float64


In [8]:
# NumPy ufuncs apply element-wise to a Series and the result keeps the labels.
print(np.exp(s))

a    0.974922
b    3.890200
c    0.516011
d    9.221410
e    0.745327
dtype: float64


In [9]:
# dtype of the values stored in the Series.
s.dtype

dtype('float64')

In [10]:
# .array exposes the array object that backs the Series.
s.array

<PandasArray>
[-0.02539743575767313,    1.358460644232365,  -0.6616263991544843,
   2.2215279115442996,  -0.2939328339768827]
Length: 5, dtype: float64

In [11]:
# Convert the values to a plain NumPy ndarray (the index labels are dropped).
s.to_numpy()

array([-0.02539744,  1.35846064, -0.6616264 ,  2.22152791, -0.29393283])

**Series is dict-like**

In [12]:
# Look up a value by its index label, the way a dict is read by key.
s["a"]

-0.02539743575767313

In [13]:
# Same label-based lookup, for the last label.
s["e"]

-0.2939328339768827

In [14]:
# Redisplay the whole Series for reference.
s

a   -0.025397
b    1.358461
c   -0.661626
d    2.221528
e   -0.293933
dtype: float64

In [15]:
# Membership is tested against the index labels, not the values.
"e" in s

True

In [16]:
# "f" is not one of the index labels, so this evaluates to False.
'f' in s

False

In [17]:
# get() returns None for a label that is not in the index.
print(s.get("f"))

None


In [18]:
# The second argument to get() is the default returned for a missing label.
print(s.get("f", np.nan))

nan


**Vectorized operations and label alignment with Series**

In [19]:
# Element-wise addition; the two operands are matched up by index label.
s+s

a   -0.050795
b    2.716921
c   -1.323253
d    4.443056
e   -0.587866
dtype: float64

In [20]:
# Multiplying by a scalar applies to every element.
s*2

a   -0.050795
b    2.716921
c   -1.323253
d    4.443056
e   -0.587866
dtype: float64

In [21]:
# Bare reference to the ufunc: nothing is called, the function object
# itself is displayed.
np.exp

<ufunc 'exp'>

In [22]:
# Two overlapping positional slices: everything but the first element and
# everything but the last element.
print(s.iloc[1:], "\n", s.iloc[:-1])
# Addition aligns on labels; "a" and "e" each occur in only one operand,
# so their results are NaN.
s.iloc[1:] + s.iloc[:-1]

b    1.358461
c   -0.661626
d    2.221528
e   -0.293933
dtype: float64 
 a   -0.025397
b    1.358461
c   -0.661626
d    2.221528
dtype: float64


a         NaN
b    2.716921
c   -1.323253
d    4.443056
e         NaN
dtype: float64

In [23]:
# NOTE: this rebinds `s` to a new Series (default integer index, name
# 'something'); the label-indexed Series used by the cells above is no
# longer reachable through this name.
s = pd.Series(np.random.rand(5), name='something')

In [24]:
# Display the new Series; its name appears in the footer of the repr.
s

0    0.482830
1    0.486785
2    0.433154
3    0.773274
4    0.306520
Name: something, dtype: float64

In [25]:
# rename() with a scalar returns a new Series carrying that name.
s2 = s.rename("different")
s2.name

# NOTE: s and s2 refer to different objects; s itself is not renamed.

'different'

In [26]:
# Walk both Series in lockstep and print every pair of equal values.
# s2 was produced from s by rename(), so each pair is expected to match.
# zip() replaces the manual counter and per-index lookups.
for e1, e2 in zip(s, s2):
    if e1 == e2:
        print(e1, e2)
    


0.4828295330443916 0.4828295330443916
0.48678501614272296 0.48678501614272296
0.4331541331481116 0.4331541331481116
0.7732735823240349 0.7732735823240349
0.30652023555641017 0.30652023555641017


**DataFrame**

In [27]:
# Two Series with overlapping but unequal indexes ("two" has the extra
# label "d"), keyed by the column name each one will get in a DataFrame.
d = dict(
    one=pd.Series([1.0, 2.0, 3.0], index=list("abc")),
    two=pd.Series([1.0, 2.0, 3.0, 4.0], index=list("abcd")),
)

In [28]:
# Display the dict of Series as-is.
d

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64,
 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [29]:
# Build a DataFrame from the dict of Series: the dict keys become the
# columns and the rows cover the union of the Series labels.
df = pd.DataFrame(d)

In [30]:
# The same Series that is stored under d["one"], constructed on its own.
pd.Series([1.0,2.0,3.0], index=["a", "b", "c"])

a    1.0
b    2.0
c    3.0
dtype: float64

In [31]:
# Display the DataFrame; column "one" has no value for row "d".
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [32]:
# Passing index selects the rows to keep and fixes their order.
pd.DataFrame(d, index=["d", "b", "a"])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [33]:
# The row and column labels can be accessed through the index and
# columns attributes, respectively.
# Here both are passed explicitly: rows come back in the order given by
# index, and the requested column "three", which is not a key of d, is
# filled with missing values.
pd.DataFrame(d, index=["d", "b", "a"], columns=["two", "three"])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [34]:
# When a particular set of columns is passed along with a dict of data,
# the passed columns override the keys in the dict.

In [35]:
# Row labels of the DataFrame.
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [36]:
# Column labels of the DataFrame.
df.columns

Index(['one', 'two'], dtype='object')