In [1]:
import pandas as pd
import numpy as np
rg = np.random.default_rng(21)

# Series

In [2]:
# Build a labelled Series of five random integers in [1, 10).
# Draw from the seeded generator `rg` created in the first cell instead of the
# unseeded legacy `np.random.randint`, so Restart & Run All yields the same values.
# NOTE: outputs recorded in this notebook came from an unseeded run; re-run to refresh them.
s1 = pd.Series(rg.integers(1, high=10, size=5), index=['a', 'b', 'c', 'd', 'e'])
s1

a    2
b    8
c    5
d    2
e    1
dtype: int64

In [3]:
# Build a Series of five random integers in [1, 10) without an explicit index.
# Draw from the seeded generator `rg` created in the first cell instead of the
# unseeded legacy `np.random.randint`, so Restart & Run All yields the same values.
# NOTE: outputs recorded in this notebook came from an unseeded run; re-run to refresh them.
s2 = pd.Series(rg.integers(1, high=10, size=5))
s2

0    2
1    5
2    4
3    8
4    2
dtype: int64

In [4]:
# A Series built from a dict uses the dict keys as its index labels.
d = dict(p=100, q=101, r=102, s=103, t=104)
s3 = pd.Series(data=d)
s3

p    100
q    101
r    102
s    103
t    104
dtype: int64

In [5]:
# Explicit labels (s1) and dict keys (s3) give an Index of labels;
# omitting `index` (s2) gives a default RangeIndex.
s1.index, s2.index, s3.index

(Index(['a', 'b', 'c', 'd', 'e'], dtype='object'),
 RangeIndex(start=0, stop=5, step=1),
 Index(['p', 'q', 'r', 's', 't'], dtype='object'))

In [6]:
# Passing `index` together with a dict selects those keys in that order.
# 'u' is not a key of `d`, so its value is NaN and the dtype becomes float64.
s4 = pd.Series(d, index=['p', 'q', 'u'])
s4

p    100.0
q    101.0
u      NaN
dtype: float64

In [7]:
# A scalar value is repeated for every label in the index.
s5 = pd.Series(data=5.0, index=list('abc'))
s5

a    5.0
b    5.0
c    5.0
dtype: float64

## Series is ndarray-like

In [8]:
# Positional access to the first element.
# Use `.iloc` explicitly: `s1[0]` on a label-indexed Series relies on a positional
# fallback that pandas 2.1 deprecated (integer keys are to be treated as labels).
s1.iloc[0]

2

In [9]:
# Integer slices select by position, like slicing an ndarray; labels are kept.
s1[:3]

a    2
b    8
c    5
dtype: int64

In [10]:
s1[0:5:2]

a    2
c    5
e    1
dtype: int64

In [11]:
# Boolean-mask selection: keep only the elements at or above the median.
s1[s1 >= s1.median()]

a    2
b    8
c    5
d    2
dtype: int64

In [12]:
np.exp(s1)

a       7.389056
b    2980.957987
c     148.413159
d       7.389056
e       2.718282
dtype: float64

In [13]:
# Select several elements by position.
# Use `.iloc` explicitly: a list of integers passed to `s1[...]` on a label-indexed
# Series relies on a positional fallback that pandas 2.1 deprecated.
s1.iloc[[0, 1, 3]]

a    2
b    8
d    2
dtype: int64

In [14]:
s1.array

<PandasArray>
[2, 8, 5, 2, 1]
Length: 5, dtype: int64

In [15]:
# Get the values as a plain NumPy ndarray (the index labels are dropped).
s1.to_numpy()

array([2, 8, 5, 2, 1])

## Series is dict-like

In [16]:
s1['a']

2

In [17]:
s1[['a', 'b', 'c']]

a    2
b    8
c    5
dtype: int64

In [18]:
'e' in s1

True

In [19]:
'f' in s1

False

In [37]:
# Indexing with a label that is not in the index raises KeyError.
# Catch and print it so the notebook still runs top to bottom (Restart & Run All).
try:
    s1['f']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'f'

In [20]:
# `.get` returns the supplied default (here NaN) for a missing label instead of raising.
s1.get('f', np.nan)

nan

## Vectorized operations and label alignment

In [21]:
s1 ** 2

a     4
b    64
c    25
d     4
e     1
dtype: int64

In [22]:
s1 + s1

a     4
b    16
c    10
d     4
e     2
dtype: int64

In [23]:
# Arithmetic aligns on labels, not positions. s1 (labels 'a'..'e') and s2 (labels 0..4)
# share no labels, so the result covers the union of both indexes and every value is NaN.
s1 + s2

a   NaN
b   NaN
c   NaN
d   NaN
e   NaN
0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
dtype: float64

In [24]:
np.sum(s1)

18

In [25]:
# Operands are aligned by label, so this is NOT a shifted sum: 'b'..'d' are added to
# themselves, while 'a' and 'e' each appear in only one slice and become NaN.
s1[1:] + s1[:-1]

a     NaN
b    16.0
c    10.0
d     4.0
e     NaN
dtype: float64

## Name attribute

In [26]:
s1.name

In [27]:
s1.name = 'My first series'

In [28]:
s1.name

'My first series'

In [29]:
# `rename` returns a new Series with the given name; s1 itself is left unchanged
# (the next cell shows it still has its previous name).
s1.rename('My renamed first series')

a    2
b    8
c    5
d    2
e    1
Name: My renamed first series, dtype: int64

In [30]:
s1

a    2
b    8
c    5
d    2
e    1
Name: My first series, dtype: int64

# DataFrame

In [2]:
# Two Series with partially overlapping labels: 'one' lacks 'd', 'two' lacks 'e'.
d = {
    'one': pd.Series(data=[1., 2., 3., 5.], index=list('abce')),
    'two': pd.Series(data=[10., 20., 30., 40.], index=list('abcd')),
}

In [3]:
# Build a DataFrame from the dict of Series: each key becomes a column.
# The frame itself is displayed in the next cell; here we echo the source dict `d`.
df = pd.DataFrame(d)
d

{'one': a    1.0
 b    2.0
 c    3.0
 e    5.0
 dtype: float64,
 'two': a    10.0
 b    20.0
 c    30.0
 d    40.0
 dtype: float64}

In [4]:
df

Unnamed: 0,one,two
a,1.0,10.0
b,2.0,20.0
c,3.0,30.0
d,,40.0
e,5.0,


In [5]:
# A dict of equal-length lists: keys become columns, rows get a default 0..n-1 index.
dd = dict(one=[1, 2, 3, 4], two=[4, 3, 2, 1])
pd.DataFrame(data=dd)

Unnamed: 0,one,two
0,1,4
1,2,3
2,3,2
3,4,1


In [7]:
pd.DataFrame(dd, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1,4
b,2,3
c,3,2
d,4,1


In [8]:
# A list of record dicts: each dict is one row; a key missing from a record becomes NaN.
ddd = [dict(a=1, b=2), dict(a=10, b=20, c=30)]
pd.DataFrame(data=ddd)

Unnamed: 0,a,b,c
0,1,2,
1,10,20,30.0


In [10]:
# `columns` selects keys from the records; none of 'aa', 'bb', 'cc' exist in `ddd`,
# so every cell in the resulting frame is NaN.
pd.DataFrame(ddd, index=['one', 'two'], columns=['aa', 'bb', 'cc'])

Unnamed: 0,aa,bb,cc
one,,,
two,,,


In [11]:
# With column names that match the record keys, the values are picked up;
# `index` supplies the row labels.
pd.DataFrame(ddd, index=['one', 'two'], columns=['a', 'b', 'c'])

Unnamed: 0,a,b,c
one,1,2,
two,10,20,30.0


In [12]:
# You can automatically create a MultiIndexed frame by passing a dictionary keyed by tuples.

In [13]:
# Outer dict keys are tuples -> they become a two-level column MultiIndex.
# Inner dict keys are tuples -> they become a two-level row MultiIndex.
# Any (row, column) combination not present in the input is filled with NaN,
# e.g. row ('A', 'D') only has a value under column ('b', 'b').
pd.DataFrame({('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
              ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
              ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
              ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
              ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}})

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0
