In [5]:
import pandas as pd
import numpy as np

In [7]:
# Build a labelled, named Series from five standard-normal draws.
# No random seed is set, so the values change on every run.
s = pd.Series(np.random.randn(5), index=list('abcde'), name='example')

s

a   -0.193556
b    0.912507
c    0.469954
d   -0.710688
e    0.306986
Name: example, dtype: float64

In [8]:
# A scalar is broadcast to every label of the supplied index.
pd.Series(5, index=list('abcde'))

a    5
b    5
c    5
d    5
e    5
dtype: int64

In [9]:
# Positional access to the first element. Use .iloc explicitly: a plain
# integer key on a string-labelled Series only works through a deprecated
# positional fallback (FutureWarning in pandas 2.1+).
s.iloc[0]

-0.19355595352473698

In [10]:
# Positional slice: the first three entries, labels preserved.
s.iloc[:3]

a   -0.193556
b    0.912507
c    0.469954
Name: example, dtype: float64

In [11]:
# Pick entries by position, in the order listed. Use .iloc explicitly:
# integer keys on a string-labelled Series only work through a deprecated
# positional fallback (FutureWarning in pandas 2.1+).
s.iloc[[4, 3, 1]]

e    0.306986
d   -0.710688
b    0.912507
Name: example, dtype: float64

In [12]:
# The underlying data as a NumPy array (index labels and name are dropped).
s.values

array([-0.19355595,  0.91250732,  0.46995402, -0.71068767,  0.30698628])

In [14]:
# Overwrite the value stored under label 'e', then display the Series.
s.loc['e'] = 500
s

a     -0.193556
b      0.912507
c      0.469954
d     -0.710688
e    500.000000
Name: example, dtype: float64

In [15]:
# A boolean list acts as a mask: entries flagged True are kept.
s.loc[[True, True, False, False, True]]

a     -0.193556
b      0.912507
e    500.000000
Name: example, dtype: float64

In [16]:
# The far more common form: build the mask from the Series itself.
# The mask is displayed next to the filtered result for comparison.
s[s.gt(0)], s.gt(0)

(b      0.912507
 c      0.469954
 e    500.000000
 Name: example, dtype: float64, a    False
 b     True
 c     True
 d    False
 e     True
 Name: example, dtype: bool)

In [17]:
# Masked assignment writes back into s in place: flip the sign of every
# negative entry. Be careful with this - the original values are overwritten.
s.loc[s < 0] *= -1

In [18]:
# Display s after the in-place update: no negative values remain.
s

a      0.193556
b      0.912507
c      0.469954
d      0.710688
e    500.000000
Name: example, dtype: float64

In [19]:
# Element-wise comparison: returns a boolean Series with the same index.
s > 0

a    True
b    True
c    True
d    True
e    True
Name: example, dtype: bool

In [20]:
# Arithmetic between two Series is applied element by element.
s + s 

a       0.387112
b       1.825015
c       0.939908
d       1.421375
e    1000.000000
Name: example, dtype: float64

In [21]:
# NumPy ufuncs accept a Series and return a Series (index and name kept).
np.exp(s)

a     1.213557e+00
b     2.490559e+00
c     1.599921e+00
d     2.035390e+00
e    1.403592e+217
Name: example, dtype: float64

In [22]:
# Reductions collapse the Series to a single scalar.
s.mean()

100.45734099299507

In [23]:
# Just be careful with some operations:
# if the indexes don't match up you will get NaNs.
# (No NaN shows up here: s was made non-negative above, so s > 0 keeps every
# label and both operands share the same index. A strict subset such as
# s[s > 1] would leave NaN at the labels it drops.)
s + s[s > 0]

a       0.387112
b       1.825015
c       0.939908
d       1.421375
e    1000.000000
Name: example, dtype: float64

In [24]:
# Columns built from Series are aligned on the union of their labels:
# 'one' has no entry for 'd', so that cell is NaN.
d = dict(
    one=pd.Series([1., 2., 3.], index=list('abc')),
    two=pd.Series([1., 2., 3., 4.], index=list('abcd')),
)

df = pd.DataFrame(d)

df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [25]:
# Relabel both axes in place by assigning new label lists.
df.index = list('abcd')
df.columns = ['1', '2']
df

Unnamed: 0,1,2
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [26]:
# The scalar under 'one' is broadcast across all four rows of the array.
d = dict(one='Hellow', two=np.array([1., 2., 3., 4.]))

df = pd.DataFrame(d, index=list('abcd'))

# selecting a single column gives back a Series named after that column
df['one']

a    Hellow
b    Hellow
c    Hellow
d    Hellow
Name: one, dtype: object

In [27]:
# Remove column 'one' from df in place.
del df['one']

In [28]:
# Derived column: element-wise sum of 'two' with itself.
df['three'] = df['two'].add(df['two'])
# A scalar is broadcast to every row.
df['four'] = 'four'
# Only the first two rows are supplied; assignment aligns on the index,
# so the remaining rows of 'five' are left missing.
df['five'] = df['four'].iloc[:2]

In [29]:
# Display the frame: 'five' is missing for rows c and d because the
# assigned slice only covered rows a and b.
df

Unnamed: 0,two,three,four,five
a,1.0,2.0,four,four
b,2.0,4.0,four,four
c,3.0,6.0,four,
d,4.0,8.0,four,


In [30]:
# A single column label selects that column as a Series.
df["two"]

a    1.0
b    2.0
c    3.0
d    4.0
Name: two, dtype: float64

In [31]:
# A list of labels selects several columns, in the order given, as a DataFrame.
df[['five', 'two']]

Unnamed: 0,five,two
a,four,1.0
b,four,2.0
c,,3.0
d,,4.0


In [32]:
# .loc[row_label, column_label] returns a single cell.
df.loc['a', 'two']

1.0

In [33]:
# Label-based slices include both endpoints; the step of -1 walks the rows
# in reverse order (d down to a).
df.loc['d':'a':-1, 'two':'three']

Unnamed: 0,two,three
d,4.0,8.0
c,3.0,6.0
b,2.0,4.0
a,1.0,2.0


In [35]:
# .iloc is positional: rows 1 and 2 (end exclusive) of column 0, as a Series.
df.iloc[1:3, 0]

b    2.0
c    3.0
Name: two, dtype: float64

In [34]:
# With a single indexer, .iloc slices rows and keeps every column.
df.iloc[1:3]

Unnamed: 0,two,three,four,five
b,2.0,4.0,four,four
c,3.0,6.0,four,


In [36]:
# Returns a new DataFrame with the same contents; the result is only
# displayed here, not bound to a name.
df.copy()

Unnamed: 0,two,three,four,five
a,1.0,2.0,four,four
b,2.0,4.0,four,four
c,3.0,6.0,four,
d,4.0,8.0,four,


In [37]:
# Cast the float column to integers. Use the builtin `int`: the `np.int`
# alias was deprecated in NumPy 1.20 and removed in 1.24, where it raises
# AttributeError.
df['two'].astype(int)

a    1
b    2
c    3
d    4
Name: two, dtype: int64

In [38]:
# Transpose: row labels become columns and column labels become rows.
df.T

Unnamed: 0,a,b,c,d
two,1,2,3,4
three,2,4,6,8
four,four,four,four,four
five,four,four,,


In [39]:
# Preview only the first two rows.
df.head(2)

Unnamed: 0,two,three,four,five
a,1.0,2.0,four,four
b,2.0,4.0,four,four


In [40]:
# Print a structural summary: index, columns, non-null counts, dtypes, memory.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, a to d
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   two     4 non-null      float64
 1   three   4 non-null      float64
 2   four    4 non-null      object 
 3   five    2 non-null      object 
dtypes: float64(2), object(2)
memory usage: 320.0+ bytes


In [41]:
# include="all" summarises the object columns (count/unique/top/freq)
# alongside the numeric ones (mean/std/quantiles).
df.describe(include="all")

Unnamed: 0,two,three,four,five
count,4.0,4.0,4,2
unique,,,1,1
top,,,four,four
freq,,,4,2
mean,2.5,5.0,,
std,1.290994,2.581989,,
min,1.0,2.0,,
25%,1.75,3.5,,
50%,2.5,5.0,,
75%,3.25,6.5,,


In [42]:
# Widen the frame with twenty constant columns labelled by the integers
# 0..19; column k holds the value k in every row.
for col in range(20):
    df[col] = col

df.head()

Unnamed: 0,two,three,four,five,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
a,1.0,2.0,four,four,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
b,2.0,4.0,four,four,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
c,3.0,6.0,four,,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
d,4.0,8.0,four,,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19


In [43]:
# Transposing the preview makes a wide frame readable: one row per column.
df.head().T

Unnamed: 0,a,b,c,d
two,1,2,3,4
three,2,4,6,8
four,four,four,four,four
five,four,four,,
0,0,0,0,0
1,1,1,1,1
2,2,2,2,2
3,3,3,3,3
4,4,4,4,4
5,5,5,5,5


In [44]:
# Display configuration: show up to 100 rows and 7 decimal places.
# The fully qualified key is required: in pandas >= 1.4 the bare 'precision'
# pattern also matches 'styler.format.precision', so it is ambiguous and
# raises OptionError.
pd.set_option('display.max_rows', 100)
pd.set_option('display.precision', 7)