In [1]:
import numpy as np

import pandas as pd
from pandas import Series, DataFrame

import matplotlib.pyplot as plt


### Series - labeled values

In [2]:
# A Series pairs each value with a label; the index and the Series itself
# can both carry a name, which show up in the printed representation.
weights = pd.Series(
    data=[150, 170, 101],
    index=pd.Index(['bill', 'ted', 'sue'], name='Name'),
    name='weight',
)
print(weights)
# Look a single value up by its label.
print('sue', weights.loc['sue'])

Name
bill    150
ted     170
sue     101
Name: weight, dtype: int64
sue 101


In [3]:
# Boolean mask selection: keep only the entries whose value is at least 150.
weights.loc[weights.ge(150)]

Name
bill    150
ted     170
Name: weight, dtype: int64

In [4]:
# Integers mixed with a string: the Series falls back to the generic
# `object` dtype, as the displayed result shows.
mixed = pd.Series(
    data=[0, 1, 2, 'three'],
    index=pd.Index(['zero', 'one', 'two', 'three']),
)
mixed

zero         0
one          1
two          2
three    three
dtype: object

Slice by position (start and stop) or by label

In [5]:
# Positional slice via the explicit `.iloc` indexer: positions 1 and 2 are
# returned, the stop position 3 is excluded (normal Python slice semantics).
# `.iloc` makes it unambiguous that the integers are positions, not labels.
mixed.iloc[1:3]

one    1
two    2
dtype: object

In [6]:
# Label-based slice via the explicit `.loc` indexer. Unlike a positional
# slice, BOTH endpoints are included: labels have no general "next" value
# that could serve as an exclusive upper bound, so the stop label itself
# is part of the result.
mixed.loc['two':'three']

two          2
three    three
dtype: object

In [7]:
# A list of labels picks exactly those entries, in the order given.
mixed.loc[['one', 'three']]

one          1
three    three
dtype: object

A Series behaves partly like a dictionary (lookup by label) and partly like a NumPy array (vectorized operations on values)

In [8]:
# Array-style reduction over the underlying values: the largest weight.
weights.values.max()

170

In [9]:
# Membership among the VALUES has to be asked for explicitly; a plain
# `in` on the Series itself tests the labels instead.
bool(weights.eq(170).any())

True

In [10]:
# Dictionary-style membership: `in` checks the labels (the index).
'ted' in weights.index

True

### DataFrames

In [11]:
# Column-oriented input: each key becomes a column, each list its values.
raw = {
    'name': ['bill', 'ted', 'sue', 'stretch'],
    'weight': [150, 170, 101, 70],
    'height': [67, 71, 63, 70],
}
# The column order of a dict-built frame depends on the pandas version:
# old releases sorted the keys alphabetically, newer ones keep the dict's
# insertion order. Sorting the columns explicitly yields the alphabetical
# layout on every version.
people = pd.DataFrame(raw).sort_index(axis=1)
people

Unnamed: 0,height,name,weight
0,67,bill,150
1,71,ted,170
2,63,sue,101
3,70,stretch,70


In [12]:
# `columns=` picks the dict keys to use and fixes their left-to-right order.
people = pd.DataFrame(
    data=raw,
    columns=['name', 'weight', 'height'],
)
people

Unnamed: 0,name,weight,height
0,bill,150,67
1,ted,170,71
2,sue,101,63
3,stretch,70,70


In [13]:
# Replace the default 0..n-1 row labels with explicit IDs; the index is
# given the name 'ID', which appears as the header of the label column.
people = pd.DataFrame(
    data=raw,
    columns=['name', 'weight', 'height'],
    index=pd.Index(['16544E80', '69C9855A', '15D5E5CB', '24E6#A1'], name='ID'),
)
people

Unnamed: 0_level_0,name,weight,height
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
16544E80,bill,150,67
69C9855A,ted,170,71
15D5E5CB,sue,101,63
24E6#A1,stretch,70,70


### Row slicing works by position or by index label

In [14]:
# Positional row slice via `.iloc`: only the row at position 1 is returned
# (the stop position 2 is excluded). Bare `[]` on a DataFrame selects
# columns for labels but rows for slices, so the explicit indexer is clearer.
people.iloc[1:2]

Unnamed: 0_level_0,name,weight,height
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
69C9855A,ted,170,71


In [15]:
# Label-based row slice via `.loc`: both endpoint labels are included.
people.loc['16544E80':'69C9855A']

Unnamed: 0_level_0,name,weight,height
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
16544E80,bill,150,67
69C9855A,ted,170,71


In [16]:
# Attribute access and item access return the same column. Compare by value:
# whether the two expressions yield the very same Python object (`is`)
# depends on pandas' internal caching and is not guaranteed across versions.
people.height.equals(people['height'])

True

In [17]:
# A single column is returned as a Series that keeps the frame's 'ID' index.
people['height']

ID
16544E80    67
69C9855A    71
15D5E5CB    63
24E6#A1     70
Name: height, dtype: int64

In [18]:
# Element-wise comparison of two columns gives a boolean Series (one flag per row).
people.weight.eq(people.height)

ID
16544E80    False
69C9855A    False
15D5E5CB    False
24E6#A1      True
dtype: bool

In [19]:
# Use the boolean mask to keep only the rows where weight equals height.
people.loc[people.weight.eq(people.height)]

Unnamed: 0_level_0,name,weight,height
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
24E6#A1,stretch,70,70


#### Manipulation 

In [20]:
# Assigning to a new column name appends that column to the frame.
people['hair'] = None                                  # scalar is broadcast to every row
people['ratio'] = people.weight.div(people.height)     # element-wise weight / height
people['tall'] = people['height'].gt(68)               # boolean flag per row
people['group_weight'] = people['weight'].cumsum()     # running total in row order
people

Unnamed: 0_level_0,name,weight,height,hair,ratio,tall,group_weight
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
16544E80,bill,150,67,,2.238806,False,150
69C9855A,ted,170,71,,2.394366,True,320
15D5E5CB,sue,101,63,,1.603175,False,421
24E6#A1,stretch,70,70,,1.0,True,491


In [21]:
# Swap rows and columns: each person becomes a column, each attribute a row.
people.transpose()

ID,16544E80,69C9855A,15D5E5CB,24E6#A1
name,bill,ted,sue,stretch
weight,150,170,101,70
height,67,71,63,70
hair,,,,
ratio,2.23881,2.39437,1.60317,1
tall,False,True,False,True
group_weight,150,320,421,491
