# Brief pandas reference

In [1]:
import numpy as np
import pandas as pd

## Series

In [3]:
# Build a Series from a plain list; pandas supplies a default integer index.
ser = pd.Series(data=[1, 2, 3, 4, 5])
ser

0    1
1    2
2    3
3    4
4    5
dtype: int64

### Index

A series has index and array representations.

In [4]:
# Labels of the Series (a default RangeIndex here, since none was supplied).
ser.index

RangeIndex(start=0, stop=5, step=1)

In [5]:
# Values of the Series, exposed as a pandas extension array.
ser.array

<NumpyExtensionArray>
[1, 2, 3, 4, 5]
Length: 5, dtype: int64

An index can be assigned at construction. Usage is the same with a custom index.

In [6]:
# Supply explicit string labels instead of the default integer index.
ser2 = pd.Series(data=[1, 2, 3, 4], index=list('abcd'))
ser2.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [7]:
# Label-based lookup, like reading a dictionary key.
ser2['a']

1

In [8]:
# Label-based assignment mutates ser2 in place; later cells see a == 0.
ser2['a'] = 0
ser2

a    0
b    2
c    3
d    4
dtype: int64

In [10]:
# NumPy ufuncs apply element-wise and keep the index; log1p(x) = log(1 + x).
np.log1p(ser2)

a    0.000000
b    1.098612
c    1.386294
d    1.609438
dtype: float64

Index can be reassigned as well.

In [29]:
# Assign the new labels on a copy. Mutating ser2 itself would make the
# membership checks further down ('d' in ser2) return False on a fresh
# Restart & Run All, contradicting their recorded output.
ser2_relabeled = ser2.copy()
ser2_relabeled.index = ['z', 'y', 'x', 'w']
ser2_relabeled

z    0
y    2
x    3
w    4
dtype: int64

### Dictionary

A Series is almost like a dictionary: `in` tests the index labels (the "keys"), not the values.

In [11]:
# `in` tests the index labels: 0 is a label of ser's default RangeIndex.
0 in ser

True

In [12]:
# Membership looks at labels; this is True as long as ser2 is labelled 'a'..'d'.
'd' in ser2

True

In [13]:
# 4 is a value of ser2, not one of its labels, so this is False.
4 in ser2

False

In [15]:
# A dict maps straight onto a Series: keys become labels, values become data.
breakfasts = dict(eggs=1.49, bacons=1.99, pancakes=0.99)
ser3 = pd.Series(breakfasts)
ser3

eggs        1.49
bacons      1.99
pancakes    0.99
dtype: float64

In [16]:
# Round-trip back to a plain dict (labels become keys).
ser3.to_dict()

{'eggs': 1.49, 'bacons': 1.99, 'pancakes': 0.99}

Changing the order of the series is easy: pass the desired labels as `index`. Labels missing from the data become `NaN`.

In [18]:
# index= selects and orders the dict entries: 'potatoes' is not in the dict
# so it becomes NaN, and 'pancakes' is not requested so it is dropped.
favorites = ['bacons', 'eggs', 'potatoes']
ser4 = pd.Series(breakfasts, index=favorites)
ser4

bacons      1.99
eggs        1.49
potatoes     NaN
dtype: float64

### Checking NA

In [19]:
# Boolean mask: True where the value is missing (NaN).
ser4.isna()

bacons      False
eggs        False
potatoes     True
dtype: bool

In [20]:
# Boolean mask: True where a value is present (the inverse of isna()).
ser4.notna()

bacons       True
eggs         True
potatoes    False
dtype: bool

### Operations

Arithmetic operations align on index labels; a label that is missing (or `NaN`) on either side yields `NaN`.

In [22]:
# Values are matched by label, not position: 'pancakes' exists only in ser3
# and 'potatoes' is NaN in ser4, so both come out as NaN.
ser3 + ser4

bacons      3.98
eggs        2.98
pancakes     NaN
potatoes     NaN
dtype: float64

### Name

A Series and its index can each have a name.

In [27]:
# `name` labels the Series itself; `index.name` labels its index and is
# shown as a header line above the labels in the repr.
ser3.name = 'breakfasts'
ser3.index.name = 'menu'
ser3

menu
eggs        1.49
bacons      1.99
pancakes    0.99
Name: breakfasts, dtype: float64

## DataFrame

A DataFrame is basically a collection of Series (one per column) that share the same index.

In [31]:
# Each dict key becomes a column; the equal-length lists supply the rows.
data = {
    'customer': ['Puppy', 'Piggy', 'Kitty', 'Puppy', 'Kitty', 'Piggy'],
    'order': ['Lunch', 'Dinner', 'Dinner', 'Breakfast', 'Breakfast', 'Lunch'],
    'total': [1.49, 2.49, 2.99, 0.99, 0.99, 2.99],
}
df = pd.DataFrame(data)
df

Unnamed: 0,customer,order,total
0,Puppy,Lunch,1.49
1,Piggy,Dinner,2.49
2,Kitty,Dinner,2.99
3,Puppy,Breakfast,0.99
4,Kitty,Breakfast,0.99
5,Piggy,Lunch,2.99


In [32]:
# head() shows the first five rows by default.
df.head()

Unnamed: 0,customer,order,total
0,Puppy,Lunch,1.49
1,Piggy,Dinner,2.49
2,Kitty,Dinner,2.99
3,Puppy,Breakfast,0.99
4,Kitty,Breakfast,0.99


In [33]:
# columns= picks and orders the dict keys to use; 'order' is left out here.
pd.DataFrame(data, columns=['total', 'customer'])

Unnamed: 0,total,customer
0,1.49,Puppy
1,2.49,Piggy
2,2.99,Kitty
3,0.99,Puppy
4,0.99,Kitty
5,2.99,Piggy


In [37]:
# Assigning a Series as a column aligns on the index: rows 1 and 4 receive
# values, every other row gets NaN. This adds the column to df in place.
tips = pd.Series([.5, .2], index=[1,4])
df['tips'] = tips
df

Unnamed: 0,customer,order,total,tips
0,Puppy,Lunch,1.49,
1,Piggy,Dinner,2.49,0.5
2,Kitty,Dinner,2.99,
3,Puppy,Breakfast,0.99,
4,Kitty,Breakfast,0.99,0.2
5,Piggy,Lunch,2.99,


In [38]:
# `del` removes the column from df in place, as with a dictionary key.
del df['tips']
df

Unnamed: 0,customer,order,total
0,Puppy,Lunch,1.49
1,Piggy,Dinner,2.49
2,Kitty,Dinner,2.99
3,Puppy,Breakfast,0.99
4,Kitty,Breakfast,0.99
5,Piggy,Lunch,2.99


### Dictionary within dictionary

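Outer keys become columns and inner keys become row labels. Each inner key can appear only once — a repeated key in a dict literal (like `'Dinner'` below) keeps only its last value.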

In [40]:
# NOTE: 'Dinner' appears twice in each inner dict literal. Python keeps only
# the last value for a repeated key, so the 'Piggy' / 2.49 entry is discarded
# before pandas ever sees it and df2 ends up with three rows, not four.
data2 = {'customer': {'Lunch': 'Puppy', 'Dinner': 'Piggy', 'Dinner': 'Kitty', 'Breakfast': 'Puppy'},
        'total': {'Lunch': 1.49, 'Dinner': 2.49, 'Dinner': 2.99, 'Breakfast': .99}}
df2 = pd.DataFrame(data2)
df2

Unnamed: 0,customer,total
Lunch,Puppy,1.49
Dinner,Kitty,2.99
Breakfast,Puppy,0.99


In [41]:
# Transpose: row labels become columns and column labels become rows.
df2.T

Unnamed: 0,Lunch,Dinner,Breakfast
customer,Puppy,Kitty,Puppy
total,1.49,2.99,0.99


Transposing can lose the columns' data types: when the columns have different types, the transposed columns are upcast to `object`.

### Specifying index

In [46]:
# Passing index= with an existing DataFrame reindexes it: 'Breakfast' is
# dropped and the unknown label 'Snacks' is filled with missing values.
pd.DataFrame(df2, index=['Lunch', 'Dinner', 'Snacks'])

Unnamed: 0,customer,total
Lunch,Puppy,1.49
Dinner,Kitty,2.99
Snacks,,


Series can be passed for each column.

In [47]:
# A dict of Series also works; the Series' index becomes the row index.
pd.DataFrame({'customer': df2['customer']})

Unnamed: 0,customer
Lunch,Puppy
Dinner,Kitty
Breakfast,Puppy


In [53]:
# index.name labels the row axis and columns.name labels the column axis;
# both appear in the rendered header. These assignments modify df2 in place.
df2.index.name = 'meals'
df2.columns.name = 'customer_info'
df2

customer_info,customer,total
meals,Unnamed: 1_level_1,Unnamed: 2_level_1
Lunch,Puppy,1.49
Dinner,Kitty,2.99
Breakfast,Puppy,0.99


Only the data (not the index or column labels) is converted to a NumPy array, with a single dtype; mixed column types yield `dtype=object`.

In [54]:
# Only the values are returned; mixing string and float columns forces dtype=object.
df2.to_numpy()

array([['Puppy', 1.49],
       ['Kitty', 2.99],
       ['Puppy', 0.99]], dtype=object)