# Introduction to pandas
* The premier data science library
* The name pandas derives from panel data 
* Has two main objects: DataFrame and Series
* Is a wrapper around, among other libraries, NumPy and matplotlib
* Series is a vector of data with an index
* DataFrame is a collection of Series with a single index (columns are also an index)
* Was developed by a quant working at a hedge fund, who had semi-retired by about the age of 27


In [8]:
import matplotlib.pyplot as plt  # the plotting API lives in the pyplot submodule, not the top-level package
import numpy as np
import numpy.random as npr
import pandas as pd

In [10]:
pd.__version__

'1.4.1'

### 1. make a Series

In [16]:
# Two length-10 Series, each with the default integer index:
# draws from the standard normal distribution and uniform draws from [0, 1).
normals = pd.Series(npr.standard_normal(10))
rands = pd.Series(npr.rand(10))

In [None]:
normals.index

In [34]:
type(rands)

pandas.core.series.Series

### 2. make a DataFrame

In [20]:
# A list of Series is stacked row-wise: each Series becomes one row,
# which is why the next cell transposes the result.
df = pd.DataFrame([normals, rands])

In [22]:
df.T

Unnamed: 0,0,1
0,0.625748,0.959943
1,-0.239725,0.921888
2,-1.05095,0.032411
3,0.60984,0.434443
4,0.248869,0.823033
5,0.775524,0.25952
6,-1.109566,0.391152
7,1.464136,0.64465
8,0.130894,0.770948
9,-1.250146,0.025954


In [23]:
# A mapping of column name -> Series becomes one column per key,
# so dict-shaped data (e.g. parsed JSON) converts directly into a frame.
df = pd.DataFrame(
    data={
        'Norms': normals,
        'Rands': rands,
    },
)
df

Unnamed: 0,Norms,Rands
0,0.625748,0.959943
1,-0.239725,0.921888
2,-1.05095,0.032411
3,0.60984,0.434443
4,0.248869,0.823033
5,0.775524,0.25952
6,-1.109566,0.391152
7,1.464136,0.64465
8,0.130894,0.770948
9,-1.250146,0.025954


In [24]:
df.index

RangeIndex(start=0, stop=10, step=1)

In [25]:
df.columns

Index(['Norms', 'Rands'], dtype='object')

In [31]:
# Wrapping a single selected column in pd.DataFrame yields a one-column frame.
type(pd.DataFrame(df['Norms']))

pandas.core.frame.DataFrame

### 3. Dates and Indexes

In [30]:
# Ten consecutive calendar days starting 1 April 2022, at daily frequency.
dates = pd.date_range(start='2022-04-01', periods=10, freq='D')
type(dates)

pandas.core.indexes.datetimes.DatetimeIndex

In [36]:
# Swap the default RangeIndex for the DatetimeIndex built above: one date per row.
df.index = dates
df

Unnamed: 0,Norms,Rands
2022-04-01,0.625748,0.959943
2022-04-02,-0.239725,0.921888
2022-04-03,-1.05095,0.032411
2022-04-04,0.60984,0.434443
2022-04-05,0.248869,0.823033
2022-04-06,0.775524,0.25952
2022-04-07,-1.109566,0.391152
2022-04-08,1.464136,0.64465
2022-04-09,0.130894,0.770948
2022-04-10,-1.250146,0.025954


In [39]:
# iloc slices by integer position with an exclusive stop: positions 1 and 2 only.
df.iloc[1:3]

Unnamed: 0,Norms,Rands
2022-04-02,-0.239725,0.921888
2022-04-03,-1.05095,0.032411


In [43]:
# loc slices by label and includes both endpoints: three rows come back.
df.loc['2022-04-01':'2022-04-03']

Unnamed: 0,Norms,Rands
2022-04-01,0.625748,0.959943
2022-04-02,-0.239725,0.921888
2022-04-03,-1.05095,0.032411


In [47]:
# Month-based frequency aliases: M = month end, BM = business month end,
# MS = month start, BMS = business month start.
# NOTE(review): pandas 2.2+ deprecates 'M'/'BM' in favour of 'ME'/'BME' — confirm
# against the installed version (this notebook ran on 1.4.1).
pd.date_range('2022-04-01', periods = 10, freq = 'BM')

DatetimeIndex(['2022-04-29', '2022-05-31', '2022-06-30', '2022-07-29',
               '2022-08-31', '2022-09-30', '2022-10-31', '2022-11-30',
               '2022-12-30', '2023-01-31'],
              dtype='datetime64[ns]', freq='BM')

In [48]:
# Ten single-letter row labels in deliberately shuffled order.
letters = list("EJCHAFGDIB")

In [49]:
# Relabel the rows with the shuffled letters.
df.index = letters

In [50]:
df

Unnamed: 0,Norms,Rands
E,0.625748,0.959943
J,-0.239725,0.921888
C,-1.05095,0.032411
H,0.60984,0.434443
A,0.248869,0.823033
F,0.775524,0.25952
G,-1.109566,0.391152
D,1.464136,0.64465
I,0.130894,0.770948
B,-1.250146,0.025954


In [53]:
# list.sort() reorders the list in place (it returns None), so the list is shown on its own line.
letters.sort()
letters

['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

In [75]:
# reindex returns a new frame with the rows arranged in the given label order (here alphabetical).
df2 = df.reindex(letters)
df2

Unnamed: 0,Norms,Rands
A,0.248869,0.823033
B,-1.250146,0.025954
C,-1.05095,0.032411
D,1.464136,0.64465
E,0.625748,0.959943
F,0.775524,0.25952
G,-1.109566,0.391152
H,0.60984,0.434443
I,0.130894,0.770948
J,-0.239725,0.921888


### 4. Slicing index loc vs. iloc

In [61]:
# loc with a list of labels picks exactly those rows.
df2.loc[['A','B', 'C']]

Unnamed: 0,Norms,Rands
A,0.248869,0.823033
B,-1.250146,0.025954
C,-1.05095,0.032411


In [63]:
# iloc with a positional slice: the first three rows (the stop is exclusive).
df2.iloc[0:3]

Unnamed: 0,Norms,Rands
A,0.248869,0.823033
B,-1.250146,0.025954
C,-1.05095,0.032411


### 5. Slicing columns (columns are similar to indexes)

In [78]:
# Row labels A-E and three named columns for a small demo frame.
index = list('ABCDE')
columns = 'Do Re Mi'.split()
# The integers 0..14 laid out row by row in a grid sized to match the labels (5 x 3).
frame = pd.DataFrame(
    data=np.arange(len(index) * len(columns)).reshape(len(index), len(columns)),
    index=index,
    columns=columns,
)
frame

Unnamed: 0,Do,Re,Mi
A,0,1,2
B,3,4,5
C,6,7,8
D,9,10,11
E,12,13,14


In [79]:
# Indexing with a list of column names returns a DataFrame holding just those columns.
frame[['Do','Re']]

Unnamed: 0,Do,Re
A,0,1
B,3,4
C,6,7
D,9,10
E,12,13


### 6. Renaming columns

In [80]:
# inplace=True modifies frame itself instead of returning a renamed copy.
frame.rename(columns={'Do': 'Fa'}, inplace= True)

In [81]:
# what gives? The rename stuck: frame now has 'Fa' where 'Do' was, because of inplace=True.
frame

Unnamed: 0,Fa,Re,Mi
A,0,1,2
B,3,4,5
C,6,7,8
D,9,10,11
E,12,13,14


### 7. Reordering columns

In [77]:
# Selecting the columns in the desired order and rebinding the name reorders the frame.
frame = frame[['Re','Mi', 'Fa']]
frame

Unnamed: 0,Re,Mi,Fa
A,1,2,0
B,4,5,3
C,7,8,6
D,10,11,9
E,13,14,12


In [83]:
# axis=1 sorts by column label; the sorted frame is returned and frame itself is left as is.
frame.sort_index(axis = 1)


Unnamed: 0,Fa,Mi,Re
A,0,2,1
B,3,5,4
C,6,8,7
D,9,11,10
E,12,14,13


In [115]:
# axis=0 (or no axis) sorts by the row index; pass inplace=True to make the change stick


### 8. Removing columns or rows

In [86]:
# With no axis given, drop removes a row by its index label.
frame.drop('B', inplace = True)

In [87]:
frame

Unnamed: 0,Fa,Re,Mi
A,0,1,2
C,6,7,8
D,9,10,11
E,12,13,14


In [90]:
# columns=[...] drops columns by name; inplace=True changes frame directly.
frame.drop(columns=['Fa'], inplace = True)

In [91]:
frame

Unnamed: 0,Re,Mi
A,1,2
C,7,8
D,10,11
E,13,14
