# This Notebook contains some common operations done using Pandas

Some useful resources:
- https://fabsta.github.io/programming/pandas-cheat-sheet/

### Import the relevant packages

In [2]:
import numpy as np
import pandas as pd

# For graphical purposes
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt

## Data Frame Creation

### Creating a Data Frame from a set of vectors

In [7]:
# Two equal-length NumPy vectors; each one becomes a column of the frame.
column_1 = np.array(['a', 'b', 'c', 'd', 'e'])
column_2 = np.array([1, 2, 3, 4, 5])

# The dict keys become the column labels; rows get the default integer index.
df = pd.DataFrame({'letter': column_1, 'number': column_2})

# End the cell on the frame itself so the notebook renders it
# (an assignment alone produces no cell output).
df

Unnamed: 0,letter,number
0,a,1
1,b,2
2,c,3
3,d,4
4,e,5


### The `head` and `tail` methods return the first and last entries of the Data Frame and are very useful for quickly seeing what it looks like

In [8]:
# Show only the first three rows of the frame.
df.head(3)

Unnamed: 0,letter,number
0,a,1
1,b,2
2,c,3


In [10]:
# Show only the last four rows of the frame.
df.tail(4)

Unnamed: 0,letter,number
1,b,2
2,c,3
3,d,4
4,e,5


### Creating a Data Frame from a dictionary of dictionaries, keeping the outer keys as the index instead of numbering the rows automatically

In [12]:
# Outer keys are the row labels; each inner dict maps a column name to its value.
dict_letter_num = {
    'a': {'column_1': 1, 'column_2': 2},
    'b': {'column_1': 3, 'column_2': 4},
    'c': {'column_1': 5, 'column_2': 6},
}

# orient="index" makes the outer keys the index rather than the columns.
df = pd.DataFrame.from_dict(dict_letter_num, orient="index")

df.head()

Unnamed: 0,column_1,column_2
a,1,2
b,3,4
c,5,6


## Descriptive information of the Data Frame's content

In [13]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,column_1,column_2
count,3.0,3.0
mean,3.0,4.0
std,2.0,2.0
min,1.0,2.0
25%,2.0,3.0
50%,3.0,4.0
75%,4.0,5.0
max,5.0,6.0


In [14]:
# Prints the index range, each column's non-null count and dtype, and the memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, a to c
Data columns (total 2 columns):
column_1    3 non-null int64
column_2    3 non-null int64
dtypes: int64(2)
memory usage: 152.0+ bytes


### Get the values inside the Data Frame as a matrix

In [21]:
# All rows and all columns of the frame as a 2-D NumPy array.
df.values

array([[1, 2],
       [3, 4],
       [5, 6]])

In [23]:
# Second column only (position 1), kept two-dimensional so the result is a column vector.
df.iloc[:, 1:2].values

array([[2],
       [4],
       [6]])