In [1]:
## Python - Pandas library 

In [7]:
import numpy as np # Numerical Python library used to perform numerical operations
import pandas as pd # Pandas library used to handle and analyze structured data

In [14]:
from numpy.random import randn # this randn function will return sample values from a standard normal distribution

In [16]:
## We will create a dataframe using the randn function and perform some basic operations on it

In [20]:
## pd.DataFrame - the 'DataFrame' class builds a DataFrame from the data we pass in
## Since we are creating a table of 5 rows and 5 columns, we specify the column titles and the index (row) titles
## Seeding NumPy's global random generator first makes randn return the same values on every run
## (the outputs recorded in this notebook were produced before the seed was added, so re-run values will differ from them)
## Finally we assign the result to the variable 'df', which will be our dataframe
np.random.seed(42)
df = pd.DataFrame(randn(5,5),columns=['V','W','X','Y','Z'],index=['A','B','C','D','E'])

In [26]:
## We can check the dataframe and its values by typing the dataframe name (we can also use .head(5) to limit the output to the first 5 rows)
df

Unnamed: 0,V,W,X,Y,Z
A,-0.80361,-0.496456,-0.369699,-0.589541,-1.393947
B,-0.485744,0.11547,0.739598,1.394395,-1.319292
C,-0.359874,1.345249,1.030684,-0.329996,-0.511895
D,-0.749256,1.987287,-2.111953,0.970102,0.844873
E,0.725691,0.1012,-0.444215,1.51084,-1.230091


In [70]:
## To check the pairwise correlation between the columns we can use the .corr method
## NOTE: the output recorded below also shows a 'Total' column; df only has that column after the
## cell further down that assigns df['Total'] has been run
df.corr(numeric_only=True)

Unnamed: 0,V,W,X,Y,Z,Total
V,1.0,-0.211454,0.143259,0.521293,-0.336455,0.386277
W,-0.211454,1.0,-0.372056,0.071137,0.936712,0.686909
X,0.143259,-0.372056,1.0,-0.23119,-0.654236,0.050924
Y,0.521293,0.071137,-0.23119,1.0,0.066306,0.574611
Z,-0.336455,0.936712,-0.654236,0.066306,1.0,0.470012
Total,0.386277,0.686909,0.050924,0.574611,0.470012,1.0


In [72]:
## The .describe method provides general descriptive statistics (count, mean, std, min, quartiles, max) for each column of the dataframe
## NOTE: the output recorded below also shows a 'Total' column; df only has that column after the
## cell further down that assigns df['Total'] has been run
df.describe()

Unnamed: 0,V,W,X,Y,Z,Total
count,5.0,5.0,5.0,5.0,5.0,5.0
mean,-0.334559,0.61055,-0.231117,0.59116,-0.72207,-0.086036
std,0.620405,1.02044,1.238607,0.984528,0.944148,2.013153
min,-0.80361,-0.496456,-2.111953,-0.589541,-1.393947,-3.653253
25%,-0.749256,0.1012,-0.444215,-0.329996,-1.319292,0.444426
50%,-0.485744,0.11547,-0.369699,0.970102,-1.230091,0.663425
75%,-0.359874,1.345249,0.739598,1.394395,-0.511895,0.941053
max,0.725691,1.987287,1.030684,1.51084,0.844873,1.174168


In [30]:
## We can grab any single column by putting its name inside brackets after the dataframe name
df['W'] ## grabs only the W column; the output keeps the row labels A-E and shows the column name as "Name: W"

A   -0.496456
B    0.115470
C    1.345249
D    1.987287
E    0.101200
Name: W, dtype: float64

In [34]:
## If you want to grab two (or more) columns, pass a list of column names inside the brackets
df[['W','Z']] ## the inner brackets are the list of column names, the outer brackets do the selection

Unnamed: 0,W,Z
A,-0.496456,-1.393947
B,0.11547,-1.319292
C,1.345249,-0.511895
D,1.987287,0.844873
E,0.1012,-1.230091


In [44]:
## If you just want the first row of columns V and Z, select the columns and then slice the rows by position with .iloc.
## .iloc[0:1] means all the rows from position 0 up to, but not including, position 1 (which means only row 0).
## .iloc makes it explicit that 0:1 refers to row positions, not to the row labels A-E.
df[['V','Z']].iloc[0:1]

Unnamed: 0,V,Z
A,-0.80361,-1.393947


In [46]:
## If you want to grab the first two rows of columns V and Z, select the columns and then use .iloc[0:2]
## (row positions 0 and 1; position 2 is not included).
df[['V','Z']].iloc[0:2]

Unnamed: 0,V,Z
A,-0.80361,-1.393947
B,-0.485744,-1.319292


In [54]:
## To add up the values in every column (summing down the rows), use the .sum method; the result has one total per column
df.sum()

V   -1.672793
W    3.052749
X   -1.155585
Y    2.955799
Z   -3.610352
dtype: float64

In [56]:
## To add up the values in every row (summing across the columns), use the same .sum method with axis=1; the result has one total per row
df.sum(axis=1)

A   -3.653253
B    0.444426
C    1.174168
D    0.941053
E    0.663425
dtype: float64

In [68]:
## To create a new column 'Total' holding the sum of each row, assign the row-wise sum (axis=1) to df['Total'].
## Only the data columns V-Z are summed, so re-running this cell does not add an already existing 'Total' into itself.
df['Total'] = df[['V','W','X','Y','Z']].sum(axis=1)
df

Unnamed: 0,V,W,X,Y,Z,Total
A,-0.80361,-0.496456,-0.369699,-0.589541,-1.393947,-3.653253
B,-0.485744,0.11547,0.739598,1.394395,-1.319292,0.444426
C,-0.359874,1.345249,1.030684,-0.329996,-0.511895,1.174168
D,-0.749256,1.987287,-2.111953,0.970102,0.844873,0.941053
E,0.725691,0.1012,-0.444215,1.51084,-1.230091,0.663425


In [74]:
## To drop a column, pass its name to the .drop method using the columns= keyword (the same as giving the name with axis=1).
## .drop returns a new dataframe without that column, so we assign the result back to df to keep the change.
df = df.drop(columns='Total')
df

Unnamed: 0,V,W,X,Y,Z
A,-0.80361,-0.496456,-0.369699,-0.589541,-1.393947
B,-0.485744,0.11547,0.739598,1.394395,-1.319292
C,-0.359874,1.345249,1.030684,-0.329996,-0.511895
D,-0.749256,1.987287,-2.111953,0.970102,0.844873
E,0.725691,0.1012,-0.444215,1.51084,-1.230091


## Thank You