# Day 11 - Pandas Tutorial

In [None]:
!pip install panda

## Import necessary libraries


In [4]:
import pandas as pd 
import numpy as np

## Object Creation

In [6]:
# Series: a one-dimensional labelled array.
# The single np.nan entry makes pandas store the whole Series as float64.
ser = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, np.nan, 8, 9])
ser

0    1.0
1    2.0
2    3.0
3    4.0
4    5.0
5    6.0
6    7.0
7    NaN
8    8.0
9    9.0
dtype: float64

In [14]:
# Six consecutive calendar days starting on 22 Nov 2020, used below as a row index.
dates = pd.date_range(start='2020-11-22', periods=6, freq='D')
dates

DatetimeIndex(['2020-11-22', '2020-11-23', '2020-11-24', '2020-11-25',
               '2020-11-26', '2020-11-27'],
              dtype='datetime64[ns]', freq='D')

In [15]:
# DataFrame: one row per entry in `dates`, five columns A-E of uniform random
# floats in [0, 1).
# A seeded Generator is used so the table (and every result derived from it
# below) is identical on each Restart & Run All instead of changing per run.
df = pd.DataFrame(
    np.random.default_rng(42).random((6, 5)),
    index=dates,
    columns=list('ABCDE'),
)
df

Unnamed: 0,A,B,C,D,E
2020-11-22,0.830079,0.990057,0.882648,0.411474,0.375247
2020-11-23,0.988036,0.609333,0.603759,0.333452,0.892577
2020-11-24,0.504776,0.218446,0.431538,0.107195,0.479038
2020-11-25,0.000716,0.740718,0.18835,0.485501,0.059689
2020-11-26,0.77593,0.195408,0.963751,0.807434,0.270156
2020-11-27,0.598173,0.883474,0.871991,0.078669,0.888228


## View Data

In [25]:
# Preview the first two rows of the frame.
df.head(n=2)

Unnamed: 0,A,B,C,D,E
2020-11-22,0.830079,0.990057,0.882648,0.411474,0.375247
2020-11-23,0.988036,0.609333,0.603759,0.333452,0.892577


In [26]:
# Preview the last two rows of the frame.
df.tail(n=2)

Unnamed: 0,A,B,C,D,E
2020-11-26,0.77593,0.195408,0.963751,0.807434,0.270156
2020-11-27,0.598173,0.883474,0.871991,0.078669,0.888228


## Creating DataFrame

In [24]:
# Build a DataFrame from a 2-D array that mixes integers and strings.
# dtype=object is required here: without it NumPy picks a common string dtype
# and silently converts every number to text (1 -> '1'). With it, each cell
# keeps its original Python value; the columns are still reported as `object`.
df2 = pd.DataFrame(
    np.array([[1, 2, 3], ['a', 'g', 2], [7, 8, 9]], dtype=object),
    columns=['a', 'b', 'c'],
)
df2


Unnamed: 0,a,b,c
0,1,2,3
1,a,g,2
2,7,8,9


## DataFrame dtypes and index

In [23]:
# dtype of each column of df2; all three columns are reported as `object`.
df2.dtypes

a    object
b    object
c    object
dtype: object

In [27]:
# Row labels of df2: no index was supplied, so it is a default RangeIndex.
df2.index

RangeIndex(start=0, stop=3, step=1)

In [28]:
# Row labels of df: the DatetimeIndex that was passed in as `dates`.
df.index

DatetimeIndex(['2020-11-22', '2020-11-23', '2020-11-24', '2020-11-25',
               '2020-11-26', '2020-11-27'],
              dtype='datetime64[ns]', freq='D')

## Dataframe to Numpy

In [29]:
# Convert the frame's values to a 2-D NumPy array; the index and column labels
# are not part of the result.
df.to_numpy()

array([[8.30079189e-01, 9.90056888e-01, 8.82648281e-01, 4.11473615e-01,
        3.75246653e-01],
       [9.88035695e-01, 6.09332735e-01, 6.03759114e-01, 3.33452277e-01,
        8.92577042e-01],
       [5.04776498e-01, 2.18445989e-01, 4.31538274e-01, 1.07195220e-01,
        4.79038390e-01],
       [7.16470203e-04, 7.40718047e-01, 1.88349513e-01, 4.85500673e-01,
        5.96885015e-02],
       [7.75930022e-01, 1.95407580e-01, 9.63751090e-01, 8.07433627e-01,
        2.70156253e-01],
       [5.98172819e-01, 8.83474321e-01, 8.71990976e-01, 7.86686504e-02,
        8.88227519e-01]])

In [30]:
# NOTE: `describe` is referenced here without parentheses, so the cell displays
# the bound method's repr rather than summary statistics. Writing
# `df.describe()` actually calls it.
df.describe

<bound method NDFrame.describe of                    A         B         C         D         E
2020-11-22  0.830079  0.990057  0.882648  0.411474  0.375247
2020-11-23  0.988036  0.609333  0.603759  0.333452  0.892577
2020-11-24  0.504776  0.218446  0.431538  0.107195  0.479038
2020-11-25  0.000716  0.740718  0.188350  0.485501  0.059689
2020-11-26  0.775930  0.195408  0.963751  0.807434  0.270156
2020-11-27  0.598173  0.883474  0.871991  0.078669  0.888228>

In [31]:
# Called with parentheses, describe() returns per-column summary statistics:
# count, mean, std, min, the 25%/50%/75% quantiles and max.
df.describe()

Unnamed: 0,A,B,C,D,E
count,6.0,6.0,6.0,6.0,6.0
mean,0.616285,0.606239,0.657006,0.370621,0.494156
std,0.346722,0.335093,0.304807,0.268954,0.336849
min,0.000716,0.195408,0.18835,0.078669,0.059689
25%,0.528126,0.316168,0.474593,0.163759,0.296429
50%,0.687051,0.675025,0.737875,0.372463,0.427143
75%,0.816542,0.847785,0.879984,0.466994,0.78593
max,0.988036,0.990057,0.963751,0.807434,0.892577


## Transpose

In [33]:
# Swap rows and columns: dates become the column labels, A-E the row labels.
df.transpose()

Unnamed: 0,2020-11-22,2020-11-23,2020-11-24,2020-11-25,2020-11-26,2020-11-27
A,0.830079,0.988036,0.504776,0.000716,0.77593,0.598173
B,0.990057,0.609333,0.218446,0.740718,0.195408,0.883474
C,0.882648,0.603759,0.431538,0.18835,0.963751,0.871991
D,0.411474,0.333452,0.107195,0.485501,0.807434,0.078669
E,0.375247,0.892577,0.479038,0.059689,0.270156,0.888228


## Selecting columns and rows

In [34]:
# Select a single column by label; the result is a Series indexed by date.
df.loc[:, 'A']

2020-11-22    0.830079
2020-11-23    0.988036
2020-11-24    0.504776
2020-11-25    0.000716
2020-11-26    0.775930
2020-11-27    0.598173
Freq: D, Name: A, dtype: float64

In [35]:
# Row-wise selection by position: the first two rows (stop index is exclusive).
df.iloc[:2]

Unnamed: 0,A,B,C,D,E
2020-11-22,0.830079,0.990057,0.882648,0.411474,0.375247
2020-11-23,0.988036,0.609333,0.603759,0.333452,0.892577


In [38]:
# All column values for the first day, looked up by its index label.
df.loc[dates[0], :]

A    0.830079
B    0.990057
C    0.882648
D    0.411474
E    0.375247
Name: 2020-11-22 00:00:00, dtype: float64

In [39]:
# Label-based selection: a date range for the rows and an explicit list of
# columns. Label slices with .loc include both endpoints, so 2020-11-24 is
# part of the result.
df.loc['2020-11-22':'2020-11-24', ['A','B','C']]

Unnamed: 0,A,B,C
2020-11-22,0.830079,0.990057,0.882648
2020-11-23,0.988036,0.609333,0.603759
2020-11-24,0.504776,0.218446,0.431538


In [41]:
# Label slices on both axes; both are inclusive, so column 'D' and the row for
# 2020-11-24 are returned.
df.loc['2020-11-22':'2020-11-24', 'A':'D']

Unnamed: 0,A,B,C,D
2020-11-22,0.830079,0.990057,0.882648,0.411474
2020-11-23,0.988036,0.609333,0.603759,0.333452
2020-11-24,0.504776,0.218446,0.431538,0.107195


In [42]:
# .at returns a single scalar addressed by one row label and one column label.
df.at[dates[1],'A']

0.9880356953853331

In [43]:
# Position-based selection: rows 0-23 and the first two columns. The frame has
# only 6 rows and out-of-range slice bounds are clamped, so every row is returned.
# NOTE(review): the stop of 24 looks like a typo for a smaller value (e.g. 2 or 4) — confirm.
df.iloc[0:24,0:2]

Unnamed: 0,A,B
2020-11-22,0.830079,0.990057
2020-11-23,0.988036,0.609333
2020-11-24,0.504776,0.218446
2020-11-25,0.000716,0.740718
2020-11-26,0.77593,0.195408
2020-11-27,0.598173,0.883474


## Boolean conditions

In [45]:
# Keep only the rows whose value in column A exceeds 0.6.
df.loc[df['A'] > 0.6]

Unnamed: 0,A,B,C,D,E
2020-11-22,0.830079,0.990057,0.882648,0.411474,0.375247
2020-11-23,0.988036,0.609333,0.603759,0.333452,0.892577
2020-11-26,0.77593,0.195408,0.963751,0.807434,0.270156


## Boolean conditions on more than one column

In [51]:
# Keep only the rows where both column A and column B exceed 0.6.
df.loc[(df[['A', 'B']] > 0.6).all(axis=1)]

Unnamed: 0,A,B,C,D,E
2020-11-22,0.830079,0.990057,0.882648,0.411474,0.375247
2020-11-23,0.988036,0.609333,0.603759,0.333452,0.892577


In [53]:
# Element-wise mask: values that are not greater than 0.5 are replaced by NaN,
# and the shape of the frame is unchanged.
df.where(df > 0.5)

Unnamed: 0,A,B,C,D,E
2020-11-22,0.830079,0.990057,0.882648,,
2020-11-23,0.988036,0.609333,0.603759,,0.892577
2020-11-24,0.504776,,,,
2020-11-25,,0.740718,,,
2020-11-26,0.77593,,0.963751,0.807434,
2020-11-27,0.598173,0.883474,0.871991,,0.888228


## Copying Dataframe

In [54]:
# Independent (deep) copy of df, so later edits to df2 leave df untouched.
# This rebinds the name df2, which previously held the small mixed-type frame.
df2 = df.copy(deep=True)

In [55]:
# Display the copy; it holds the same values as df.
df2

Unnamed: 0,A,B,C,D,E
2020-11-22,0.830079,0.990057,0.882648,0.411474,0.375247
2020-11-23,0.988036,0.609333,0.603759,0.333452,0.892577
2020-11-24,0.504776,0.218446,0.431538,0.107195,0.479038
2020-11-25,0.000716,0.740718,0.18835,0.485501,0.059689
2020-11-26,0.77593,0.195408,0.963751,0.807434,0.270156
2020-11-27,0.598173,0.883474,0.871991,0.078669,0.888228


## Assigning Column Values (overwriting existing column 'E')

In [56]:
# Column 'E' already exists in df2 (it was copied from df), so this assignment
# replaces its values rather than adding a new column. The list must supply one
# value per row.
df2['E']=[-1,-2,-3,3,2,1]

In [57]:
# Display df2 after the assignment to column 'E'.
df2

Unnamed: 0,A,B,C,D,E
2020-11-22,0.830079,0.990057,0.882648,0.411474,-1
2020-11-23,0.988036,0.609333,0.603759,0.333452,-2
2020-11-24,0.504776,0.218446,0.431538,0.107195,-3
2020-11-25,0.000716,0.740718,0.18835,0.485501,3
2020-11-26,0.77593,0.195408,0.963751,0.807434,2
2020-11-27,0.598173,0.883474,0.871991,0.078669,1


## Storing the row-wise average of columns A–E in a new column 'mean'

In [61]:

# Row-wise average of columns A-E, stored in a new column 'mean'.
# The columns are listed explicitly so that re-running the cell never folds the
# freshly added 'mean' column back into the average. DataFrame.mean derives the
# divisor from the selection, so there is no hard-coded 5 to keep in sync.
# skipna=False keeps the behaviour of a plain `+` chain: any NaN in a row
# yields NaN for that row's mean.
df2['mean'] = df2[['A', 'B', 'C', 'D', 'E']].mean(axis=1, skipna=False)

In [62]:
# Display df2, which now carries the additional 'mean' column.
df2

Unnamed: 0,A,B,C,D,E,mean
2020-11-22,0.830079,0.990057,0.882648,0.411474,-1,0.422852
2020-11-23,0.988036,0.609333,0.603759,0.333452,-2,0.106916
2020-11-24,0.504776,0.218446,0.431538,0.107195,-3,-0.347609
2020-11-25,0.000716,0.740718,0.18835,0.485501,3,0.883057
2020-11-26,0.77593,0.195408,0.963751,0.807434,2,0.948504
2020-11-27,0.598173,0.883474,0.871991,0.078669,1,0.686461
