# Pandas Tutorial

In [4]:
!pip install pandas



You should consider upgrading via the 'C:\Users\Hari Om\AppData\Local\Programs\Python\Python310\python.exe -m pip install --upgrade pip' command.


In [5]:
pip show pandas

Name: pandasNote: you may need to restart the kernel to use updated packages.

Version: 1.4.3
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Author: The Pandas Development Team
Author-email: pandas-dev@python.org
License: BSD-3-Clause
Location: c:\users\hari om\appdata\local\programs\python\python310\lib\site-packages
Requires: numpy, python-dateutil, pytz
Required-by: seaborn, yfinance


In [12]:
import pandas as pd
import numpy as np

In [13]:
pd.__version__

'1.4.3'

## Series

In [10]:
list = ['a','b','c','d']
lables = [1,2,3,4]
ser = pd.Series(data=list,index=lables)

In [11]:
ser

1    a
2    b
3    c
4    d
dtype: object

In [14]:
arr_1 = np.array([1,2,3,4])
ser_2 = pd.Series(arr_1)

In [15]:
ser_2

0    1
1    2
2    3
3    4
dtype: int32

In [16]:
dict_1 = {'f_name':'Rahul','l_name':'Singh','age':35}
ser_3 = pd.Series(dict_1)

In [17]:
ser_3

f_name    Rahul
l_name    Singh
age          35
dtype: object

In [18]:
ser_2.dtype

dtype('int32')

In [19]:
ser_2 + ser_2

0    2
1    4
2    6
3    8
dtype: int32

In [20]:
ser_2 - ser_2

0    0
1    0
2    0
3    0
dtype: int32

In [22]:
np.exp(ser_2)

0     2.718282
1     7.389056
2    20.085537
3    54.598150
dtype: float64

In [24]:
ser_4 = pd.Series({4:5,5:6,6:7,7:8})

In [25]:
ser_2 + ser_4

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
5   NaN
6   NaN
7   NaN
dtype: float64

In [26]:
ser_4

4    5
5    6
6    7
7    8
dtype: int64

In [27]:
ser_4 = pd.Series({4:5,5:6,6:7,7:8},name='random_names')

In [28]:
ser_4.name

'random_names'

## DataFrames

### Creating DataFrames

In [29]:
arr_2 = np.random.randint(10,50,size = (2,4))

In [30]:
arr_2

array([[27, 11, 32, 27],
       [20, 18, 37, 31]])

In [31]:
arr_2.dtype

dtype('int32')

In [32]:
df_1 = pd.DataFrame(arr_2, ['A','B'], ['C','D','E','F'])

In [33]:
df_1

Unnamed: 0,C,D,E,F
A,27,11,32,27
B,20,18,37,31


In [34]:
df_1.dtypes

C    int32
D    int32
E    int32
F    int32
dtype: object

In [35]:
dict_3 = {'one': pd.Series([1.,2.,3.],index = ['a','b','c']),
          'two':pd.Series([1.,2.,3.,4.], index = ['a','b','c','d'])}

In [37]:
dict_3

{'one': a    1.0
 b    2.0
 c    3.0
 dtype: float64,
 'two': a    1.0
 b    2.0
 c    3.0
 d    4.0
 dtype: float64}

In [38]:
df_2 = pd.DataFrame(dict_3)

In [39]:
df_2

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [43]:
pd.DataFrame.from_dict(dict([('A',[1,2,3]),('B',[4,5,6])]))

Unnamed: 0,A,B
0,1,4
1,2,5
2,3,6


In [44]:
pd.DataFrame.from_dict(dict([('A',[1,2,3]),('B',[4,5,6])]),
                      orient='index', columns=['one','two','three'])

Unnamed: 0,one,two,three
A,1,2,3
B,4,5,6


In [45]:
df_1.shape

(2, 4)

### Editing & Retrieving Data

In [48]:
print(df_1)
df_1['C']

    C   D   E   F
A  27  11  32  27
B  20  18  37  31


A    27
B    20
Name: C, dtype: int32

In [50]:
df_1[['C','E']]

Unnamed: 0,C,E
A,27,32
B,20,37


In [51]:
df_1.loc['A']

C    27
D    11
E    32
F    27
Name: A, dtype: int32

In [52]:
df_1.iloc[1]

C    20
D    18
E    37
F    31
Name: B, dtype: int32

In [53]:
df_1.loc['A','C']

27

In [54]:
df_1.loc[['A','B'],['D','E']]

Unnamed: 0,D,E
A,11,32
B,18,37


In [55]:
df_1['Total'] = df_1['C']+df_1['D']+df_1['E']

In [56]:
df_1

Unnamed: 0,C,D,E,F,Total
A,27,11,32,27,70
B,20,18,37,31,75


In [57]:
df_2

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [58]:
df_2['mul'] = df_2['one']*df_2['two']

In [59]:
df_2

Unnamed: 0,one,two,mul
a,1.0,1.0,1.0
b,2.0,2.0,4.0
c,3.0,3.0,9.0
d,,4.0,


In [60]:
df_2 = {'C': 44, 'D': 45, 'E': 46}

In [61]:
new_row = pd.Series(df_2,name='F')

In [62]:
df_1 = df_1.append(new_row)
df_1

  df_1 = df_1.append(new_row)


Unnamed: 0,C,D,E,F,Total
A,27,11,32,27.0,70.0
B,20,18,37,31.0,75.0
F,44,45,46,,


In [64]:
df_1.drop('Total',axis=1,inplace = True)

In [65]:
df_1

Unnamed: 0,C,D,E,F
A,27,11,32,27.0
B,20,18,37,31.0
F,44,45,46,


In [66]:
df_1.drop('B',axis=0,inplace = True)

In [67]:
df_1

Unnamed: 0,C,D,E,F
A,27,11,32,27.0
F,44,45,46,


In [68]:
df_1['Sex'] = ['Men','Women']
df_1.set_index('Sex', inplace=True)

In [69]:
df_1

Unnamed: 0_level_0,C,D,E,F
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Men,27,11,32,27.0
Women,44,45,46,


In [70]:
df_1.reset_index(inplace=True)

In [71]:
df_1

Unnamed: 0,Sex,C,D,E,F
0,Men,27,11,32,27.0
1,Women,44,45,46,


In [72]:
df_3=pd.DataFrame({'A':[1.,np.nan,3.,np.nan]})
df_4=pd.DataFrame({'A':[8.,9.,2.,4.]})

In [73]:
df_3.combine_first(df_4)

Unnamed: 0,A
0,1.0
1,9.0
2,3.0
3,4.0


In [75]:
df_3.combine(df_4,np.minimum)

Unnamed: 0,A
0,1.0
1,
2,2.0
3,


In [76]:
df_3.combine(df_4,np.maximum)

Unnamed: 0,A
0,8.0
1,
2,3.0
3,


### Conditional Selection

In [78]:
arr_2 = np.random.randint(10,50,size=(2,3))

In [79]:
df_1 = pd.DataFrame(arr_2, ['A','B'],['C','D','E'])

In [80]:
df_1

Unnamed: 0,C,D,E
A,16,44,23
B,23,31,26


In [81]:
print('Greater than 40\n', df_1>40)

Greater than 40
        C      D      E
A  False   True  False
B  False  False  False


In [82]:
print('Greater than 40\n', df_1.gt(40.0))
# lt ge le eq ne

Greater than 40
        C      D      E
A  False   True  False
B  False  False  False


In [85]:
bool_1 = df_1 >= 42.0
df_1[bool_1]

Unnamed: 0,C,D,E
A,,44.0,
B,,,


In [86]:
df_1['E'] > 35

A    False
B    False
Name: E, dtype: bool

In [87]:
df_1[df_1['E']>25]

Unnamed: 0,C,D,E
B,23,31,26


In [88]:
df_2 = df_1[df_1['E']>25]

In [89]:
df_2

Unnamed: 0,C,D,E
B,23,31,26


In [94]:
print(df_1[df_1['E']>20]['C'])

A    16
B    23
Name: C, dtype: int32


In [93]:
print(df_1[df_1['E']>20][['C','D']])

    C   D
A  16  44
B  23  31


In [95]:
arr_2 = np.array([[1,2,3],[4,5,6],[7,8,9]])

In [96]:
df_2 = pd.DataFrame(arr_2, ['A','B','C'],['X','Y','Z'])

In [97]:
df_2

Unnamed: 0,X,Y,Z
A,1,2,3
B,4,5,6
C,7,8,9


In [99]:
df_2[(df_2['X']>3) & (df_2['X']<7)]

Unnamed: 0,X,Y,Z
B,4,5,6


In [100]:
df_2[(df_2['X']>3) | (df_2['X']<7)]

Unnamed: 0,X,Y,Z
A,1,2,3
B,4,5,6
C,7,8,9
