# Pandas Library

In [196]:
import pandas as pd
import numpy as np

Pandas Series

In [197]:
# Build a Series from a plain list; pandas supplies the default 0..n-1 integer index.
s = pd.Series(data=[4, 5, 6, 7])

In [198]:
s

0    4
1    5
2    6
3    7
dtype: int64

In [199]:
s.values

array([4, 5, 6, 7], dtype=int64)

In [200]:
s.index

RangeIndex(start=0, stop=4, step=1)

In [201]:
# NOTE(review): leftover scratch cell. The twelve quote characters parse as adjacent
# empty string literals, so the cell just evaluates to '' — it can be deleted.
''''''''''''

''

In [202]:
# Series with an explicit label index: each value is paired with the label at the same position.
s1 = pd.Series(
    data=[4, 7, 8, -2],
    index=list('dbac'),
)

In [203]:
s1

d    4
b    7
a    8
c   -2
dtype: int64

In [204]:
s1.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [205]:
s1['d']

4

In [206]:
s1['d'] = 6

In [207]:
s1[['c','a','d']]

c   -2
a    8
d    6
dtype: int64

In [208]:
s1[s1>5]

d    6
b    7
a    8
dtype: int64

In [209]:
s1*3

d    18
b    21
a    24
c    -6
dtype: int64

In [210]:
np.exp(s1)

d     403.428793
b    1096.633158
a    2980.957987
c       0.135335
dtype: float64

In [211]:
'd' in s1

True

In [212]:
'e' in s1

False

In [None]:
'''Series from Dictionary'''

In [213]:
# City -> population figures; converted into a Series in the following cell.
series_dict = {
    'Rawalpindi': 46000,
    'Quetta': 87300,
    'Karachi': 72500,
    'Abbotabad': 22020,
}

In [214]:
series_dict = pd.Series(series_dict)

In [215]:
series_dict

Rawalpindi    46000
Quetta        87300
Karachi       72500
Abbotabad     22020
dtype: int64

In [216]:
cities = ['Punjab','Balochistan','Sindh','KPK']

In [217]:
# Passing index= looks up each requested label in the existing Series.
# NOTE(review): `cities` actually holds province names, none of which is a label
# in series_dict, so every entry comes out as NaN (see the output below) — confirm
# that this all-missing result is the intended demonstration.
series_dict_2 = pd.Series(series_dict, index=cities)

In [218]:
series_dict_2

Punjab        NaN
Balochistan   NaN
Sindh         NaN
KPK           NaN
dtype: float64

In [219]:
# Element-wise null check: returns a boolean Series that is True where a value is missing.
# (It does not collapse to a single "is anything null?" answer.)

pd.isnull(series_dict)

Rawalpindi    False
Quetta        False
Karachi       False
Abbotabad     False
dtype: bool

In [220]:
pd.isnull(series_dict_2)

Punjab         True
Balochistan    True
Sindh          True
KPK            True
dtype: bool

In [221]:
# Label the Series itself and its index; both names appear when the Series is displayed.
# NOTE(review): the index holds city names, so 'state' looks like a leftover label — confirm.
series_dict.name = 'population'
series_dict.index.name = 'state'

In [222]:
series_dict

state
Rawalpindi    46000
Quetta        87300
Karachi       72500
Abbotabad     22020
Name: population, dtype: int64

Pandas DataFrame

In [223]:
# Column-oriented records: each key becomes a DataFrame column, each list its values.
# NOTE(review): the Karachi/Lahore populations here (1.6, 1.3) are roughly a tenth of the
# 2021 values in the nested-dictionary example further down (16.4, 13.1) — confirm the figures.
data = dict(
    Cities=['Karachi', 'Lahore', 'Islamabad', 'Rawalpindi', 'Quetta'],
    Year=['2021'] * 5,
    Population=[1.6, 1.3, 1.16, 2.28, 1.13],
)

In [224]:
cities_data = pd.DataFrame(data)

In [225]:
cities_data

Unnamed: 0,Cities,Year,Population
0,Karachi,2021,1.6
1,Lahore,2021,1.3
2,Islamabad,2021,1.16
3,Rawalpindi,2021,2.28
4,Quetta,2021,1.13


In [226]:
cities_data.head() # returns the first 5 rows (the default for head()); the notebook displays the result

Unnamed: 0,Cities,Year,Population
0,Karachi,2021,1.6
1,Lahore,2021,1.3
2,Islamabad,2021,1.16
3,Rawalpindi,2021,2.28
4,Quetta,2021,1.13


In [227]:
# Re-ordering columns: builds a new DataFrame with the columns in the requested order.
# The result is only displayed here; cities_data is not reassigned in this cell.

pd.DataFrame(cities_data, columns=['Year','Cities','Population'])


Unnamed: 0,Year,Cities,Population
0,2021,Karachi,1.6
1,2021,Lahore,1.3
2,2021,Islamabad,1.16
3,2021,Rawalpindi,2.28
4,2021,Quetta,1.13


In [228]:
# Requesting a column name that does not exist yet ('Debt') adds it as an all-missing column.
# The row index is left as it was; only the set and order of columns changes.

cities_data=pd.DataFrame(cities_data, columns=['Year','Cities','Population','Debt'])
cities_data

Unnamed: 0,Year,Cities,Population,Debt
0,2021,Karachi,1.6,
1,2021,Lahore,1.3,
2,2021,Islamabad,1.16,
3,2021,Rawalpindi,2.28,
4,2021,Quetta,1.13,


In [229]:
cities_data.columns

Index(['Year', 'Cities', 'Population', 'Debt'], dtype='object')

In [230]:
cities_data['Cities']

0       Karachi
1        Lahore
2     Islamabad
3    Rawalpindi
4        Quetta
Name: Cities, dtype: object

In [231]:
cities_data['Population']

0    1.60
1    1.30
2    1.16
3    2.28
4    1.13
Name: Population, dtype: float64

In [232]:
cities_data.loc[4]

Year            2021
Cities        Quetta
Population      1.13
Debt             NaN
Name: 4, dtype: object

In [233]:
cities_data['Debt'] = 16.5

In [234]:
cities_data

Unnamed: 0,Year,Cities,Population,Debt
0,2021,Karachi,1.6,16.5
1,2021,Lahore,1.3,16.5
2,2021,Islamabad,1.16,16.5
3,2021,Rawalpindi,2.28,16.5
4,2021,Quetta,1.13,16.5


In [235]:
# Assign an array to the column: np.arange(5.) supplies one float per row (0.0 .. 4.0).
cities_data['Debt'] = np.arange(5.)
# print() emits the plain-text table; the bare expression at the end of the cell
# produces the notebook's regular display of the final frame.
print(cities_data)


# Assign a Series instead: val carries the default 0..4 index, which matches the
# frame's row labels, so the five values land on rows 0..4 in order.
val = pd.Series([-1.2,-1.5,-3.4,-1.9,-2.1])
cities_data['Debt'] = val

cities_data

   Year      Cities  Population  Debt
0  2021     Karachi        1.60   0.0
1  2021      Lahore        1.30   1.0
2  2021   Islamabad        1.16   2.0
3  2021  Rawalpindi        2.28   3.0
4  2021      Quetta        1.13   4.0


Unnamed: 0,Year,Cities,Population,Debt
0,2021,Karachi,1.6,-1.2
1,2021,Lahore,1.3,-1.5
2,2021,Islamabad,1.16,-3.4
3,2021,Rawalpindi,2.28,-1.9
4,2021,Quetta,1.13,-2.1


In [236]:
# Deleting a column

del cities_data['Debt']

cities_data

Unnamed: 0,Year,Cities,Population
0,2021,Karachi,1.6
1,2021,Lahore,1.3
2,2021,Islamabad,1.16
3,2021,Rawalpindi,2.28
4,2021,Quetta,1.13


Nested Dictionaries into DataFrame

In [237]:
# City -> {year: population} mapping. When handed to pd.DataFrame the outer keys
# become the columns and the inner keys become the row labels.
population = dict(
    Rawalpindi={2020: 2.23, 2021: 2.28},
    Islamabad={2020: 1.12, 2021: 1.16},
    Karachi={2020: 16.0, 2021: 16.4},
    Lahore={2020: 12.6, 2021: 13.1},
)

population

{'Rawalpindi': {2020: 2.23, 2021: 2.28},
 'Islamabad': {2020: 1.12, 2021: 1.16},
 'Karachi': {2020: 16.0, 2021: 16.4},
 'Lahore': {2020: 12.6, 2021: 13.1}}

In [238]:
Population = pd.DataFrame(population)

Population

Unnamed: 0,Rawalpindi,Islamabad,Karachi,Lahore
2020,2.23,1.12,16.0,12.6
2021,2.28,1.16,16.4,13.1


In [239]:
# Transpose so that cities become the row labels and years become the columns.
# The frame is rebuilt from `population` instead of from `Population` itself, which
# makes the cell idempotent: re-running it no longer flips the frame back to its
# original orientation.
Population = pd.DataFrame(population).T

Population

Unnamed: 0,2020,2021
Rawalpindi,2.23,2.28
Islamabad,1.12,1.16
Karachi,16.0,16.4
Lahore,12.6,13.1


In [240]:
# Name both axes so the labels show up when the frame is displayed.
Population.index.name = 'Cities'
Population.columns.name = 'Year'

In [241]:
Population

Year,2020,2021
Cities,Unnamed: 1_level_1,Unnamed: 2_level_1
Rawalpindi,2.23,2.28
Islamabad,1.12,1.16
Karachi,16.0,16.4
Lahore,12.6,13.1


In [242]:
Population.values

array([[ 2.23,  2.28],
       [ 1.12,  1.16],
       [16.  , 16.4 ],
       [12.6 , 13.1 ]])

In [None]:
'''Dropping Data from DataFrames'''

In [243]:
# 4x4 frame holding the integers 0..15, with named rows and columns,
# used to demonstrate dropping rows and columns.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)

data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [244]:
data.drop(['Colorado','Ohio'])

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [245]:
data.drop('two',axis=1)

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


In [246]:
data.drop(['two','four'],axis='columns')

Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
Utah,8,10
New York,12,14


In [None]:
'''Indexing , Slicing , Filtering'''

In [247]:
# Build the 4x4 frame (integers 0..15) again so the indexing examples
# in this section start from a freshly constructed frame.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)

data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [248]:
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [249]:
data[data['three']>5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [250]:
data<5

Unnamed: 0,one,two,three,four
Ohio,True,True,True,True
Colorado,True,False,False,False
Utah,False,False,False,False
New York,False,False,False,False


In [251]:
data[data<5] 

Unnamed: 0,one,two,three,four
Ohio,0.0,1.0,2.0,3.0
Colorado,4.0,,,
Utah,,,,
New York,,,,


In [252]:
# Boolean-mask assignment modifies `data` in place: every element below 5 becomes 0.
# The cells that follow operate on this modified frame.
data[data<5] = 0
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [None]:
''' iloc && loc'''

In [253]:
data.loc['Colorado',['two','three']] # loc selects by axis labels: row label first, then column labels

two      5
three    6
Name: Colorado, dtype: int32

In [254]:
data.iloc[2,[3,0,1]] # iloc selects by integer position: row position first, then column positions

four    11
one      8
two      9
Name: Utah, dtype: int32

In [255]:
data.iloc[1]

one      0
two      5
three    6
four     7
Name: Colorado, dtype: int32

In [256]:
data.iloc[[0,1],[2,1,3]]

Unnamed: 0,three,two,four
Ohio,0,0,0
Colorado,6,5,7


In [257]:
data.loc[:'Utah','three']

Ohio         0
Colorado     6
Utah        10
Name: three, dtype: int32

In [258]:
data.iloc[:,:3] # First argument selects rows, second selects columns

Unnamed: 0,one,two,three
Ohio,0,0,0
Colorado,0,5,6
Utah,8,9,10
New York,12,13,14


In [259]:
# Keep the rows where column 'four' exceeds 10, then take the first three columns.
data.loc[data['four'] > 10].iloc[:, :3]

Unnamed: 0,one,two,three
Utah,8,9,10
New York,12,13,14
