In [1]:
import numpy as np
import pandas as pd
# Column-oriented source data: each key becomes a column, each list holds that
# column's values (all lists have the same length).
data = dict(
    state=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada', 'Nevada'],
    year=[2000, 2001, 2002, 2001, 2002, 2003],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
)
# No index is given, so the rows get the default integer labels 0..5.
frame = pd.DataFrame(data=data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [3]:
# Build a second frame from the same dict with an explicit column order and
# string row labels. 'debt' is not a key of `data`, so that column is created
# and filled with missing values.
frame2 = pd.DataFrame(
    data,
    index=['one', 'two', 'three', 'four', 'five', 'six'],
    columns=['year', 'state', 'pop', 'debt'],
)
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,
six,2003,Nevada,3.2,


In [4]:
# The column labels are exposed as an Index object.
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [6]:
# Dict-style access with a column label returns that column as a Series
# that keeps the frame's row labels.
frame2["state"]

one        Ohio
two        Ohio
three      Ohio
four     Nevada
five     Nevada
six      Nevada
Name: state, dtype: object

In [7]:
# Same dict-style access for a numeric column; the Series name is the column label.
frame2["year"]

one      2000
two      2001
three    2002
four     2001
five     2002
six      2003
Name: year, dtype: int64

In [9]:
# A row can be retrieved by its index label with the loc attribute
# (iloc is the position-based counterpart). The row comes back as a Series
# whose index is the frame's column labels.
frame2.loc['three']

year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object

In [10]:
# Columns can be modified by assignment; a scalar is broadcast to every row.
frame2['debt'] = 16.5
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,16.5
two,2001,Ohio,1.7,16.5
three,2002,Ohio,3.6,16.5
four,2001,Nevada,2.4,16.5
five,2002,Nevada,2.9,16.5
six,2003,Nevada,3.2,16.5


In [12]:
# Assign a NumPy array to the column: np.arange(6.) yields the six floats
# 0.0 .. 5.0, one per row. numpy is imported as `np` together with the other
# imports at the top of the notebook rather than in the middle of the analysis.
frame2['debt'] = np.arange(6.)
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,0.0
two,2001,Ohio,1.7,1.0
three,2002,Ohio,3.6,2.0
four,2001,Nevada,2.4,3.0
five,2002,Nevada,2.9,4.0
six,2003,Nevada,3.2,5.0


In [14]:
# A list or array assigned to a column must have the same length as the
# DataFrame. A Series, by contrast, is aligned on its labels: rows of the frame
# that have no matching label in the Series receive a missing value.
val = pd.Series({'two': -1.2, 'four': -1.5, 'five': -1.7})
frame2['debt'] = val
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,-1.5
five,2002,Nevada,2.9,-1.7
six,2003,Nevada,3.2,


In [17]:
# Assigning to a column label that does not exist yet creates the column.
# Note: this only works with bracket syntax; `frame2.eastern = ...` would not
# create a new column. (The del keyword removes columns, as with a dict.)
is_ohio = frame2['state'] == 'Ohio'
frame2['eastern'] = is_ohio
frame2

Unnamed: 0,year,state,pop,debt,eastern
one,2000,Ohio,1.5,,True
two,2001,Ohio,1.7,-1.2,True
three,2002,Ohio,3.6,,True
four,2001,Nevada,2.4,-1.5,False
five,2002,Nevada,2.9,-1.7,False
six,2003,Nevada,3.2,,False


In [18]:
# del removes the column from frame2 in place, like deleting a dict key;
# listing the columns afterwards confirms that 'eastern' is gone.
del frame2['eastern']
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')

In [16]:
# Nested dict: the outer keys become the columns and the inner keys become the
# row labels. Inner keys missing for a state (2000 for Nevada) show up as NaN.
pop = dict(
    Nevada={2001: 2.4, 2002: 2.9},
    Ohio={2000: 1.5, 2001: 1.7, 2002: 3.6},
)
frame3 = pd.DataFrame(data=pop)
frame3

Unnamed: 0,Nevada,Ohio
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [19]:
# Transpose the DataFrame (swap rows and columns) with the same .T syntax
# a NumPy array uses: the states become rows and the years become columns.
frame3.T

Unnamed: 0,2000,2001,2002
Nevada,,2.4,2.9
Ohio,1.5,1.7,3.6


In [20]:
# With an explicit index only the listed row labels are kept, in that order;
# 2003 appears in neither inner dict, so its row is entirely missing values.
pd.DataFrame(pop, index=[2001, 2002, 2003])

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2003,,


In [21]:
# A dict of Series is handled like a dict of dicts: each Series becomes a column.
# The slices are positional (all but the last row / the first two rows), so use
# .iloc to say so explicitly -- with integer row labels such as 2000, 2001 a bare
# [] slice is easy to misread as a label slice.
pdata = {'Ohio': frame3['Ohio'].iloc[:-1],
         'Nevada': frame3['Nevada'].iloc[:2]}
pd.DataFrame(pdata)

Unnamed: 0,Ohio,Nevada
2000,1.5,
2001,1.7,2.4


In [22]:
# Inspect the raw dict: each value is a Series that still carries its year labels.
pdata

{'Ohio': 2000    1.5
 2001    1.7
 Name: Ohio, dtype: float64, 'Nevada': 2000    NaN
 2001    2.4
 Name: Nevada, dtype: float64}

In [24]:
# If a DataFrame's index and columns have their name attributes set,
# these names are displayed as well. Here the row index is named 'year'.
frame3.index.name = 'year'
frame3

Unnamed: 0_level_0,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [25]:
# Name the column index too; the display now shows 'states' above the columns
# in addition to 'year' for the rows.
frame3.columns.name = 'states'
frame3

states,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6


In [26]:
# As with Series, the values attribute returns the data contained in the
# DataFrame as a two-dimensional ndarray (row and column labels are dropped).
# NOTE(review): the pandas docs recommend DataFrame.to_numpy() for new code.
frame3.values

array([[nan, 1.5],
       [2.4, 1.7],
       [2.9, 3.6]])

In [27]:
# When the columns hold different types (ints, strings, floats), the resulting
# array uses dtype=object so that it can hold every column's values.
frame2.values

array([[2000, 'Ohio', 1.5, nan],
       [2001, 'Ohio', 1.7, -1.2],
       [2002, 'Ohio', 3.6, nan],
       [2001, 'Nevada', 2.4, -1.5],
       [2002, 'Nevada', 2.9, -1.7],
       [2003, 'Nevada', 3.2, nan]], dtype=object)

## Essential Functionality

### Reindexing on Series

In [28]:
# A small float Series with deliberately unsorted string labels,
# used below to demonstrate reindexing.
obj = pd.Series({'d': 4.5, 'b': 7.2, 'a': -5.3, 'c': 3.6})
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [29]:
# Calling reindex on this Series returns a new Series arranged according to the
# new index; labels that were not already present ('e') get a missing value.
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [30]:
# A Series with gaps in its integer labels (0, 2, 4), used below to show how
# reindex can fill the holes.
obj3 = pd.Series({0: 'blue', 2: 'purple', 4: 'yellow'})
obj3

0      blue
2    purple
4    yellow
dtype: object

In [31]:
# Reindex to the labels 0..5: the labels 1, 3 and 5 did not exist, so they
# are introduced with missing values.
obj3.reindex(range(6))

0      blue
1       NaN
2    purple
3       NaN
4    yellow
5       NaN
dtype: object

In [32]:
# method='ffill' (forward fill): each new label takes the value of the
# closest preceding label that exists.
obj3.reindex(range(6), method='ffill') #forward fill = ffill

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [33]:
# method='bfill' (backward fill): each new label takes the value of the next
# existing label; label 5 has nothing after it, so it stays missing.
obj3.reindex(range(6), method='bfill') #backward fill = bfill

0      blue
1    purple
2    purple
3    yellow
4    yellow
5       NaN
dtype: object

### Reindexing on DataFrame

In [34]:
# 3x3 integer frame (values 0..8) with row label 'b' deliberately left out,
# so that reindexing has a gap to fill. Note: this rebinds `frame`.
frame = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    columns=['Ohio', 'Texas', 'California'],
    index=['a', 'c', 'd'],
)
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [36]:
# Row reindexing: 'b' is not a row of `frame`, so it is added as a row of
# missing values (which is why the displayed values become floats).
# Note: this rebinds `frame2`.
new_row_labels = ['a', 'b', 'c', 'd']
frame2 = frame.reindex(index=new_row_labels)
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [38]:
# The columns can be reindexed with the columns keyword: 'Utah' is new and is
# filled with missing values, while 'Ohio' is not requested and is left out.
states = ['Texas', 'Utah', 'California']
frame2.reindex(columns=states)

Unnamed: 0,Texas,Utah,California
a,1.0,,2.0
b,,,
c,4.0,,5.0
d,7.0,,8.0


In [41]:
# Rows and columns can be reindexed in a single call. The new row 'f' and the
# new columns 'Utah' and 'New York' are filled with missing values; the
# existing rows are returned in the requested order.
frame2.reindex(index = ["f","b","a","d","c"], columns = ['Texas', 'Utah', 'California', 'New York'])

Unnamed: 0,Texas,Utah,California,New York
f,,,,
b,,,,
a,1.0,,2.0,
d,7.0,,8.0,
c,4.0,,5.0,


In [49]:
# A DataFrame can be built directly from a NumPy array: the numbers 1..100 are
# laid out as 20 rows x 5 columns, and each row is labelled with its first value
# (1, 6, 11, ...). Note: this rebinds `data`, which held a dict earlier on.
data = pd.DataFrame(
    np.arange(1, 101).reshape(20, 5),
    index=range(1, 101, 5),
    columns=['a', 'b', 'c', 'd', 'e'],
)
data

Unnamed: 0,a,b,c,d,e
1,1,2,3,4,5
6,6,7,8,9,10
11,11,12,13,14,15
16,16,17,18,19,20
21,21,22,23,24,25
26,26,27,28,29,30
31,31,32,33,34,35
36,36,37,38,39,40
41,41,42,43,44,45
46,46,47,48,49,50


In [45]:
# Smaller illustration of arange + reshape: the numbers 1..15 laid out
# row by row as a 3x5 array.
np.arange(1,16).reshape((3,5))

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])

In [53]:
# Same 20x5 frame as before; list('abcde') is a shorthand that expands the
# string into the five single-letter column labels.
data = pd.DataFrame(
    np.arange(1, 101).reshape(20, 5),
    index=range(1, 101, 5),
    columns=list('abcde'),
)
data

Unnamed: 0,a,b,c,d,e
1,1,2,3,4,5
6,6,7,8,9,10
11,11,12,13,14,15
16,16,17,18,19,20
21,21,22,23,24,25
26,26,27,28,29,30
31,31,32,33,34,35
36,36,37,38,39,40
41,41,42,43,44,45
46,46,47,48,49,50


In [55]:
# Show the current state of frame2 before the indexing examples that follow.
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [58]:
# Select a single cell by row label and column label in ONE .loc call.
# The chained form .loc[row][col] first materialises the whole row as a Series
# and then indexes that; a single call is the documented pattern and is the
# only form that is also safe for assignment.
frame2.loc['c', 'California']

5.0

In [59]:
# Row 'c', third column by position. The row Series is labelled with the column
# names (strings), so an integer inside plain [] is an implicit positional
# lookup, which pandas has deprecated; .iloc states the positional intent.
frame2.loc['c'].iloc[2]

5.0

In [60]:
# A slice with step -1 returns the rows in reverse order.
frame2.loc[::-1]

Unnamed: 0,Ohio,Texas,California
d,6.0,7.0,8.0
c,3.0,4.0,5.0
b,,,
a,0.0,1.0,2.0


In [62]:
# The first slice applies to the rows and the second to the columns,
# so both axes come back in reverse order.
frame2.loc[::-1,::-1]

Unnamed: 0,California,Texas,Ohio
d,8.0,7.0,6.0
c,5.0,4.0,3.0
b,,,
a,2.0,1.0,0.0


In [65]:
# loc slices by label and INCLUDES the end label ('c' is returned),
# whereas iloc slices by position and excludes the end position.
frame2.loc['a':'c']

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0


In [64]:
# Positional slice: rows at positions 0 and 1 only -- the end position 2 is
# excluded, as with ordinary Python slicing.
frame2.iloc[0:2]

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,


In [70]:
# Set one cell with a single .loc[row, column] assignment.
# The chained form frame2.loc['b']['Ohio'] = 2 assigns into the intermediate row
# Series; whether that reaches frame2 depends on pandas returning a view, and
# with Copy-on-Write enabled it never does, so the frame would stay unchanged.
frame2.loc['b', 'Ohio'] = 2
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,2.0,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [71]:
# Same cell, addressing the column by position: translate position 0 into its
# column label and assign through a single .loc call. This avoids both the
# chained assignment and the deprecated use of an integer inside [] on a
# string-labelled Series.
frame2.loc['b', frame2.columns[0]] = 2
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,2.0,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [73]:
# Fancy indexing: set the first and third columns of row 'b' in one go.
# frame2.columns[[0, 2]] turns the positions into column labels so that the
# whole assignment is a single .loc call rather than a chained assignment on an
# intermediate row Series.
frame2.loc['b', frame2.columns[[0, 2]]] = [4, 6]
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,4.0,,6.0
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [74]:
# Swap the first and third column values of row 'b' using one .loc assignment.
# .to_numpy() on the right-hand side is essential: assigning a labelled Series
# through .loc would align it on the column labels and put each value straight
# back where it came from, so no swap would happen. A plain array is assigned
# by position instead.
first_and_third = frame2.columns[[0, 2]]
third_and_first = frame2.columns[[2, 0]]
frame2.loc['b', third_and_first] = frame2.loc['b', first_and_third].to_numpy()
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,6.0,,4.0
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [75]:
# NaN is a float value; adding to (or subtracting from) NaN gives NaN again.
# The second element is taken with .iloc because the column is labelled with
# strings, and an integer inside plain [] is a deprecated positional lookup.
frame2['Texas'].iloc[1] + 5

nan

In [76]:
# The arithmetic above produced a new value only; frame2 itself is unchanged.
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,6.0,,4.0
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [77]:
# Work on an independent (deep) copy so that the changes made to frame4 below
# do not touch frame2.
frame4 = frame2.copy(deep=True)
frame4

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,6.0,,4.0
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [79]:
# Add a new column from a list; the list supplies one value per row (4 rows).
frame4['students'] = [4,5,6,7]
frame4

Unnamed: 0,Ohio,Texas,California,students
a,0.0,1.0,2.0,4
b,6.0,,4.0,5
c,3.0,4.0,5.0,6
d,6.0,7.0,8.0,7


In [80]:
# Label slice with a step: from the first row up to 'd', taking every second
# row, which yields 'a' and 'c'.
frame2.loc[:'d':2]

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
c,3.0,4.0,5.0


In [84]:
# Gotcha: with step -1 the slice walks backwards starting from 'a', so it can
# never reach 'd' and the result is empty. For a reversed slice the start label
# has to be the later one (see the 'd':'a':-1 example).
frame2.loc['a':'d':-1]

Unnamed: 0,Ohio,Texas,California


In [83]:
# Reversed label slice: start at 'd' and step backwards to 'a'; both end
# labels are included.
frame2.loc['d':'a':-1]

Unnamed: 0,Ohio,Texas,California
d,6.0,7.0,8.0
c,3.0,4.0,5.0
b,6.0,,4.0
a,0.0,1.0,2.0


In [85]:
# Omitting the stop label with a negative step runs back to the first row,
# giving the same rows as 'd':'a':-1 here.
frame2.loc['d'::-1]

Unnamed: 0,Ohio,Texas,California
d,6.0,7.0,8.0
c,3.0,4.0,5.0
b,6.0,,4.0
a,0.0,1.0,2.0


In [93]:
# Reverse both the rows and the columns again, now that row 'b' holds values.
frame2.loc[::-1,::-1]

Unnamed: 0,California,Texas,Ohio
d,8.0,7.0,6.0
c,5.0,4.0,3.0
b,4.0,,6.0
a,2.0,1.0,0.0


In [95]:
# Arithmetic between DataFrames aligns on both row and column labels.
# 'students' exists only in frame4, so that column is all NaN in the result;
# Texas/'b' is NaN because the value is missing in both frames.
frame2 + frame4

Unnamed: 0,California,Ohio,Texas,students
a,4.0,0.0,2.0,
b,8.0,12.0,,
c,10.0,6.0,8.0,
d,16.0,12.0,14.0,


In [96]:
# The addition returned a new frame; frame4 itself is unchanged.
frame4

Unnamed: 0,Ohio,Texas,California,students
a,0.0,1.0,2.0,4
b,6.0,,4.0,5
c,3.0,4.0,5.0,6
d,6.0,7.0,8.0,7


In [97]:
# Boolean frame of the same shape: True wherever the value is missing (NaN).
frame4.isna() # to check which values are NaN

Unnamed: 0,Ohio,Texas,California,students
a,False,False,False,False
b,False,True,False,False
c,False,False,False,False
d,False,False,False,False


In [98]:
# Boolean-mask selection: values that satisfy the condition are kept and every
# other cell becomes NaN, so the frame keeps its shape.
frame4[frame4 > 5] 

Unnamed: 0,Ohio,Texas,California,students
a,,,,
b,6.0,,,
c,,,,6.0
d,6.0,7.0,8.0,7.0


In [99]:
# isnull() is an alias of isna(); the result is identical to the isna() call.
frame4.isnull()

Unnamed: 0,Ohio,Texas,California,students
a,False,False,False,False
b,False,True,False,False
c,False,False,False,False
d,False,False,False,False


In [102]:
# fillna returns a NEW frame with missing values replaced by 0;
# frame4 itself is not modified by this call.
frame4.fillna(0)

Unnamed: 0,Ohio,Texas,California,students
a,0.0,1.0,2.0,4
b,6.0,0.0,4.0,5
c,3.0,4.0,5.0,6
d,6.0,7.0,8.0,7


In [103]:
# Confirm that frame4 still contains the NaN: the fillna result was not stored.
frame4

Unnamed: 0,Ohio,Texas,California,students
a,0.0,1.0,2.0,4
b,6.0,,4.0,5
c,3.0,4.0,5.0,6
d,6.0,7.0,8.0,7


In [106]:
# Keep the filled result by rebinding the name. This replaces inplace=True:
# the effect on frame4 is the same, but reassignment is explicit, works inside
# method chains, and avoids the in-place mutation that pandas discourages.
frame4 = frame4.fillna(0)
frame4

Unnamed: 0,Ohio,Texas,California,students
a,0.0,1.0,2.0,4
b,6.0,0.0,4.0,5
c,3.0,4.0,5.0,6
d,6.0,7.0,8.0,7


In [108]:
# Load the marks sheet from the CSV file next to the notebook and preview the
# first five rows.
# NOTE(review): the rendered preview contains student names and ID codes --
# clear or redact the outputs before sharing this notebook.
csvData = pd.read_csv(filepath_or_buffer='jTeOBO.csv')
csvData.head(n=5)

Unnamed: 0,Student Code,Degree,Student Name,Mid,Quiz 1,Quiz 2,Best of Quizzes,Assignment 1,Assignment 2,Best of Assignments,Total Sessional (50),Final (50),Total (100),Grade
0,022-14-19987,BS(CS),Abdul Basit,28,8.0,3.0,8,7.0,9.0,9,45,25.0,70,B
1,022-14-110233,BS(CS),Adeel Ahmed,17,,5.0,5,8.0,10.0,10,32,18.0,50,F
2,022-14-110585,BS(CS),Afrah Zareen,18,5.0,2.0,5,8.0,10.0,10,33,30.0,63,C
3,022-14-19718,BS(CS),Ahmed Ali Raza,14,7.0,2.0,7,,2.0,2,23,23.0,46,F
4,022-14-110648,BS(CS),Ahsan Ali Vohra,27,7.0,6.0,7,7.0,9.0,9,43,34.0,77,B


In [118]:
# Keep only the students whose best quiz AND best assignment scores are both
# above 7. Each comparison yields a boolean Series; & combines them row by row.
# NOTE(review): the saved output under this cell looks stale -- it lists a row
# with Best of Assignments = 5, which this filter excludes. Re-run to refresh.
bestofquiz = csvData.loc[
    csvData['Best of Quizzes'].gt(7) & csvData['Best of Assignments'].gt(7)
]

Unnamed: 0,Student Code,Degree,Student Name,Mid,Quiz 1,Quiz 2,Best of Quizzes,Assignment 1,Assignment 2,Best of Assignments,Total Sessional (50),Final (50),Total (100),Grade
0,022-14-19987,BS(CS),Abdul Basit,28,8.0,3.0,8,7.0,9.0,9,45,25.0,70,B
5,022-14-110232,BS(CS),Ameer Hamza,25,9.0,6.0,9,8.0,10.0,10,44,27.0,71,B
7,022-14-110388,BS(CS),Aneebullah Niazi,26,9.0,6.0,9,8.0,10.0,10,45,40.0,85,A
8,022-14-110601,BS(CS),Areesha Sohail,19,9.0,4.0,9,7.0,9.0,9,37,24.0,61,C
9,022-14-110599,BS(CS),Arsalan,28,8.0,6.0,8,8.0,,8,44,40.0,84,A
10,022-14-110214,BS(CS),Fatima Haider Warsi,30,8.0,7.0,8,8.0,,8,46,45.0,91,A
11,022-14-110591,BS(CS),Habib Ullah,28,8.0,5.0,8,5.0,,5,41,35.0,76,B
13,022-14-110600,BS(CS),Hamza Abdul Jabbar,24,8.0,4.0,8,8.0,10.0,10,42,25.0,67,C
17,022-14-110396,BS(CS),Khalid Anwer,20,8.0,5.0,8,9.0,11.0,11,39,31.0,70,B
19,022-14-110222,BS(CS),Mohammad Hunain,27,9.0,6.0,9,8.0,10.0,10,46,45.0,91,A


In [114]:
# count() reports the number of non-missing values in each column, so columns
# with a lower count than the others contain missing entries.
bestofquiz.count()

Student Code            23
Degree                  23
Student Name            23
Mid                     23
Quiz 1                  23
Quiz 2                  23
Best of Quizzes         23
Assignment 1            22
Assignment 2            16
Best of Assignments     23
Total Sessional (50)    23
Final (50)              23
Total (100)             23
Grade                   23
dtype: int64