In [1]:
import requests 
from bs4 import BeautifulSoup 
import pandas as pd

# Lahore Campus Scrapper

In [2]:
data_lhr = {
            "ID":[],
            "Name": [],
            "Designation":[],
            "HEC Approved PHD Supervisor":[],
            "Highest Education":[],
            "Email":[],
            "Department":[],
            "Extension":[],
            "ImageURL":[]
           }
    
def process_LHR_faculties(faculties, dept):
    '''
    This function processes all the list of Faculties (in HTML) and parses them in a Dictionary.
    The dictionary contains a Faculty member's ID, Name, Designation, HEC Accproved Supervisor,
    Highest Education, Email, Department, Extension, and ImageURL.
    '''
    for faculty in faculties:
        data_lhr["Department"].append(dept)
        
        data_lhr["Name"].append(faculty.find('h5', class_='text-center').text)
        #print(name)
        desig_tokens = faculty.find('p', class_='text-center').text.strip().split('\n')
        data_lhr["Designation"].append(desig_tokens[0].strip())
        approved_supervisor = data_lhr["HEC Approved PHD Supervisor"].append(True if 'HEC Approved' in desig_tokens[-1].strip() else False)
        #print(desig)
        #print(approved_supervisor)
        data_lhr["Email"].append(faculty.find('p', class_='mb-0').text)
        #print(email)
        try:
            id = int(faculty.find('a', class_='faculty-link')['href'].split('/')[-1])
        except:
            id = None
        data_lhr["ID"].append(id)
        #print(id)
        data_lhr["ImageURL"].append('https://lhr.nu.edu.pk'+faculty.find('img', class_='card-img-top')['src'])
        #print(img_url)
        faculty_link = 'https://lhr.nu.edu.pk' + faculty.find('a', class_='faculty-link')['href']
        #print(faculty_link)
        if faculty_link is not None:
            r_2 = requests.get(faculty_link)
            s = BeautifulSoup(r_2.content, 'html.parser') 
            try:
                ext = int(s.find('div', class_='facultyCard').find('p', class_='mt-0').find('span', class_='small').text.split(' ')[-1][4:])
            except:
                ext=None
            
            data_lhr["Extension"].append(ext)
            
            #print(ext)
            try:
                highest_education = s.find('div', class_='text-justify').findAll('li')[0].text.strip()
            except:
                highest_education = None
                
            data_lhr["Highest Education"].append(highest_education)
                
            #print(education)
        else:
            data_lhr["Highest Education"].append(None)
            data_lhr["Extension"].append(None)

In [3]:
def get_LHR_Faculty_Data():
    '''
    This function scrapes http://lhr.nu.edu.pk for all the Faculty data of all departments.
    It also uses process_LHR_faculties() function to parse the gathered data in a dictionary.
    '''
    # School of Computing
    URL = "http://lhr.nu.edu.pk/fsc/faculty/"
    r = requests.get(URL) 
    soup = BeautifulSoup(r.content, 'html.parser') 
    containers = soup.findAll('div', class_='container')
    
    for cont in containers:
        dept = cont.find('h1', class_='mb-2')
        if dept != None:
            dept_tokens = dept.text.split(' ')
            dept = dept_tokens[2] + " " + dept_tokens[3]
            #print(dept)
            faculties = cont.findAll('div', class_ = 'facultyCard')
            process_LHR_faculties(faculties, dept)
    
    # Electrical Engineering
    URL = "http://lhr.nu.edu.pk/ee/faculty/"
    r = requests.get(URL) 
    soup = BeautifulSoup(r.content, 'html.parser') 
    faculties = soup.findAll('div', class_='facultyCard')
    process_LHR_faculties(faculties, 'Electrical Engineering')
    
    # Civil Engineering
    URL = "http://lhr.nu.edu.pk/cv/faculty/"
    r = requests.get(URL) 
    soup = BeautifulSoup(r.content, 'html.parser') 
    faculties = soup.findAll('div', class_='facultyCard')
    process_LHR_faculties(faculties, 'Civil Engineering')
    
    # School of Management
    URL = "http://lhr.nu.edu.pk/fsm/faculty/"
    r = requests.get(URL) 
    soup = BeautifulSoup(r.content, 'html.parser') 
    faculties = soup.findAll('div', class_='facultyCard')
    process_LHR_faculties(faculties, 'Management'), data_lhr
    
    # Humanities
    URL = "http://lhr.nu.edu.pk/ss/faculty/"
    r = requests.get(URL) 
    soup = BeautifulSoup(r.content, 'html.parser') 
    faculties = soup.findAll('div', class_='facultyCard')
    process_LHR_faculties(faculties, 'Science & Humanities')


In [4]:
get_LHR_Faculty_Data()

In [5]:
df_lhr = pd.DataFrame(data_lhr)

In [6]:
df_lhr

Unnamed: 0,ID,Name,Designation,HEC Approved PHD Supervisor,Highest Education,Email,Department,Extension,ImageURL
0,1238.0,Dr. Kashif Zafar,Professor,True,"Ph.D(Computer Science), NUCES, Islamabad, Paki...",kashif.zafar@nu.edu.pk,Computer Science,221.0,https://lhr.nu.edu.pk/media/Faculty/02_-_Dr._K...
1,4391.0,Dr. Asif Mahmood Gillani,Professor,True,"Ph.D(Computing), University of Patras , Greece...",asif.gilani@nu.edu.pk,Computer Science,291.0,https://lhr.nu.edu.pk/media/Faculty/03_-_Dr._A...
2,5424.0,Dr. Hammad Naveed,Professor,True,"Ph.D. (Bioengineering), The University of Illi...",hammad.naveed@nu.edu.pk,Computer Science,227.0,https://lhr.nu.edu.pk/media/Faculty/01_-_Dr._H...
3,6174.0,Dr. Arshad Ali,Associate Professor,True,"PhD (CS), University of Paris VI (UPMC) Paris,...",arshad.ali1@nu.edu.pk,Computer Science,406.0,https://lhr.nu.edu.pk/media/Faculty/04_-_Dr._A...
4,6113.0,Dr. Asma Naseer,Associate Professor,True,"PhD (CS), NUCES, Lahore, 2018",asma.naseer@nu.edu.pk,Computer Science,410.0,https://lhr.nu.edu.pk/media/Faculty/Dr_Asma_Na...
...,...,...,...,...,...,...,...,...,...
179,6420.0,Ms. Nokhaiz Zahra,Instructor,False,"MA, Virtual University(2020)",nokhaiz.zahra@nu.edu.pk,Science & Humanities,324.0,https://lhr.nu.edu.pk/media/Faculty/165_-_Nokh...
180,6181.0,Ms. Rida Ahmed,Instructor,False,"BS (English), Fatima Jinnah Women University, ...",rida.ahmed@nu.edu.pk,Science & Humanities,363.0,https://lhr.nu.edu.pk/media/Faculty/6181-remov...
181,6555.0,Ms. Aqsa Naz,Lecturer (English),False,"M.Phil (English), UCP, Lahore (2022)",aqsa.naz@nu.edu.pk,Science & Humanities,324.0,https://lhr.nu.edu.pk/media/Faculty/161_-_Aqsa...
182,,Mr. Hafiz Umair Gulzar,Lecturer (Islamiat),False,"M.Phil (Islamic Studies), GCU, Lahore (2019)",umair.gulzar@nu.edu.pk,Science & Humanities,207.0,https://lhr.nu.edu.pk/media/Faculty/163_-_Hafi...


In [7]:
df_lhr.dtypes

ID                             float64
Name                            object
Designation                     object
HEC Approved PHD Supervisor       bool
Highest Education               object
Email                           object
Department                      object
Extension                      float64
ImageURL                        object
dtype: object

In [8]:
df_lhr = df_lhr.convert_dtypes()

In [9]:
df_lhr.dtypes  # no predefined EMAIL or URL datatypes so considereing them as Strings

ID                               Int64
Name                            string
Designation                     string
HEC Approved PHD Supervisor    boolean
Highest Education               string
Email                           string
Department                      string
Extension                        Int64
ImageURL                        string
dtype: object

In [10]:
df_lhr.to_csv('lhr.csv')

# Faisalabad Capmus Scrapper

In [11]:
data_cfd = {
            "ID":[],
            "Name": [],
            "Designation":[],
            "HEC Approved PHD Supervisor":[],
            "Highest Education":[],
            "Email":[],
            "Department":[],
            "Extension":[],
            "ImageURL":[]
           }
def process_CFD_Faculties(faculties):
    '''
    This function processes all the list of Faculties in CFD Campus (in HTML) and parses them in a Dictionary.
    The dictionary contains a Faculty member's ID, Name, Designation, HEC Accproved Supervisor,
    Highest Education, Email, Department, Extension, and ImageURL.
    '''
    for faculty in faculties:
        imageURL = faculty.find('img')['src']
        details = faculty.find('div', 'unitech-teacher__details')
        desig = details.find('h6').text.strip()
        name = details.find('a').text.strip()
        email = details.find('p', class_=None).text
        try:
            hec_approved = True if "HEC approved" in details.find('p', class_='hec').text.strip() else False
        except:
            hec_approved = False
        link = details.find('a')['href']
        if link is not None:
            #id_ = link.split('/')[-2]
            r2 = requests.get(link)
            s2 = BeautifulSoup(r2.content, 'html.parser')
            try:
                id_ = int(s2.find('link', {"rel":"shortlink"})['href'].split('=')[-1])
            except:
                id_ = None
            try:
                ext = int(s2.find('ul', class_='teacher__address').findAll('li')[-1].text.strip().split('\n')[-1].strip())
            except:
                ext = None
            try:
                dept = s2.find('ul', class_='teacher__address').findAll('li')[0].text.strip()
            except:
                dept = None
            try:
                highest_education = s2.find('div', class_='htc__skill__container progress__bar--2').find('li').text
            except:
                try:
                    highest_education = s2.find('div', class_='htc__skill__container progress__bar--2').find('p').text
                except:
                    highest_education = None
        else:
            id_ = None
            ext = None
            dept = None
            highest_education = None
            
        data_cfd['ID'].append(id_)
        data_cfd['Name'].append(name)
        data_cfd['Designation'].append(desig)
        data_cfd['HEC Approved PHD Supervisor'].append(hec_approved)
        data_cfd['Highest Education'].append(highest_education)
        data_cfd['Department'].append(dept)
        data_cfd['Extension'].append(ext)
        data_cfd['Email'].append(email)
        data_cfd['ImageURL'].append(imageURL)

In [12]:
def get_CFD_Faculty_Data():
    '''
    This function scrapes https://cfd.nu.edu.pk for all the Faculty data of all departments.
    It also uses process_CFD_Faculties() function to parse the gathered data in a dictionary.
    '''
    
    URL = "https://cfd.nu.edu.pk/department-cs/"
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'html.parser') 
    faculties = soup.findAll('div', 'unitech-teacher')
    process_CFD_Faculties(faculties)
    
    URL = "https://cfd.nu.edu.pk/department-se/"
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'html.parser') 
    faculties = soup.findAll('div', 'unitech-teacher')
    #print(len(faculties))
    process_CFD_Faculties(faculties)
    
    URL = "https://cfd.nu.edu.pk/department-ee/"
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'html.parser') 
    faculties = soup.findAll('div', 'unitech-teacher')
    #print(len(faculties))
    process_CFD_Faculties(faculties)
    
    URL = "https://cfd.nu.edu.pk/department-sh/"
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'html.parser') 
    faculties = soup.findAll('div', 'unitech-teacher')
    #print(len(faculties))
    process_CFD_Faculties(faculties)
    
    URL = "https://cfd.nu.edu.pk/department-fsm/"
    r = requests.get(URL)
    soup = BeautifulSoup(r.content, 'html.parser') 
    faculties = soup.findAll('div', 'unitech-teacher')
    #print(len(faculties))
    process_CFD_Faculties(faculties)

In [13]:
get_CFD_Faculty_Data()

In [14]:
df_cfd = pd.DataFrame(data_cfd)

In [15]:
df_cfd

Unnamed: 0,ID,Name,Designation,HEC Approved PHD Supervisor,Highest Education,Email,Department,Extension,ImageURL
0,6074,Dr. Muhammad Ahmad,Associate Professor & HOD,True,Doctor of Philosophy (Hyperspectral Imaging) (...,dr.ahmad@nu.edu.pk,Department of Computer Science,160.0,https://cfd.nu.edu.pk/wp-content/uploads/2020/...
1,4748,Dr. Shahzad Sarfraz,Professor,True,"Ph.D. in Remote Sensing Image Processing AIT, ...",shahzad.sarfraz@nu.edu.pk,Department of Computer Science,102.0,https://cfd.nu.edu.pk/wp-content/uploads/2019/...
2,507933,Dr. Anwar Shah,Assistant Professor,True,"PhD Computer Sciences, FAST-NUCES, Peshawar",Anwar.Shah@nu.edu.pk,Department of Computer Science,131.0,https://cfd.nu.edu.pk/wp-content/uploads/2022/...
3,4752,Dr. Bilal Khan,Assistant Professor,True,"Ph.D. (Computer Science & Engineering), Dongg...",khan.bilal@nu.edu.pk,Department of Computer Science,126.0,https://cfd.nu.edu.pk/wp-content/uploads/2019/...
4,4772,Dr. Hashim Yasin,Assistant Professor,True,"Ph.D. (Computer Science), University of Bonn, ...",hashim.yasin@nu.edu.pk,Department of Computer Science,165.0,https://cfd.nu.edu.pk/wp-content/uploads/2019/...
...,...,...,...,...,...,...,...,...,...
101,4858,Dr. Umar Farooq,Assistant Professor,True,"PhD (Finance), COMSATS University Islamabad, L...",umar.farooq@nu.edu.pk,FAST School of Management,264.0,https://cfd.nu.edu.pk/wp-content/uploads/2019/...
102,508478,Mr. Ahmad Salah,Lecturer,False,"Chartered Financial Analyst (CFA), institute, ...",ahmad.salah@nu.edu.pk,FAST School of Management,256.0,https://cfd.nu.edu.pk/wp-content/uploads/2023/...
103,508457,Ms. Amna Babar,Lecturer,False,"MS, Innovation and Entrepreneurship, 2022\nBBA...",amna.Tirmizey@nu.edu.pk,FAST School of Management,,https://cfd.nu.edu.pk/wp-content/uploads/2023/...
104,4865,Mr. Hafiz Muhammad,Lecturer,False,"MBA, FAST National University of computer and ...",Zeeshan.raza@nu.edu.pk,FAST School of Management,265.0,https://cfd.nu.edu.pk/wp-content/uploads/2019/...


In [18]:
df_cfd.dtypes

ID                               int64
Name                            object
Designation                     object
HEC Approved PHD Supervisor       bool
Highest Education               object
Email                           object
Department                      object
Extension                      float64
ImageURL                        object
dtype: object

In [19]:
df_cfd = df_cfd.convert_dtypes()

In [20]:
df_cfd.dtypes

ID                               Int64
Name                            string
Designation                     string
HEC Approved PHD Supervisor    boolean
Highest Education               string
Email                           string
Department                      string
Extension                        Int64
ImageURL                        string
dtype: object

In [21]:
df_cfd.to_csv('cfd.csv')

# Peshawar Campus Scrapper

In [22]:
data_pwr = {
            "ID":[],
            "Name": [],
            "Designation":[],
            "HEC Approved PHD Supervisor":[],
            "Highest Education":[],
            "Email":[],
            "Department":[],
            "Extension":[],
            "ImageURL":[]
           }
def process_PWR_Faculties(faculties):
    '''
    This function processes all the list of Faculties in Peshawar Campus (in HTML) and parses them in a Dictionary.
    The dictionary contains a Faculty member's ID, Name, Designation, HEC Accproved Supervisor,
    Highest Education, Email, Department, Extension, and ImageURL.
    '''
    for faculty in faculties:
        img_url = 'http://pwr.nu.edu.pk/' + faculty.find('img')['src']
        name = faculty.find('a', class_='text-theme-color-2').text.strip()
        link = 'http://pwr.nu.edu.pk/' + faculty.find('a', class_='text-theme-color-2')['href']
        desig = faculty.find('h5', class_='text-theme-color').text.strip()
        mail = faculty.find('li').find('a')['href'][7:]
        
        if link is not None:
            id_ = int(link.split('=')[-1])
            r2 = requests.get(link)
            s2 = BeautifulSoup(r2.content, 'html.parser')

            dept = s2.findAll('h5', class_=None)[-1].text
            try:
                hec_approved = True if 'HEC Approved' in s2.findAll('h5', class_=None)[0].text else False
            except:
                hec_approved = None
            highest_education = s2.find('div', {"id":'education'}).find('li').text.strip()
            try:
                ext = int(s2.find('div', {"id":'contact'}).find('a', {"class":"text-gray"}).text.split('|')[-1].strip()[4:])
            except:
                ext = None
        else:
            id_ = None
            dept = None
            hec_approved = None
            highest_education = None
            ext = None
        
        data_pwr['ID'].append(id_)
        data_pwr['Name'].append(name)
        data_pwr['Designation'].append(desig)
        data_pwr['HEC Approved PHD Supervisor'].append(hec_approved)
        data_pwr['Highest Education'].append(highest_education)
        data_pwr['Department'].append(dept)
        data_pwr['Extension'].append(ext)
        data_pwr['Email'].append(mail)
        data_pwr['ImageURL'].append(img_url)

In [23]:
def get_PWR_Faculty_Data():
    '''
    This function scrapes https://pwr.nu.edu.pk for all the Faculty data of all departments.
    It also uses process_PWR_Faculties() function to parse the gathered data in a dictionary.
    '''
    url = "http://pwr.nu.edu.pk/cs-faculty/"
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    faculties = soup.findAll('div', class_='team-members')
    process_PWR_Faculties(faculties)
    
    url = "http://pwr.nu.edu.pk/ee-faculty/"
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    faculties = soup.findAll('div', class_='team-members')
    process_PWR_Faculties(faculties)
    
    url = "http://pwr.nu.edu.pk/sh-faculty/"
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    faculties = soup.findAll('div', class_='team-members')
    process_PWR_Faculties(faculties)

In [24]:
get_PWR_Faculty_Data()

In [25]:
df_pwr = pd.DataFrame(data_pwr)

In [26]:
df_pwr

Unnamed: 0,ID,Name,Designation,HEC Approved PHD Supervisor,Highest Education,Email,Department,Extension,ImageURL
0,1,Dr. Hafeez Ur Rehman,Associate Professor & HoD,True,"Ph.D. in Computer and Control Engineering, Pol...",hafeez.urrehman@nu.edu.pk,Computer Science,107,http://pwr.nu.edu.pk/images/faculty/dr_hafeez.jpg
1,2,Dr. Omar Usman Khan,Associate Professor & Director,True,"Ph.D. (Computer And Control Engineering), Poli...",omar.khan@nu.edu.pk,Computer Science,103,http://pwr.nu.edu.pk/images/faculty/Dr_Omer_Us...
2,3,Dr. Nouman Azam,Associate Professor,True,"Ph.D. (CS), University of Regina, Canada (2014)",nouman.azam@nu.edu.pk,Computer Science,134,http://pwr.nu.edu.pk/images/faculty/Dr_Noman_A...
3,4,Dr. Mohammad Nauman,Associate Professor,True,"PhD(IT), University of Kuala Lumpur, Malaysia ...",mohammad.nauman@nu.edu.pk,Computer Science,126,http://pwr.nu.edu.pk/images/faculty/Dr. Noman.jpg
4,5,Dr. Taimoor Khan,Assistant Professor,True,"Ph.D. (Computer Science), Bahria University Is...",taimoor.khan@nu.edu.pk,Computer Science,127,http://pwr.nu.edu.pk/images/faculty/Dr_m_taimo...
5,6,Dr. Bahar Ali,Assistant Professor,True,"PhD, Three-way Clustering (Machine Learning), ...",bahar.ali@nu.edu.pk,Computer Science,122,http://pwr.nu.edu.pk/images/faculty/dr_baharal...
6,7,Dr. Musadaq Mansoor,Assistant Professor,False,PhD Computer Science (Machine Learning / Bioin...,musadaq.mansoor@nu.edu.pk,Computer Science,130,http://pwr.nu.edu.pk/images/faculty/dr_musadaq...
7,8,Dr. Muhammad Amin,Assistant Professor,False,"PhD (Computer Science), IM Sciences (2022)",muhammad.amin@nu.edu.pk,Computer Science,146,http://pwr.nu.edu.pk/images/faculty/amin.jpg
8,9,Fazl-e-Basit,Assistant Professor,False,"M.S(Computer Science), NUST, Rawalpindi, Pakis...",fazl.basit@nu.edu.pk,Computer Science,110,http://pwr.nu.edu.pk/images/faculty/Fazl-e-Bas...
9,10,Shoaib Muhammad Khan,Assistant Professor,False,"M.S(Computer Science), NUCES, Islamabad, Pakis...",shoaib.khan@nu.edu.pk,Computer Science,144,http://pwr.nu.edu.pk/images/faculty/Shoaib.jpg


In [27]:
df_pwr.dtypes

ID                              int64
Name                           object
Designation                    object
HEC Approved PHD Supervisor      bool
Highest Education              object
Email                          object
Department                     object
Extension                       int64
ImageURL                       object
dtype: object

In [28]:
df_pwr = df_pwr.convert_dtypes()

In [29]:
df_pwr.dtypes

ID                               Int64
Name                            string
Designation                     string
HEC Approved PHD Supervisor    boolean
Highest Education               string
Email                           string
Department                      string
Extension                        Int64
ImageURL                        string
dtype: object

In [30]:
df_pwr.to_csv("pwr.csv")

## Concatenating DataFrames

In [31]:
all_faculty = pd.concat([df_lhr, df_cfd, df_pwr])

In [32]:
all_faculty

Unnamed: 0,ID,Name,Designation,HEC Approved PHD Supervisor,Highest Education,Email,Department,Extension,ImageURL
0,1238,Dr. Kashif Zafar,Professor,True,"Ph.D(Computer Science), NUCES, Islamabad, Paki...",kashif.zafar@nu.edu.pk,Computer Science,221,https://lhr.nu.edu.pk/media/Faculty/02_-_Dr._K...
1,4391,Dr. Asif Mahmood Gillani,Professor,True,"Ph.D(Computing), University of Patras , Greece...",asif.gilani@nu.edu.pk,Computer Science,291,https://lhr.nu.edu.pk/media/Faculty/03_-_Dr._A...
2,5424,Dr. Hammad Naveed,Professor,True,"Ph.D. (Bioengineering), The University of Illi...",hammad.naveed@nu.edu.pk,Computer Science,227,https://lhr.nu.edu.pk/media/Faculty/01_-_Dr._H...
3,6174,Dr. Arshad Ali,Associate Professor,True,"PhD (CS), University of Paris VI (UPMC) Paris,...",arshad.ali1@nu.edu.pk,Computer Science,406,https://lhr.nu.edu.pk/media/Faculty/04_-_Dr._A...
4,6113,Dr. Asma Naseer,Associate Professor,True,"PhD (CS), NUCES, Lahore, 2018",asma.naseer@nu.edu.pk,Computer Science,410,https://lhr.nu.edu.pk/media/Faculty/Dr_Asma_Na...
...,...,...,...,...,...,...,...,...,...
36,54,Mr. Osama Sohrab,Lecturer,False,"M.S (Applied Mathematics), Ghulam Ishaq Khan I...",osama.sohrab@nu.edu.pk,Science and Humanities,144,http://pwr.nu.edu.pk/images/faculty/Osama_Sohr...
37,53,Mr. Ikram Ullah,Assistant Professor,False,#,ikram.ullah@nu.edu.pk,Science and Humanities,145,http://pwr.nu.edu.pk/images/faculty/Ikram_Ulla...
38,52,Mr. Askar Ali,Lecturer,False,#,askar.ali@nu.edu.pk,Science and Humanities,145,http://pwr.nu.edu.pk/images/faculty/Askar_Ali1...
39,51,Ms. Noreen Shah,Lecturer,False,Ph.D. Scholar (Coursework completed),noreen.shah@nu.edu.pk,Science and Humanities,150,http://pwr.nu.edu.pk/images/faculty/WhatsApp I...


In [33]:
all_faculty.reset_index(drop=True, inplace=True)

In [34]:
all_faculty

Unnamed: 0,ID,Name,Designation,HEC Approved PHD Supervisor,Highest Education,Email,Department,Extension,ImageURL
0,1238,Dr. Kashif Zafar,Professor,True,"Ph.D(Computer Science), NUCES, Islamabad, Paki...",kashif.zafar@nu.edu.pk,Computer Science,221,https://lhr.nu.edu.pk/media/Faculty/02_-_Dr._K...
1,4391,Dr. Asif Mahmood Gillani,Professor,True,"Ph.D(Computing), University of Patras , Greece...",asif.gilani@nu.edu.pk,Computer Science,291,https://lhr.nu.edu.pk/media/Faculty/03_-_Dr._A...
2,5424,Dr. Hammad Naveed,Professor,True,"Ph.D. (Bioengineering), The University of Illi...",hammad.naveed@nu.edu.pk,Computer Science,227,https://lhr.nu.edu.pk/media/Faculty/01_-_Dr._H...
3,6174,Dr. Arshad Ali,Associate Professor,True,"PhD (CS), University of Paris VI (UPMC) Paris,...",arshad.ali1@nu.edu.pk,Computer Science,406,https://lhr.nu.edu.pk/media/Faculty/04_-_Dr._A...
4,6113,Dr. Asma Naseer,Associate Professor,True,"PhD (CS), NUCES, Lahore, 2018",asma.naseer@nu.edu.pk,Computer Science,410,https://lhr.nu.edu.pk/media/Faculty/Dr_Asma_Na...
...,...,...,...,...,...,...,...,...,...
326,54,Mr. Osama Sohrab,Lecturer,False,"M.S (Applied Mathematics), Ghulam Ishaq Khan I...",osama.sohrab@nu.edu.pk,Science and Humanities,144,http://pwr.nu.edu.pk/images/faculty/Osama_Sohr...
327,53,Mr. Ikram Ullah,Assistant Professor,False,#,ikram.ullah@nu.edu.pk,Science and Humanities,145,http://pwr.nu.edu.pk/images/faculty/Ikram_Ulla...
328,52,Mr. Askar Ali,Lecturer,False,#,askar.ali@nu.edu.pk,Science and Humanities,145,http://pwr.nu.edu.pk/images/faculty/Askar_Ali1...
329,51,Ms. Noreen Shah,Lecturer,False,Ph.D. Scholar (Coursework completed),noreen.shah@nu.edu.pk,Science and Humanities,150,http://pwr.nu.edu.pk/images/faculty/WhatsApp I...


In [41]:
all_faculty.dtypes

ID                               Int64
Name                            string
Designation                     string
HEC Approved PHD Supervisor    boolean
Highest Education               string
Email                           string
Department                      string
Extension                        Int64
ImageURL                        string
dtype: object

In [36]:
all_faculty.to_csv("fast_faculty.csv")

## Sampling the Faculty DataFrame

In [37]:
faculty = pd.read_csv("fast_faculty.csv")

In [38]:
faculty

Unnamed: 0.1,Unnamed: 0,ID,Name,Designation,HEC Approved PHD Supervisor,Highest Education,Email,Department,Extension,ImageURL
0,0,1238.0,Dr. Kashif Zafar,Professor,True,"Ph.D(Computer Science), NUCES, Islamabad, Paki...",kashif.zafar@nu.edu.pk,Computer Science,221.0,https://lhr.nu.edu.pk/media/Faculty/02_-_Dr._K...
1,1,4391.0,Dr. Asif Mahmood Gillani,Professor,True,"Ph.D(Computing), University of Patras , Greece...",asif.gilani@nu.edu.pk,Computer Science,291.0,https://lhr.nu.edu.pk/media/Faculty/03_-_Dr._A...
2,2,5424.0,Dr. Hammad Naveed,Professor,True,"Ph.D. (Bioengineering), The University of Illi...",hammad.naveed@nu.edu.pk,Computer Science,227.0,https://lhr.nu.edu.pk/media/Faculty/01_-_Dr._H...
3,3,6174.0,Dr. Arshad Ali,Associate Professor,True,"PhD (CS), University of Paris VI (UPMC) Paris,...",arshad.ali1@nu.edu.pk,Computer Science,406.0,https://lhr.nu.edu.pk/media/Faculty/04_-_Dr._A...
4,4,6113.0,Dr. Asma Naseer,Associate Professor,True,"PhD (CS), NUCES, Lahore, 2018",asma.naseer@nu.edu.pk,Computer Science,410.0,https://lhr.nu.edu.pk/media/Faculty/Dr_Asma_Na...
...,...,...,...,...,...,...,...,...,...,...
326,326,54.0,Mr. Osama Sohrab,Lecturer,False,"M.S (Applied Mathematics), Ghulam Ishaq Khan I...",osama.sohrab@nu.edu.pk,Science and Humanities,144.0,http://pwr.nu.edu.pk/images/faculty/Osama_Sohr...
327,327,53.0,Mr. Ikram Ullah,Assistant Professor,False,#,ikram.ullah@nu.edu.pk,Science and Humanities,145.0,http://pwr.nu.edu.pk/images/faculty/Ikram_Ulla...
328,328,52.0,Mr. Askar Ali,Lecturer,False,#,askar.ali@nu.edu.pk,Science and Humanities,145.0,http://pwr.nu.edu.pk/images/faculty/Askar_Ali1...
329,329,51.0,Ms. Noreen Shah,Lecturer,False,Ph.D. Scholar (Coursework completed),noreen.shah@nu.edu.pk,Science and Humanities,150.0,http://pwr.nu.edu.pk/images/faculty/WhatsApp I...


In [39]:
faculty.drop('Unnamed: 0', axis=1, inplace=True)

In [40]:
faculty.dtypes

ID                             float64
Name                            object
Designation                     object
HEC Approved PHD Supervisor       bool
Highest Education               object
Email                           object
Department                      object
Extension                      float64
ImageURL                        object
dtype: object

In [42]:
faculty = faculty.convert_dtypes()

In [43]:
faculty.dtypes

ID                               Int64
Name                            string
Designation                     string
HEC Approved PHD Supervisor    boolean
Highest Education               string
Email                           string
Department                      string
Extension                        Int64
ImageURL                        string
dtype: object

In [44]:
# Roll Number: 20L-1011 => 1/10 = 0.1
sample_ = faculty.sample(frac=0.1)

In [45]:
sample_

Unnamed: 0,ID,Name,Designation,HEC Approved PHD Supervisor,Highest Education,Email,Department,Extension,ImageURL
167,5438.0,Mr. Abdul Sattar,Lecturer,False,"M.Phil(Pakistan Studies), Quaid-i- Azam Univer...",abdul.sattar@nu.edu.pk,Science & Humanities,,https://lhr.nu.edu.pk/media/Faculty/155_-_Abdu...
6,4261.0,Dr. Saira Karim,Associate Professor,True,"Ph.D(Computer Science), NUCES, Lahore, Pakista...",saira.karim@nu.edu.pk,Computer Science,412.0,https://lhr.nu.edu.pk/media/Faculty/07_-_Dr._S...
319,71.0,Mr. Syed Mohsin Shah,Lecturer,False,"B.Sc(Electrical Engineering), UET, Peshawar, P...",mohsin.shah@nu.edu.pk,Electrical Engineering,149.0,http://pwr.nu.edu.pk/images/faculty/mohsin_sha...
196,4814.0,Ms. Maria Maqsood,Assistant Professor,False,"MS (Computer Science), NUCES FAST, Islamabad, ...",maria.maqsood@nu.edu.pk,Department of Computer Science,,https://cfd.nu.edu.pk/wp-content/uploads/2019/...
4,6113.0,Dr. Asma Naseer,Associate Professor,True,"PhD (CS), NUCES, Lahore, 2018",asma.naseer@nu.edu.pk,Computer Science,410.0,https://lhr.nu.edu.pk/media/Faculty/Dr_Asma_Na...
216,6087.0,Mr. Muhammad Hannan,Lecturer,False,"MS (Computer Science), National University of ...",hannan.farooq@nu.edu.pk,Department of Computer Science,176.0,https://cfd.nu.edu.pk/wp-content/uploads/2020/...
262,5109.0,Dr. Haris Khurram,Assistant Professor,True,"PhD (Statistics), Bahauddin Zakariya Universit...",haris.khurram@nu.edu.pk,Department of Sciences & Humanities,278.0,https://cfd.nu.edu.pk/wp-content/uploads/2019/...
213,4912.0,Mr. Mazhar Hussain,Lecturer,False,"M.S (Computer Science), 2019 FAST-NUCES, Chini...",mazhar.h@nu.edu.pk,Department of Computer Science,,https://cfd.nu.edu.pk/wp-content/uploads/2019/...
193,4793.0,Mr. Adeel Ashraf,Assistant Professor,False,"M.S (Computer Science), NUCES-FAST Islamabad, ...",adeel.cheema@nu.edu.pk,Department of Computer Science,171.0,https://cfd.nu.edu.pk/wp-content/uploads/2019/...
272,508272.0,Mr. Muhammad Shoaib,Lecturer,False,"MS(Math), FAST NUCES, Lahore, 2019",m.shoaib@nu.edu.pk,Department of Sciences & Humanities,283.0,https://cfd.nu.edu.pk/wp-content/uploads/2023/...


In [46]:
sample_.dtypes

ID                               Int64
Name                            string
Designation                     string
HEC Approved PHD Supervisor    boolean
Highest Education               string
Email                           string
Department                      string
Extension                        Int64
ImageURL                        string
dtype: object

In [47]:
sample_.to_csv("sample.csv")