## Loading data into Pandas DataFrame

In [1]:
import pandas as pd
import numpy as np

In [2]:
! gdown --id 17wd1WqnPrJuFLSe8DWw5lMdgBe_fv0A0

Downloading...
From: https://drive.google.com/uc?id=17wd1WqnPrJuFLSe8DWw5lMdgBe_fv0A0
To: /content/best_cs_scientist_details.csv
100% 155k/155k [00:00<00:00, 71.9MB/s]


In [14]:
df = pd.read_csv("/content/best_cs_scientist_details.csv")
df

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612
...,...,...,...,...,...,...,...,...,...
995,996,592,Jayaram K. Udupa,https://s.research.com/images/7aea05d4628cc176...,University of Pennsylvania,United States,67,18391,325
996,997,591,Xiang-Gen Xia,https://s.research.com/images/41b61d1d8d568c65...,University of Delaware,United States,67,17285,462
997,998,593,Julia Hirschberg,https://s.research.com/images/4bc88ad0be9b7b95...,Columbia University,United States,67,14866,208
998,999,35,Leif Kobbelt,https://s.research.com/images/540ad1eea7b08b05...,RWTH Aachen University,Germany,67,18462,219


## DataFrames
DataFrame is very similar to Excel workbook Tabular Datasheets starting from index 0. <br/>

Operations: <br/>
i) **df.shape** => dimension of the dataframe <br/>
ii) **df.head(n)** => top n records/rows <br/>
iii) **df.tail(n)** => bottom n records/rows <br/>
iv) **df.columns** => all column names <br/>
v) **df['column_name']** => access data from particular column name <br/>
vi) **df[['column_name_1','column_name_2',...]]** => access data from multiple columns

In [6]:
df.shape

(1000, 9)

In [9]:
df.head(3) # default n=5

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993


In [10]:
df.tail(3) # default n=5

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
997,998,593,Julia Hirschberg,https://s.research.com/images/4bc88ad0be9b7b95...,Columbia University,United States,67,14866,208
998,999,35,Leif Kobbelt,https://s.research.com/images/540ad1eea7b08b05...,RWTH Aachen University,Germany,67,18462,219
999,1000,91,Kwang-Ting Cheng,https://s.research.com/images/9f87b14e531b73b5...,Hong Kong University of Science and Technology,China,67,16057,347


In [11]:
df.columns

Index(['World Rank', 'National Rank', 'Name', 'Image URLs', 'Affiliation',
       'Country', 'H-Index', 'Citations', '#DBLP'],
      dtype='object')

In [12]:
df['Name']

0           Anil K. Jain
1          Yoshua Bengio
2             Jiawei Han
3       Andrew Zisserman
4      Michael I. Jordan
             ...        
995     Jayaram K. Udupa
996        Xiang-Gen Xia
997     Julia Hirschberg
998         Leif Kobbelt
999     Kwang-Ting Cheng
Name: Name, Length: 1000, dtype: object

In [13]:
df[['Name','Affiliation']]

Unnamed: 0,Name,Affiliation
0,Anil K. Jain,Michigan State University
1,Yoshua Bengio,University of Montreal
2,Jiawei Han,University of Illinois at Urbana-Champaign
3,Andrew Zisserman,University of Oxford
4,Michael I. Jordan,University of California
...,...,...
995,Jayaram K. Udupa,University of Pennsylvania
996,Xiang-Gen Xia,University of Delaware
997,Julia Hirschberg,Columbia University
998,Leif Kobbelt,RWTH Aachen University


## Indexing

In [15]:
df.iloc[:3]

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993


In [16]:
df.iloc[3]

World Rank                                                       4
National Rank                                                    1
Name                                              Andrew Zisserman
Image URLs       https://s.research.com/images/2a3913cff9795e86...
Affiliation                                   University of Oxford
Country                                             United Kingdom
H-Index                                                        175
Citations                                                   301948
#DBLP                                                          596
Name: 3, dtype: object

In [18]:
# Second top 50 Names, Image URLs and affiliation

df.iloc[51:101,2:5]

Unnamed: 0,Name,Image URLs,Affiliation
51,Wil M. P. van der Aalst,https://s.research.com/images/fe632259056863c6...,RWTH Aachen University
52,Qiang Yang,https://s.research.com/images/6856b85aba961817...,Hong Kong University of Science and Technology
53,John A. Stankovic,https://s.research.com/images/1590827f1bb021b0...,University of Virginia
54,Pieter Abbeel,https://s.research.com/images/c38e4529bb7e8f2b...,University of California
55,Huan Liu,https://s.research.com/images/4026325414f2d277...,Arizona State University
56,Mubarak Shah,https://s.research.com/images/28796efa4d47a629...,University of Central Florida
57,Witold Pedrycz,https://s.research.com/images/8f26754a6e28c7d6...,University of Alberta
58,Ben Shneiderman,https://s.research.com/images/3b6d2242c89dcf44...,University of Maryland
59,Wolfram Burgard,https://s.research.com/images/2a1bc099402e595a...,University of Freiburg
60,Andrew Y. Ng,https://s.research.com/images/655f85eed1748af3...,Stanford University


In [20]:
# Same thing but in a different way
df[['Name', 'Image URLs', 'Affiliation']].iloc[51:101]

Unnamed: 0,Name,Country,Affiliation
51,Wil M. P. van der Aalst,Germany,RWTH Aachen University
52,Qiang Yang,China,Hong Kong University of Science and Technology
53,John A. Stankovic,United States,University of Virginia
54,Pieter Abbeel,Berkeley,University of California
55,Huan Liu,United States,Arizona State University
56,Mubarak Shah,United States,University of Central Florida
57,Witold Pedrycz,Canada,University of Alberta
58,Ben Shneiderman,College Park,University of Maryland
59,Wolfram Burgard,Germany,University of Freiburg
60,Andrew Y. Ng,United States,Stanford University


In [21]:
df.head()

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612


In [26]:
# Get all with H-Index > 150

df[ df['H-Index'] > 150 ]

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612
5,6,5,Thomas S. Huang,https://s.research.com/images/ddd0e184e7830de1...,University of Illinois at Urbana-Champaign,United States,163,118676,1068
6,7,4,Philip S. Yu,https://s.research.com/images/20815a3765631eb9...,University of Illinois at Chicago,United States,163,127284,1531
7,8,6,Takeo Kanade,https://s.research.com/images/6f00a60e4fcd5fbf...,Carnegie Mellon University,United States,157,127286,612
8,9,1,Rajkumar Buyya,https://s.research.com/images/2994e0b16d912fb9...,University of Melbourne,Australia,154,117166,845
9,10,1,Klaus-Robert Müller,https://s.research.com/images/967468fe98f892a9...,Technical University of Berlin,Germany,151,102859,665


## Data Analysis

In [27]:
df['Affiliation'].value_counts()

University of California      80
MIT                           38
Stanford University           33
Carnegie Mellon University    32
Google (United States)        31
                              ..
Leiden University              1
Middlesex University           1
Syracuse University            1
University of Innsbruck        1
University of Delaware         1
Name: Affiliation, Length: 331, dtype: int64

In [30]:
df['Country'].value_counts()

United States                         493
China                                  87
United Kingdom                         57
Canada                                 37
Germany                                35
Berkeley                               33
Switzerland                            29
Australia                              20
Singapore                              18
Israel                                 15
Italy                                  13
Los Angeles                            13
Belgium                                11
College Park                           10
San Diego                              10
Netherlands                            10
France                                  9
Austria                                 8
Irvine                                  8
Santa Barbara                           6
Japan                                   6
Shenzhen                                5
Spain                                   5
Taiwan                            

In [31]:
df.isnull().values.any()

False

In [32]:
# Check if there is any missing value
df.isnull().sum()

World Rank       0
National Rank    0
Name             0
Image URLs       0
Affiliation      0
Country          0
H-Index          0
Citations        0
#DBLP            0
dtype: int64

In [33]:
df = df.dropna(how="any") # drop all null values
df

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612
...,...,...,...,...,...,...,...,...,...
995,996,592,Jayaram K. Udupa,https://s.research.com/images/7aea05d4628cc176...,University of Pennsylvania,United States,67,18391,325
996,997,591,Xiang-Gen Xia,https://s.research.com/images/41b61d1d8d568c65...,University of Delaware,United States,67,17285,462
997,998,593,Julia Hirschberg,https://s.research.com/images/4bc88ad0be9b7b95...,Columbia University,United States,67,14866,208
998,999,35,Leif Kobbelt,https://s.research.com/images/540ad1eea7b08b05...,RWTH Aachen University,Germany,67,18462,219


In [34]:
df = df.reset_index(drop=True)
df

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612
...,...,...,...,...,...,...,...,...,...
995,996,592,Jayaram K. Udupa,https://s.research.com/images/7aea05d4628cc176...,University of Pennsylvania,United States,67,18391,325
996,997,591,Xiang-Gen Xia,https://s.research.com/images/41b61d1d8d568c65...,University of Delaware,United States,67,17285,462
997,998,593,Julia Hirschberg,https://s.research.com/images/4bc88ad0be9b7b95...,Columbia University,United States,67,14866,208
998,999,35,Leif Kobbelt,https://s.research.com/images/540ad1eea7b08b05...,RWTH Aachen University,Germany,67,18462,219


# Sorting

In [45]:
name_sorted_df = df.sort_values(by='Name')
name_sorted_df.head()

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
385,386,16,Aaron Courville,https://s.research.com/images/ba1916a4e8734172...,University of Montreal,Canada,82,138510,179
644,645,24,Aaron Fenster,https://s.research.com/images/b4b7e0466eaf1fbd...,University of Western Ontario,Canada,73,16545,389
338,339,210,Abhinav Gupta,https://s.research.com/images/1c3bd196ab0b481a...,Facebook (United States),United States,85,41095,162
871,872,524,Abigail Sellen,https://s.research.com/images/9d5bd8a069876190...,Microsoft (United States),United States,69,20151,185
556,557,339,Adam Soroca,https://research.com/images/placeholder-schola...,Millennial Media LLC,United States,76,20550,87


In [43]:
name_sorted_df = df.sort_values(by='Name',ascending=False)
name_sorted_df.head()

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
573,574,1,gonzalo navarro,https://s.research.com/images/b791f56c75f53b23...,University of Chile,Chile,76,25071,428
719,720,440,Zygmunt J. Haas,https://s.research.com/images/d4ca1694e43e1705...,Cornell University,United States,72,34836,225
104,105,3,Zoubin Ghahramani,https://s.research.com/images/3692f85465d385b7...,University of Cambridge,United Kingdom,107,60829,313
946,947,81,Zongben Xu,https://s.research.com/images/ed6fdbcc0e88d657...,Xi'an Jiaotong University,China,68,15190,272
403,404,241,Zohar Manna,https://s.research.com/images/3de029346822cc88...,Stanford University,United States,81,29659,195


In [40]:
name_sorted_df = df.sort_values(by=['Name','Affiliation'])
name_sorted_df.head()

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
385,386,16,Aaron Courville,https://s.research.com/images/ba1916a4e8734172...,University of Montreal,Canada,82,138510,179
644,645,24,Aaron Fenster,https://s.research.com/images/b4b7e0466eaf1fbd...,University of Western Ontario,Canada,73,16545,389
338,339,210,Abhinav Gupta,https://s.research.com/images/1c3bd196ab0b481a...,Facebook (United States),United States,85,41095,162
871,872,524,Abigail Sellen,https://s.research.com/images/9d5bd8a069876190...,Microsoft (United States),United States,69,20151,185
556,557,339,Adam Soroca,https://research.com/images/placeholder-schola...,Millennial Media LLC,United States,76,20550,87


In [41]:
name_sorted_df = df.sort_values(by=['Affiliation','Name'])
name_sorted_df.head()

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
893,894,538,David S. Johnson,https://s.research.com/images/75ee6813942b9e9b...,AT&T (United States),United States,69,103578,119
263,264,161,Divesh Srivastava,https://s.research.com/images/a23448733fc7b2f9...,AT&T (United States),United States,90,29032,379
265,266,1,Christian S. Jensen,https://s.research.com/images/e801bdf772a905ac...,Aalborg University,Denmark,90,29438,419
431,432,2,Kim Guldstrand Larsen,https://s.research.com/images/69d9fd07b90d4549...,Aalborg University,Denmark,80,27466,391
580,581,2,Heikki Mannila,https://s.research.com/images/32d9c306c16f1cf7...,Aalto University,Finland,75,37140,228


In [42]:
name_sorted_df.sort_index(inplace=True)
name_sorted_df

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612
...,...,...,...,...,...,...,...,...,...
995,996,592,Jayaram K. Udupa,https://s.research.com/images/7aea05d4628cc176...,University of Pennsylvania,United States,67,18391,325
996,997,591,Xiang-Gen Xia,https://s.research.com/images/41b61d1d8d568c65...,University of Delaware,United States,67,17285,462
997,998,593,Julia Hirschberg,https://s.research.com/images/4bc88ad0be9b7b95...,Columbia University,United States,67,14866,208
998,999,35,Leif Kobbelt,https://s.research.com/images/540ad1eea7b08b05...,RWTH Aachen University,Germany,67,18462,219


# Concatenation

In [46]:
df_concat = pd.concat([df,name_sorted_df])
df_concat

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612
...,...,...,...,...,...,...,...,...,...
403,404,241,Zohar Manna,https://s.research.com/images/3de029346822cc88...,Stanford University,United States,81,29659,195
946,947,81,Zongben Xu,https://s.research.com/images/ed6fdbcc0e88d657...,Xi'an Jiaotong University,China,68,15190,272
104,105,3,Zoubin Ghahramani,https://s.research.com/images/3692f85465d385b7...,University of Cambridge,United Kingdom,107,60829,313
719,720,440,Zygmunt J. Haas,https://s.research.com/images/d4ca1694e43e1705...,Cornell University,United States,72,34836,225


In [47]:
df_concat = pd.concat([df,name_sorted_df],keys=['normal','sorted'])
df_concat

Unnamed: 0,Unnamed: 1,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
normal,0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
normal,1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
normal,2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
normal,3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
normal,4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612
...,...,...,...,...,...,...,...,...,...,...
sorted,403,404,241,Zohar Manna,https://s.research.com/images/3de029346822cc88...,Stanford University,United States,81,29659,195
sorted,946,947,81,Zongben Xu,https://s.research.com/images/ed6fdbcc0e88d657...,Xi'an Jiaotong University,China,68,15190,272
sorted,104,105,3,Zoubin Ghahramani,https://s.research.com/images/3692f85465d385b7...,University of Cambridge,United Kingdom,107,60829,313
sorted,719,720,440,Zygmunt J. Haas,https://s.research.com/images/d4ca1694e43e1705...,Cornell University,United States,72,34836,225


In [48]:
df_concat.loc['sorted']

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
385,386,16,Aaron Courville,https://s.research.com/images/ba1916a4e8734172...,University of Montreal,Canada,82,138510,179
644,645,24,Aaron Fenster,https://s.research.com/images/b4b7e0466eaf1fbd...,University of Western Ontario,Canada,73,16545,389
338,339,210,Abhinav Gupta,https://s.research.com/images/1c3bd196ab0b481a...,Facebook (United States),United States,85,41095,162
871,872,524,Abigail Sellen,https://s.research.com/images/9d5bd8a069876190...,Microsoft (United States),United States,69,20151,185
556,557,339,Adam Soroca,https://research.com/images/placeholder-schola...,Millennial Media LLC,United States,76,20550,87
...,...,...,...,...,...,...,...,...,...
403,404,241,Zohar Manna,https://s.research.com/images/3de029346822cc88...,Stanford University,United States,81,29659,195
946,947,81,Zongben Xu,https://s.research.com/images/ed6fdbcc0e88d657...,Xi'an Jiaotong University,China,68,15190,272
104,105,3,Zoubin Ghahramani,https://s.research.com/images/3692f85465d385b7...,University of Cambridge,United Kingdom,107,60829,313
719,720,440,Zygmunt J. Haas,https://s.research.com/images/d4ca1694e43e1705...,Cornell University,United States,72,34836,225


In [49]:
df_concat.loc['normal']

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612
...,...,...,...,...,...,...,...,...,...
995,996,592,Jayaram K. Udupa,https://s.research.com/images/7aea05d4628cc176...,University of Pennsylvania,United States,67,18391,325
996,997,591,Xiang-Gen Xia,https://s.research.com/images/41b61d1d8d568c65...,University of Delaware,United States,67,17285,462
997,998,593,Julia Hirschberg,https://s.research.com/images/4bc88ad0be9b7b95...,Columbia University,United States,67,14866,208
998,999,35,Leif Kobbelt,https://s.research.com/images/540ad1eea7b08b05...,RWTH Aachen University,Germany,67,18462,219


# Apply Function 

In [50]:
df.apply(lambda x: x[0])

World Rank                                                       1
National Rank                                                    1
Name                                                  Anil K. Jain
Image URLs       https://s.research.com/images/2a967b4499197336...
Affiliation                              Michigan State University
Country                                              United States
H-Index                                                        201
Citations                                                   247416
#DBLP                                                          867
dtype: object

In [51]:
df.apply(lambda x: x[0],axis=1)

0         1
1         2
2         3
3         4
4         5
       ... 
995     996
996     997
997     998
998     999
999    1000
Length: 1000, dtype: int64

In [54]:
df.apply(lambda x: x['Affiliation'],axis=1)

0                           Michigan State University
1                              University of Montreal
2          University of Illinois at Urbana-Champaign
3                                University of Oxford
4                            University of California
                            ...                      
995                        University of Pennsylvania
996                            University of Delaware
997                               Columbia University
998                            RWTH Aachen University
999    Hong Kong University of Science and Technology
Length: 1000, dtype: object

In [55]:
def national_top_three(rank):
  return rank <=3

national_rank = df[ df['National Rank'].apply(lambda x: national_top_three(x)) ]
national_rank

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867
1,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,Canada,197,637950,715
2,3,2,Jiawei Han,https://s.research.com/images/42382fdef4ef0953...,University of Illinois at Urbana-Champaign,United States,184,205519,993
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
4,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,Berkeley,164,201905,612
...,...,...,...,...,...,...,...,...,...
862,863,3,Minos Garofalakis,https://s.research.com/images/a78d326a01d86192...,Technical University of Crete,Greece,70,16761,185
878,879,3,Patricia Melin,https://s.research.com/images/224759a57418d107...,Instituto Tecnológico de Tijuana,Mexico,69,15561,539
912,913,3,Ming-Syan Chen,https://s.research.com/images/3039e2dd1feff18d...,National Taiwan University,Taiwan,69,21865,382
954,955,2,Jan Bosch,https://s.research.com/images/c7c965ebfe93cae6...,Chalmers University of Technology,Sweden,68,20595,328


In [59]:
national_rank.sort_values(by=["Country","National Rank"])

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
8,9,1,Rajkumar Buyya,https://s.research.com/images/2994e0b16d912fb9...,University of Melbourne,Australia,154,117166,845
21,22,2,Dacheng Tao,https://s.research.com/images/72330d26d0dcc915...,University of Sydney,Australia,137,66473,1022
228,229,3,Ian Reid,https://s.research.com/images/53df9cd0782dbf43...,University of Adelaide,Australia,92,41959,341
119,120,1,Thomas A. Henzinger,https://s.research.com/images/2f5eca6562e3fea5...,Institute of Science and Technology Austria,Austria,104,59374,364
247,248,2,Horst Bischof,https://s.research.com/images/7c1dce640060d61c...,Graz University of Technology,Austria,91,42715,595
...,...,...,...,...,...,...,...,...,...
3,4,1,Andrew Zisserman,https://s.research.com/images/2a3913cff9795e86...,University of Oxford,United Kingdom,175,301948,596
98,99,2,Nicholas R. Jennings,https://s.research.com/images/3ac5b504ec1dd4d0...,Loughborough University,United Kingdom,108,61361,458
104,105,3,Zoubin Ghahramani,https://s.research.com/images/3692f85465d385b7...,University of Cambridge,United Kingdom,107,60829,313
0,1,1,Anil K. Jain,https://s.research.com/images/2a967b4499197336...,Michigan State University,United States,201,247416,867


# Aggregate

In [60]:
country_presence = df.groupby('Country')
country_presence.first()

Unnamed: 0_level_0,World Rank,National Rank,Name,Image URLs,Affiliation,H-Index,Citations,#DBLP
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Australia,9,1,Rajkumar Buyya,https://s.research.com/images/2994e0b16d912fb9...,University of Melbourne,154,117166,845
Austria,120,1,Thomas A. Henzinger,https://s.research.com/images/2f5eca6562e3fea5...,Institute of Science and Technology Austria,104,59374,364
Baltimore County,161,98,Tim Finin,https://s.research.com/images/49745edf0a51c611...,University of Maryland,98,45357,449
Belgium,133,1,Marco Dorigo,https://s.research.com/images/7c9e5565640fb89a...,Université Libre de Bruxelles,102,130488,408
Berkeley,5,3,Michael I. Jordan,https://s.research.com/images/aa9fe06f07349099...,University of California,164,201905,612
Brazil,384,1,Joel J. P. C. Rodrigues,https://s.research.com/images/acfdb8488335280e...,Federal University of Piauí,82,22549,831
CNRS,136,2,Didier Dubois,https://s.research.com/images/624c73ccd8e178dc...,Centre national de la recherche scientifique,101,57201,443
Canada,2,1,Yoshua Bengio,https://s.research.com/images/aec914cd458a74e2...,University of Montreal,197,637950,715
Chile,574,1,gonzalo navarro,https://s.research.com/images/b791f56c75f53b23...,University of Chile,76,25071,428
China,26,1,Xiaoou Tang,https://s.research.com/images/b9f11b569da0dde5...,Chinese University of Hong Kong,134,112615,450


In [None]:
country_presence.describe()

In [82]:
df[df.index.isin(list(country_presence.groups['Italy']))]

Unnamed: 0,World Rank,National Rank,Name,Image URLs,Affiliation,Country,H-Index,Citations,#DBLP
109,110,1,Luca Benini,https://s.research.com/images/cf161bed6e9648e1...,University of Bologna,Italy,105,45311,991
239,240,2,Lorenzo Bruzzone,https://s.research.com/images/5e0df31abc55c4b8...,University of Trento,Italy,91,35549,493
335,336,3,Nicu Sebe,https://s.research.com/images/e64c4f7a11ffdde3...,University of Trento,Italy,85,26324,454
412,413,4,Maurizio Lenzerini,https://s.research.com/images/cd2dc1706e2f2f34...,Sapienza University of Rome,Italy,81,28834,256
489,490,5,Enrico Zio,https://s.research.com/images/6ddf887c5feb0fe4...,Politecnico di Milano,Italy,78,21250,639
632,633,6,Diego Calvanese,https://s.research.com/images/67508fde704d8d2f...,Free University of Bozen-Bolzano,Italy,74,37713,360
649,650,7,Michele Zorzi,https://s.research.com/images/e5c708f7d26e0b93...,University of Padua,Italy,73,30209,585
679,680,8,Giuseppe De Giacomo,https://s.research.com/images/be36ba7402fdefbb...,Sapienza University of Rome,Italy,73,19904,259
686,687,9,Fabio Roli,https://s.research.com/images/b863cba2fbeb8c2b...,University of Genoa,Italy,73,19290,315
748,749,10,Andrea Massa,https://s.research.com/images/14c613fb70aa9953...,University of Trento,Italy,71,13277,319


In [83]:
country_presence['National Rank'].mean()

Country
Australia                              10.500000
Austria                                 4.500000
Baltimore County                      153.500000
Belgium                                 6.000000
Berkeley                              206.272727
Brazil                                  1.000000
CNRS                                    2.000000
Canada                                 19.864865
Chile                                   1.000000
China                                  45.586207
College Park                          212.700000
Czech Republic                          1.500000
Davis                                 312.666667
Denmark                                 2.000000
Engineering                           468.000000
Estonia                                 1.000000
Finland                                 1.500000
France                                  5.888889
Galway                                  2.000000
Germany                                18.000000
Greece      

In [84]:
country_and_university_presence = df.groupby(['Country','Affiliation'])
country_and_university_presence.first()

Unnamed: 0_level_0,Unnamed: 1_level_0,World Rank,National Rank,Name,Image URLs,H-Index,Citations,#DBLP
Country,Affiliation,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Australia,Australian National University,613,8,Richard Hartley,https://s.research.com/images/a2eb08b356a3f291...,74,68201,283
Australia,Queensland University of Technology,805,14,Peter Corke,https://s.research.com/images/e2fbccfa8d46b812...,70,28599,338
Australia,Swinburne University of Technology,868,15,Yang Xiang,https://s.research.com/images/54ba8ec9d0969fd9...,69,16062,461
Australia,UNSW Sydney,902,18,Xuemin Lin,https://s.research.com/images/2cb024056baf9d44...,69,17477,370
Australia,University of Adelaide,229,3,Ian Reid,https://s.research.com/images/53df9cd0782dbf43...,92,41959,341
...,...,...,...,...,...,...,...,...
United States,William & Mary,638,392,Denys Poshyvanyk,https://s.research.com/images/8b9217e3bc6799f5...,74,15011,192
United States,Yale University,565,347,Leandros Tassiulas,https://s.research.com/images/b1264cb1b432aad0...,76,30729,409
United States,ZapFraud,976,583,Markus Jakobsson,https://s.research.com/images/d6c9cac7ce7ebb2f...,68,18014,164
United States,Zillow Group (United States),416,246,Sing Bing Kang,https://s.research.com/images/c0baad35f59865dc...,81,23913,219


In [85]:
country_and_university_presence['H-Index'].mean()

Country        Affiliation                        
Australia      Australian National University         72.500000
               Queensland University of Technology    70.000000
               Swinburne University of Technology     69.000000
               UNSW Sydney                            69.000000
               University of Adelaide                 84.000000
                                                        ...    
United States  William & Mary                         74.000000
               Yale University                        72.333333
               ZapFraud                               68.000000
               Zillow Group (United States)           81.000000
               you.com                                70.000000
Name: H-Index, Length: 342, dtype: float64

In [87]:
country_and_university_presence['H-Index'].median()

Country        Affiliation                        
Australia      Australian National University         72.5
               Queensland University of Technology    70.0
               Swinburne University of Technology     69.0
               UNSW Sydney                            69.0
               University of Adelaide                 92.0
                                                      ... 
United States  William & Mary                         74.0
               Yale University                        72.0
               ZapFraud                               68.0
               Zillow Group (United States)           81.0
               you.com                                70.0
Name: H-Index, Length: 342, dtype: float64

### Cross Tab

In [88]:
pd.crosstab(df["Country"],df["Affiliation"],margins=True)

Affiliation,AT&T (United States),Aalborg University,Aalto University,Aarhus University,Algorand Foundation,Alibaba Group (China),Allen Institute for Artificial Intelligence,Amazon (United Kingdom),Amazon (United States),Apple (United States),...,Xidian University,Yahoo (United Kingdom),Yale University,ZapFraud,Zhejiang University,Zillow Group (United States),you.com,École Normale Supérieure,École Polytechnique Fédérale de Lausanne,All
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Australia,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,20
Austria,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,8
Baltimore County,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
Belgium,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,11
Berkeley,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,33
Brazil,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
CNRS,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
Canada,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,37
Chile,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
China,0,0,0,0,0,1,0,0,0,0,...,1,0,0,0,4,0,0,0,0,87


In [89]:
df['Affiliation'].value_counts()

University of California      80
MIT                           38
Stanford University           33
Carnegie Mellon University    32
Google (United States)        31
                              ..
Leiden University              1
Middlesex University           1
Syracuse University            1
University of Innsbruck        1
University of Delaware         1
Name: Affiliation, Length: 331, dtype: int64

In [92]:
for key, count in df['Affiliation'].value_counts().items():
  if key == "Zhejiang University":
    print(count)

4


In [93]:
df['Country'].value_counts()

United States                         493
China                                  87
United Kingdom                         57
Canada                                 37
Germany                                35
Berkeley                               33
Switzerland                            29
Australia                              20
Singapore                              18
Israel                                 15
Italy                                  13
Los Angeles                            13
Belgium                                11
College Park                           10
San Diego                              10
Netherlands                            10
France                                  9
Austria                                 8
Irvine                                  8
Santa Barbara                           6
Japan                                   6
Shenzhen                                5
Spain                                   5
Taiwan                            

### Pivot Table

In [94]:
pd.pivot_table(df,index=['Country','Affiliation'],values='H-Index',aggfunc=[np.mean, np.median, min, max, np.std])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,median,min,max,std
Unnamed: 0_level_1,Unnamed: 1_level_1,H-Index,H-Index,H-Index,H-Index,H-Index
Country,Affiliation,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Australia,Australian National University,72.500000,72.5,71,74,2.121320
Australia,Queensland University of Technology,70.000000,70.0,70,70,
Australia,Swinburne University of Technology,69.000000,69.0,69,69,
Australia,UNSW Sydney,69.000000,69.0,69,69,
Australia,University of Adelaide,84.000000,92.0,68,92,13.856406
...,...,...,...,...,...,...
United States,William & Mary,74.000000,74.0,74,74,
United States,Yale University,72.333333,72.0,69,76,3.511885
United States,ZapFraud,68.000000,68.0,68,68,
United States,Zillow Group (United States),81.000000,81.0,81,81,


In [95]:
pd.pivot_table(df,index=['Country','Affiliation'],values=['H-Index','National Rank'],aggfunc=[np.mean, np.median, min, max, np.std])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,median,median,min,min,max,max,std,std
Unnamed: 0_level_1,Unnamed: 1_level_1,H-Index,National Rank,H-Index,National Rank,H-Index,National Rank,H-Index,National Rank,H-Index,National Rank
Country,Affiliation,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Australia,Australian National University,72.500000,10.500000,72.5,10.5,71,8,74,13,2.121320,3.535534
Australia,Queensland University of Technology,70.000000,14.000000,70.0,14.0,70,14,70,14,,
Australia,Swinburne University of Technology,69.000000,15.000000,69.0,15.0,69,15,69,15,,
Australia,UNSW Sydney,69.000000,18.000000,69.0,18.0,69,18,69,18,,
Australia,University of Adelaide,84.000000,8.666667,92.0,4.0,68,3,92,19,13.856406,8.962886
...,...,...,...,...,...,...,...,...,...,...,...
United States,William & Mary,74.000000,392.000000,74.0,392.0,74,392,74,392,,
United States,Yale University,72.333333,441.666667,72.0,428.0,69,347,76,550,3.511885,102.187736
United States,ZapFraud,68.000000,583.000000,68.0,583.0,68,583,68,583,,
United States,Zillow Group (United States),81.000000,246.000000,81.0,246.0,81,246,81,246,,
