In [580]:
import json
import glob

from ballpark import ballpark
import numpy as np
import pandas as pd
from functional import seq
from plotly.graph_objs import *
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

init_notebook_mode(connected=True)

In [582]:
# Create a list of jsons

pathlist = lambda: glob.iglob('./rankings_int/*/*.json')
instis = []
for path in pathlist():
    insti = json.load(open(path))
    insti['filename'] = path
    insti['category'] = path.split("/")[-2]
    instis.append(insti)
len(instis)

545

In [583]:
# Filter samples which miss perception score
# If perception score is the last field, if it is parsed probably things went right
# Note: Three PDFs do not contain perseption score
incomplete_samples = list(filter(lambda x: 'PerceptionScore' not in x.keys(), instis))
list(map(lambda x: x['filename'], incomplete_samples)), len(incomplete_samples)

([], 0)

In [584]:
df = pd.DataFrame(instis)
df = df.set_index('InstituteName')

In [585]:
# Average the publication statistics across data sources
engineering = df[df['category'] == 'engineering']

engineering = engineering[pd.notnull(engineering['TotalStudents'])]
engineering['TotalStudentsAcrossPrograms'] = list(map(lambda x: sum(x.values()), engineering['TotalStudents']))

engineering['TotalPhDStudents'] = engineering['FullTimePhDStudents'] + engineering['PartTimePhDStudents']
engineering = engineering[engineering['TotalPhDStudents'] != 0]

engineering['NormalizedNumberOfPublication'] = list(map(lambda x: np.mean(list(x.values())),
                                              engineering['NumberOfPublications']))
engineering['NormalizedNumberOfCitations'] = list(map(lambda x: np.mean(list(x.values())),
                                              engineering['NumberOfCitations']))
engineering['NormalizedTopCitedPapers'] = list(map(lambda x: np.mean(list(x.values())),
                                              engineering['TopCitedPapers']))
engineering['TotalSponsoredResearchProjectAmount'] = list(map(lambda x: np.sum(list(x.values())),
                                              engineering['SponsoredResearchProjectAmount']))
engineering['TotalConsultancyProjectsAmount'] = list(map(lambda x: np.sum(list(x.values())),
                                              engineering['ConsultancyProjectsAmount']))
engineering['TotalMaleStudents'] = list(map(lambda x: np.sum(list(x.values())),
                                              engineering['MaleStudents']))
engineering['TotalFemaleStudents'] = list(map(lambda x: np.sum(list(x.values())),
                                              engineering['FemaleStudents']))
engineering['TotalStudents'] = engineering['TotalMaleStudents'] + engineering['TotalFemaleStudents']
engineering['TotalStudentsWithGovernmentScholarships'] = list(map(lambda x: np.sum(list(x.values())),
                                              engineering['StudentsWithGovernmentScholarships']))
engineering['TotalStudentsWithInstituteScholarships'] = list(map(lambda x: np.sum(list(x.values())),
                                              engineering['StudentsWithInstituteScholarships']))
engineering['TotalStudentsWithPrivateScholarships'] = list(map(lambda x: np.sum(list(x.values())),
                                              engineering['StudentsWithPrivateScholarships']))
engineering['TotalStudentsWithNoScholarships'] = list(map(lambda x: np.sum(list(x.values())),
                                              engineering['StudentsWithNoScholarships']))

In [586]:
# Find total number of students
engineering['FacultyStudentRatio'] = engineering['TotalStudentsAcrossPrograms'] / engineering['TotalNumberOfFaculty']

data = engineering.sort_values(by=['FacultyStudentRatio'])

data = [Bar(y=data.index[20::-1],
            x=data['FacultyStudentRatio'][20::-1],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By Students/Faculty Ratio',
    xaxis=dict(
        title='Students/Factuly'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'sfr.html', auto_open=False)
iplot(fig)

In [587]:
data = engineering.sort_values(by=['FacultyStudentRatio'])

trace = Table(
    header=dict(values=['Institute Name', 'Students/Factuly', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['FacultyStudentRatio'],
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace]
plot(data, filename = 'sfr_table.html', auto_open=False)
iplot(data)

In [588]:
# Find total number of students
engineering['FacultyPhDRatio'] = engineering['TotalPhDStudents'] / engineering['TotalNumberOfFaculty']

data = engineering.sort_values(by=['FacultyPhDRatio'])[-20:]

data = [Bar(y=data.index,
            x=data['FacultyPhDRatio'],
            orientation = 'h')]

layout = Layout(
    title='Engineering Institutes By PhD Students/Faculty Ratio',
    xaxis=dict(
        title='PhD/Factuly'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'pfr.html', auto_open=False)
iplot(fig)

In [589]:
# Find total number of students
data = engineering.sort_values(by=['FacultyPhDRatio'])

data = [Scatter(text=data.index,
            y=data['FacultyPhDRatio'])]

layout = Layout(
    title='Engineering Institutes By PhD Students/Faculty Ratio',
    yaxis=dict(
        title='PhD/Factuly'
    )
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'pfr_line.html', auto_open=False)
iplot(fig)

In [641]:
data = engineering.sort_values(by=['FacultyPhDRatio'])[::-1]

trace = Table(
    header=dict(values=['Institute Name', 'PhD Students/Factuly', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['FacultyPhDRatio'],
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace]
plot(data, filename = 'pfr_table.html', auto_open=False)
iplot(data)

In [591]:
engineering['PublicationFacultyRatio'] = engineering['NormalizedNumberOfPublication'] / engineering['TotalNumberOfFaculty']

data = engineering.sort_values(by=['PublicationFacultyRatio'])[-20:]

data = [Bar(y=data.index,
            x=data['PublicationFacultyRatio'],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By Publication/Faculty Ratio',
    xaxis=dict(
        title='Publication/Factuly'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'pubfr.html', auto_open=False)
iplot(fig)

In [640]:
data = engineering.sort_values(by=['PublicationFacultyRatio'])[::-1]

trace = Table(
    header=dict(values=['Institute Name', 'Publication/Factulty', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['PublicationFacultyRatio'],
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'pubfr_table.html', auto_open=False)
iplot(data)

In [593]:
engineering['PublicationPhDRatio'] = engineering['NormalizedNumberOfPublication'] / engineering['TotalPhDStudents']

data = engineering.sort_values(by=['PublicationPhDRatio'])
data = data[data['TotalPhDStudents'] != 0][-20:]

data = [Bar(y=data.index,
            x=data['PublicationPhDRatio'],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By Publication/PhD Students Ratio',
    xaxis=dict(
        title='Publication/PhD Students'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'pubphdr.html', auto_open=False)
iplot(fig)

In [594]:
data = engineering.sort_values(by=['PublicationPhDRatio'])[::-1]
data = data[data['TotalPhDStudents'] != 0]

trace = Table(
    header=dict(values=['Institute Name', 'Publication/PhD Student', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['PublicationPhDRatio'],
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'pubphdr_table.html', auto_open=False)
iplot(data)

In [595]:
min_max_scale = lambda x: (x - x.min())/(x.max() - x.min())
cumsum_av = lambda x: np.cumsum(x) / (np.arange(x.shape[0]) + 1)

## Calculating Ranking By Research Performance

$$RP = PU(35) + QP(40) + IPR(15) + FPPP(10)$$

$$PU = 35 × f(P/FRQ)$$

- P is weighted number of publications as acertained from suitable third party sources.
- FRQ is the maximun of nominal number of faculty members as calculated on the basis of a required FSR of 1:15 or the available faculty in the institution.

$$QP = 20 × f (CC/P) + 20× f (TOP25P/P)$$

- Here CC is Total Citation Count over previous three years.

- TOP25P: Number of citations in top 25 percentile averaged over the previous three years.

$$IPR = 10× f (PG) + 5 × f (PP)$$

- PG is the number of patents granted over the previous three years.
- PP: No. of patents published over the previous three years.

$$FPPP = 7.5 × f (RF) + 2.5 × f (CF)$$

- RF is average annual research funding earnings (amount actually received in rupees) per faculty at institute level in previous three years.
- CF is average annual consultancy amount (amount actually received in rupees) per faculty at institute level in previous three years.

In [596]:
eps = 1e-3

faculty_empowerment_function = lambda x: max(np.sign(6 - x - eps) * np.exp(-(x - 6) ** 2 / 15),
                                             np.sign(x + eps - 6) * np.exp(-(x - 6) ** 2 / 1000))
X = np.arange(0, 25, 0.1)

trace = Scatter(
    x = X,
    y = list(map(faculty_empowerment_function, X))
)

layout = Layout(
    title='Productivity of a faculty with number of PhD students',
    xaxis=dict(
        title='PhD Students/Faculty'
    ),
    yaxis=dict(
        title='Productivity'
    )
)

fig = Figure(data=[trace], layout=layout)
plot(fig, filename = 'fac_emp.html', auto_open=False)
iplot(fig)

In [597]:
engineering['NormalizedFacultyUnits'] = engineering['TotalNumberOfFaculty'] * list(map(faculty_empowerment_function, engineering['TotalPhDStudents'] / engineering['TotalNumberOfFaculty']))

In [642]:
pu_by_faculty = 35 * min_max_scale(engineering['NormalizedNumberOfPublication'] / engineering['TotalNumberOfFaculty'])
pu_by_phd = 35 * min_max_scale(engineering['NormalizedNumberOfPublication'] / engineering['TotalPhDStudents'])
pu_by_norm_fac =  35 * min_max_scale(engineering['NormalizedNumberOfPublication'] / engineering['NormalizedFacultyUnits'])
                                     
cc_qp = 20 * min_max_scale(engineering['NormalizedNumberOfCitations'] / engineering['NormalizedNumberOfPublication'])
top_qp = 20 * min_max_scale(engineering['NormalizedTopCitedPapers'] / engineering['NormalizedNumberOfPublication'])

ipg = 10 * min_max_scale(engineering['PatentsGranted'])
ipg_by_faculty = 10 * min_max_scale(engineering['PatentsGranted'] / engineering['TotalNumberOfFaculty'])
ipg_by_phd = 10 * min_max_scale(engineering['PatentsGranted'] / engineering['TotalPhDStudents'])
ipg_by_norm_fac = 10 * min_max_scale(engineering['PatentsGranted'] / engineering['NormalizedFacultyUnits'])

                                         
ipp = 5 * min_max_scale(engineering['PatentsPublished'])
ipp_by_faculty = 5 * min_max_scale(engineering['PatentsPublished'] / engineering['TotalNumberOfFaculty'])
ipp_by_phd = 5 * min_max_scale(engineering['PatentsPublished'] / engineering['TotalPhDStudents'])
ipp_by_norm_fac = 5 * min_max_scale(engineering['PatentsPublished'] / engineering['NormalizedFacultyUnits'])

                                         
fpr_by_faculty = 7.5 * min_max_scale(engineering['TotalSponsoredResearchProjectAmount'] / engineering['TotalNumberOfFaculty'])
fpr_by_phd = 7.5 * min_max_scale(engineering['TotalSponsoredResearchProjectAmount'] / engineering['TotalPhDStudents'])
fpr_by_norm_fac = 7.5 * min_max_scale(engineering['TotalSponsoredResearchProjectAmount'] / engineering['NormalizedFacultyUnits'])

                                         
fpc_by_faculty = 2.5 * min_max_scale(engineering['TotalConsultancyProjectsAmount'] / engineering['TotalNumberOfFaculty'])
fpc_by_phd = 2.5 * min_max_scale(engineering['TotalConsultancyProjectsAmount'] / engineering['TotalPhDStudents'])
fpc_by_norm_fac = 2.5 * min_max_scale(engineering['TotalConsultancyProjectsAmount'] / engineering['NormalizedFacultyUnits'])
                                         
engineering['rp'] = pu_by_faculty + cc_qp + top_qp + ipg_by_faculty + ipp_by_faculty + fpr + fpc
engineering['rp_by_faculty'] = pu_by_faculty + cc_qp + top_qp + ipg_by_faculty + ipp_by_faculty + fpr_by_faculty + fpc_by_faculty
engineering['rp_by_phd'] = pu_by_phd + cc_qp + top_qp + ipg_by_phd + ipp_by_phd + fpr_by_phd + fpc_by_phd
engineering['rp_by_norm_fac'] = pu_by_norm_fac + cc_qp + top_qp + ipg_by_norm_fac + ipp_by_norm_fac + fpr_by_norm_fac + fpc_by_norm_fac

In [643]:
data = engineering.sort_values(by=['rp'])

data = data[data['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

data = data[-20:]

data = [Bar(y=data.index,
            x=data['rp'],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By RP',
    xaxis=dict(
        title='PR'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'rp.html', auto_open=False)
iplot(fig)

In [644]:
data = engineering.sort_values(by=['rp'])[::-1]

data = data[data['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace = Table(
    header=dict(values=['Institute Name', 'RP', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['rp'],
                                   np.arange(58) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'rp_table.html', auto_open=False)
iplot(data)

In [601]:
data = engineering.sort_values(by=['rp_by_faculty'])

data = data[data['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

data = data[-20:]

data = [Bar(y=data.index,
            x=data['rp_by_faculty'],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By RP Normalized By Faculty',
    xaxis=dict(
        title='PR Normalized By Faculty'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'rp_by_fac.html', auto_open=False)
iplot(fig)

In [602]:
data = engineering.sort_values(by=['rp_by_faculty'])[::-1]

data = data[data['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace = Table(
    header=dict(values=['Institute Name', 'RP (Faculty Normalized)', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['rp_by_faculty'],
                                   np.arange(58) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'rp_by_fac_table.html', auto_open=False)
iplot(data)

In [603]:
data = engineering.sort_values(by=['rp_by_phd'])

data = data[data['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

data = data[-20:]

data = [Bar(y=data.index,
            x=data['rp_by_phd'],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By RP Normalized By PhD Students',
    xaxis=dict(
        title='PR Normalized By PhD Students'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'rp_by_phd.html', auto_open=False)
iplot(fig)

In [604]:
data = engineering.sort_values(by=['rp_by_phd'])[::-1]

data = data[data['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace = Table(
    header=dict(values=['Institute Name', 'RP (PhD Student Normalized)', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['rp_by_phd'],
                                   np.arange(58) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'rp_by_phd_table.html', auto_open=False)
iplot(data)

In [605]:
data = engineering.sort_values(by=['rp_by_norm_fac'])

data = data[data['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

data = data[-20:]

data = [Bar(y=data.index,
            x=data['rp_by_norm_fac'],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By RP Normalized With Faculty Productivity Function',
    xaxis=dict(
        title='Faculty Empowerment Function Normalized PR'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'rp_by_fac_prod.html', auto_open=False)
iplot(fig)

In [606]:
data = engineering.sort_values(by=['rp_by_norm_fac'])[::-1]

data = data[data['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace = Table(
    header=dict(values=['Institute Name', 'RP (Faculty Productivity Normalized)', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['rp_by_norm_fac'],
                                   np.arange(58) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'rp_by_norm_fac.html', auto_open=False)
iplot(data)

In [645]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

data_by_rp = data.sort_values(by=['rp'])
data_by_rp_fac = data.sort_values(by=['rp_by_faculty'])
data_by_rp_phd = data.sort_values(by=['rp_by_phd'])
data_by_norm_fac = data.sort_values(by=['rp_by_norm_fac'])

trace1 = Scatter(
    y=data_by_rp['NormalizedNumberOfCitations'] / data_by_rp['NormalizedNumberOfPublication'],
    name='Sorted By RP',
    text=data_by_rp.index
)

trace2 = Scatter(
    y=data_by_rp_fac['NormalizedNumberOfCitations'] / data_by_rp_fac['NormalizedNumberOfPublication'],
    name='Sorted By Faculty Nomalized RP',
    text=data_by_rp_fac.index
)

trace3 = Scatter(
    y=data_by_rp_phd['NormalizedNumberOfCitations'] / data_by_rp_phd['NormalizedNumberOfPublication'],
    name='Sorted By PhD Student Nomalized RP',
    text=data_by_rp_phd.index
)

trace4 = Scatter(
    y=data_by_norm_fac['NormalizedNumberOfCitations'] / data_by_norm_fac['NormalizedNumberOfPublication'],
    name='Sorted By Productivity Nomalized RP',
    text=data_by_norm_fac.index
)


data = [trace1, trace2, trace3, trace4]

layout = Layout(
    title='Comparing Quality of Research When Ranked By Different Matics',
    xaxis=dict(
        title='Position (smallest score to largest)'
    ),
    yaxis=dict(
        title='Citations Per Paper'
    )
)


fig = Figure(data=data, layout=layout)
plot(fig, filename = 'rp_comp.html', auto_open=False)
iplot(fig)

In [646]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter3d(
    x = data['TotalNumberOfFaculty'],
    y = data['TotalPhDStudents'],
    z = data['NormalizedNumberOfPublication'] / data['TotalNumberOfFaculty'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['rp_by_faculty'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = data.index
)


layout = Layout(
    title='Understanding Decideing Factors in RP',
    scene = dict(xaxis=dict(
        title='Total Number of Faculty'
    ),
    yaxis=dict(
        title='Total Number of PhD Students'
    ),
    zaxis=dict(
        title='Publications per Faculty'
    ))
)
data = [trace1]

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'und_rp.html', auto_open=False)
iplot(fig)

In [609]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter3d(
    x = data['TotalNumberOfFaculty'],
    y = data['TotalPhDStudents'],
    z = data['NormalizedNumberOfPublication'] / data['NormalizedFacultyUnits'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['rp_by_norm_fac'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = data.index
)


layout = Layout(
    title='Understanding Decideing Factors in Faculty Productivity Normalized RP',
    scene = dict(xaxis=dict(
        title='Total Number of Faculty'
    ),
    yaxis=dict(
        title='Total Number of PhD Students'
    ),
    zaxis=dict(
        title='Publications per Faculty Unit'
    ))
)
data = [trace1]

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'und_rp_form_fac.html', auto_open=False)
iplot(fig)

In [610]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['TotalNumberOfFaculty'],
    y = data['TotalPhDStudents'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['rp_by_norm_fac'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({row['rp_by_norm_fac']})" for index, row in data.iterrows()],
    showlegend = False
)

X = np.arange(0, 1000, 5)

trace2 = Scatter(
    x = X,
    y = 3 * X,
    mode ='lines',
    showlegend = False,
    hoverinfo='none'
)

data = [trace1, trace2]

layout = Layout(
    title='RP Normalized By Faculty Productivity',
    margin=Margin(pad=20),
    xaxis=dict(
        title='Total Number of Faculty'
    ),
    yaxis=dict(
        title='Total Number of PhD Students'
    )
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'rp_form_fac_scat.html', auto_open=False)
iplot(fig)

In [611]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['NormalizedNumberOfPublication'],
    y = data['NormalizedNumberOfCitations'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['NormalizedNumberOfCitations'] / data['NormalizedNumberOfPublication'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({row['NormalizedNumberOfCitations'] / row['NormalizedNumberOfPublication']})"
            for index, row in data.iterrows()],
    showlegend = False
)

X = np.arange(0, 4500, 5)

trace2 = Scatter(
    x = X,
    y = 3 * X,
    mode ='lines',
    showlegend = False,
    hoverinfo='none'
)

data = [trace1, trace2]

layout = Layout(
    title='Average Number of Citations Per Paper',
    margin=Margin(pad=10),
    xaxis=dict(
        title='Total Number of Publcations'
    ),
    yaxis=dict(
        title='Total Number of Citations'
    )
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'citation.html', auto_open=False)
iplot(fig)

In [612]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['rp_by_norm_fac'],
    y = data['NormalizedNumberOfCitations'] / data['NormalizedNumberOfPublication'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['FacultyPhDRatio'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({row['FacultyPhDRatio']})"
            for index, row in data.iterrows()],
    showlegend = False
)

data = [trace1]

layout = Layout(
    title='PhD Students per Faculty Across RP and Citations per Paper',
    margin=Margin(pad=10),
    xaxis=dict(
        title='RP Normalized By Faculty Productivity'
    ),
    yaxis=dict(
        title='Citations per Paper'
    )
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'citation_pfr.html', auto_open=False)
iplot(fig)

In [613]:
data = engineering.sort_values(by=['TotalSponsoredResearchProjectAmount'])[-20:]

trace1 = Bar(
    y=data.index,
    x=list(map(lambda x: x['2014-15'], data['SponsoredResearchProjectAmount'])),
    name='2014-15',
    orientation = 'h'
)

trace2 = Bar(
    y=data.index,
    x=list(map(lambda x: x['2015-16'], data['SponsoredResearchProjectAmount'])),
    name='2015-16',
    orientation = 'h'
)


trace3 = Bar(
    y=data.index,
    x=list(map(lambda x: x['2016-17'], data['SponsoredResearchProjectAmount'])),
    name='2016-17',
    orientation = 'h'
)

data = [trace1, trace2, trace3]

layout = Layout(
    title='Top 20 Engineering Institutes By Sponsored Projects Research',
    margin=Margin(l=400, pad=10),
    barmode='group'
)


fig = Figure(data=data, layout=layout)
plot(fig, filename = 'res_funding_bar.html', auto_open=False)
iplot(fig)

In [614]:
engineering['srpa_by_phd'] = engineering['TotalSponsoredResearchProjectAmount'] / engineering['TotalPhDStudents']

data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['TotalNumberOfFaculty'],
    y = data['TotalPhDStudents'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['srpa_by_phd'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({ballpark(row['srpa_by_phd'])})" for index, row in data.iterrows()],
    showlegend = False
)

X = np.arange(0, 1000, 5)

trace2 = Scatter(
    x = X,
    y = 3 * X,
    mode ='lines',
    showlegend = False,
    hoverinfo='none'
)

data = [trace1, trace2]

layout = Layout(
    title='Total Amount of Sponsored Projects Per PhD Student Over Last 3 Years',
    margin=Margin(pad=20),
    xaxis=dict(
        title='Total Number of Faculty'
    ),
    yaxis=dict(
        title='Total Number of PhD Students'
    )
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'srpa_by_phd_scatter.html', auto_open=False)
iplot(fig)

In [647]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['rp_by_norm_fac'],
    y = data['PerceptionScore'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['srpa_by_phd'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({ballpark(row['srpa_by_phd'])})" for index, row in data.iterrows()],
    showlegend = False
)

data = [trace1]

layout = Layout(
    title='Total Amount of Sponsored Projects Per PhD Student Over Last 3 Years',
    margin=Margin(pad=20),
    xaxis=dict(
        title='Faculty Productivity Normalized RP'
    ),
    yaxis=dict(
        title='Perception Score'
    )
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'srpa_phd_rp_scatter.html', auto_open=False)
iplot(fig)

In [615]:
data = engineering.sort_values(by=['srpa_by_phd'])[::-1]

trace = Table(
    header=dict(values=['Institute Name', 'Total Amount of Sponsored Projects Per PhD Student Over Last 3 Years', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   ballpark(data['srpa_by_phd']),
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'srpa_by_phd_table.html', auto_open=False)
iplot(data)

In [616]:
data = engineering.sort_values(by=['TotalConsultancyProjectsAmount'])[-20:]

trace1 = Bar(
    y=data.index,
    x=list(map(lambda x: x['2014-15'], data['ConsultancyProjectsAmount'])),
    name='2014-15',
    orientation = 'h'
)

trace2 = Bar(
    y=data.index,
    x=list(map(lambda x: x['2015-16'], data['ConsultancyProjectsAmount'])),
    name='2015-16',
    orientation = 'h'
)


trace3 = Bar(
    y=data.index,
    x=list(map(lambda x: x['2016-17'], data['ConsultancyProjectsAmount'])),
    name='2016-17',
    orientation = 'h'
)

data = [trace1, trace2, trace3]

layout = Layout(
    title='Top 20 Engineering Institutes By Consultancy Projects Research',
    margin=Margin(l=400, pad=10),
    barmode='group'
)


fig = Figure(data=data, layout=layout)
plot(fig, filename = 'cpa_bar.html', auto_open=False)
iplot(fig)

In [617]:
engineering['cpa_by_phd'] = engineering['TotalConsultancyProjectsAmount'] / engineering['TotalPhDStudents']

data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['TotalNumberOfFaculty'],
    y = data['TotalPhDStudents'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['cpa_by_phd'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({ballpark(row['cpa_by_phd'])})" for index, row in data.iterrows()],
    showlegend = False
)

X = np.arange(0, 1000, 5)

trace2 = Scatter(
    x = X,
    y = 3 * X,
    mode ='lines',
    showlegend = False,
    hoverinfo='none'
)

data = [trace1, trace2]

layout = Layout(
    title='Total Amount of Consultancy Projects Per PhD Student Over Last 3 Years',
    margin=Margin(pad=20),
    xaxis=dict(
        title='Total Number of Faculty'
    ),
    yaxis=dict(
        title='Total Number of PhD Students'
    )
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'cpa_by_phd_scatter.html', auto_open=False)
iplot(fig)

In [618]:
data = engineering.sort_values(by=['cpa_by_phd'])[::-1]

trace = Table(
    header=dict(values=['Institute Name', 'Total Amount of Consultancy Projects Per PhD Student Over Last 3 Years', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   ballpark(data['cpa_by_phd']),
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'cpa_by_phd_table.html', auto_open=False)
iplot(data)

In [619]:
engineering['UGAvMedianSalary'] = (seq(list(engineering['MedianSalary']))
                                    .map(lambda x: x.items())
                                    .map(lambda x: filter(lambda y: y[0].startswith("UG"), x))
                                    .map(lambda x: map(lambda y: y[1], x))
                                    .map(list)
                                    .map(np.mean)).to_list()

engineering['PGAvMedianSalary'] = (seq(list(engineering['MedianSalary']))
                                    .map(lambda x: x.items())
                                    .map(lambda x: filter(lambda y: y[0].startswith("PG"), x))
                                    .map(lambda x: map(lambda y: y[1], x))
                                    .map(list)
                                    .map(np.mean)).to_list()


Mean of empty slice.


invalid value encountered in double_scalars



In [620]:
data = engineering[pd.notnull(engineering['UGAvMedianSalary'])]
data = data.sort_values(by=['UGAvMedianSalary'])[-20:]

data = [Bar(y=data.index,
            x=data['UGAvMedianSalary'],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By Median Undegraduate Salary',
    xaxis=dict(
        title='Salary in Rs. per anum'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'ug_salary.html', auto_open=False)
iplot(fig)

In [621]:
data = engineering.sort_values(by=['UGAvMedianSalary'])[::-1]
data = data[pd.notnull(data['UGAvMedianSalary'])]

trace = Table(
    header=dict(values=['Institute Name', 'Median UG Salary', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   ballpark(data['UGAvMedianSalary']),
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'ug_salary_table.html', auto_open=False)
iplot(data)

In [622]:
data = engineering[pd.notnull(engineering['PGAvMedianSalary'])]
data = data.sort_values(by=['PGAvMedianSalary'])[-20:]

data = [Bar(y=data.index,
            x=data['PGAvMedianSalary'],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By Median Post-Graduate Salary',
    xaxis=dict(
        title='Salary in Rs. per anum'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'pg_salary.html', auto_open=False)
iplot(fig)

In [623]:
data = engineering.sort_values(by=['PGAvMedianSalary'])[::-1]
data = data[pd.notnull(data['PGAvMedianSalary'])]

trace = Table(
    header=dict(values=['Institute Name', 'Median PG Salary', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   ballpark(data['PGAvMedianSalary']),
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'pg_salary_table.html', auto_open=False)
iplot(data)

In [624]:
data = engineering.sort_values(by=['PerceptionScore'])[-20:]

data = [Bar(y=data.index,
            x=data['PerceptionScore'],
            orientation = 'h')]

layout = Layout(
    title='Top 20 Engineering Institutes By Perception Score',
    xaxis=dict(
        title='Perception Score'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'ps.html', auto_open=False)
iplot(fig)

In [625]:
data = engineering.sort_values(by=['PerceptionScore'])[::-1]

trace = Table(
    header=dict(values=['Institute Name', 'Perception Score', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['PerceptionScore'],
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'ps_table.html', auto_open=False)
iplot(data)

In [626]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['UGAvMedianSalary'],
    y = data['rp_by_norm_fac'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['PerceptionScore'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({row['PerceptionScore']})" for index, row in data.iterrows()],
    showlegend = False
)

layout = Layout(
    title='UG Median Salary vs Research Score vs Perception Score',
    margin=Margin(pad=10),
    yaxis=dict(
        title='Research Score'
    ),
    xaxis=dict(
        title='UG Median Salary'
    )
)

fig = Figure(data=[trace1], layout=layout)
plot(fig, filename = 'inti_overall_percep.html', auto_open=False)
iplot(fig)

In [627]:
engineering['SexRatio'] = engineering['TotalMaleStudents'] / engineering['TotalFemaleStudents']

In [636]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['UGAvMedianSalary'],
    y = data['rp_by_norm_fac'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['SexRatio'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({row['SexRatio']})" for index, row in data.iterrows()],
    showlegend = False
)

layout = Layout(
    title='UG Median Salary vs Research Score vs Sex Ratio',
    margin=Margin(pad=10),
    yaxis=dict(
        title='Research Score'
    ),
    xaxis=dict(
        title='UG Median Salary'
    )
)

fig = Figure(data=[trace1], layout=layout)
plot(fig, filename = 'sex_ratio_overall.html', auto_open=False)
iplot(fig)

In [629]:
engineering['gov_schol_ratio'] = engineering['TotalStudentsWithGovernmentScholarships'] / engineering['TotalStudents']
engineering['insti_schol_ratio'] = engineering['TotalStudentsWithInstituteScholarships'] / engineering['TotalStudents']
engineering['pri_schol_ratio'] = engineering['TotalStudentsWithPrivateScholarships'] / engineering['TotalStudents']
engineering['no_schol_ratio'] = engineering['TotalStudentsWithNoScholarships'] / engineering['TotalStudents']

In [630]:
data = engineering.sort_values(by=['gov_schol_ratio'])[-20:]

data = [Bar(y=data.index,
            x=data['gov_schol_ratio'],
            orientation = 'h')]

layout = Layout(
    title='Govremnet Scholarship Ratio',
    xaxis=dict(
        title='Fraction of Students Receiving Govrenment Scholarships'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'gov_schol.html', auto_open=False)
iplot(fig)

In [631]:
data = engineering.sort_values(by=['gov_schol_ratio'])[::-1]

trace = Table(
    header=dict(values=['Institute Name', 'Fraction of Students Receiving Government Scholarships', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['gov_schol_ratio'],
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'gov_schol_table.html', auto_open=False)
iplot(data)

In [639]:
data = engineering.sort_values(by=['insti_schol_ratio'])[-20:]

data = [Bar(y=data.index,
            x=data['insti_schol_ratio'],
            orientation = 'h')]

layout = Layout(
    title='Institute Scholarship Ratio',
    xaxis=dict(
        title='Fraction of Students Receiving Institute Scholarships'
    ),
    margin=Margin(l=400, pad=10)
)

fig = Figure(data=data, layout=layout)
plot(fig, filename = 'insti_schol.html', auto_open=False)
iplot(fig)

In [633]:
data = engineering.sort_values(by=['insti_schol_ratio'])[::-1]

trace = Table(
    header=dict(values=['Institute Name', 'Fraction of Students Receiving Institute Scholarships', 'Rank'],
                fill = dict(color='#C2D4FF'),
                align = ['left'] * 3),
                cells=dict(values=[data.index,
                                   data['insti_schol_ratio'],
                                   np.arange(96) + 1],
                fill = dict(color='#F5F8FF'),
                align = ['left'] * 3))


data = [trace] 
plot(data, filename = 'insti_schol_table.html', auto_open=False)
iplot(data)

In [634]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['UGAvMedianSalary'],
    y = data['rp_by_norm_fac'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['gov_schol_ratio'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({row['gov_schol_ratio']})" for index, row in data.iterrows()],
    showlegend = False
)

layout = Layout(
    title='UG Median Salary vs Research Score vs Government Scholorship ratio',
    margin=Margin(pad=10),
    yaxis=dict(
        title='Research Score'
    ),
    xaxis=dict(
        title='UG Median Salary'
    )
)

fig = Figure(data=[trace1], layout=layout)
plot(fig, filename='gov_schol_scatter.html', auto_open=False)
iplot(fig)

In [635]:
data = engineering[engineering['TotalNumberOfFaculty'] > 200]
data = data[data['TotalPhDStudents'] > 100]

trace1 = Scatter(
    x = data['UGAvMedianSalary'],
    y = data['rp_by_norm_fac'],
    mode='markers',
    marker=dict(
        size='16',
        color = data['insti_schol_ratio'], #set color equal to a variable
        colorscale='Viridis',
        showscale=True
    ),
    text = [f"{index} ({row['insti_schol_ratio']})" for index, row in data.iterrows()],
    showlegend = False
)

layout = Layout(
    title='UG Median Salary vs Research Score vs Institute Scholorship ratio',
    margin=Margin(pad=10),
    yaxis=dict(
        title='Research Score'
    ),
    xaxis=dict(
        title='UG Median Salary'
    )
)

fig = Figure(data=[trace1], layout=layout)
plot(fig, filename='insti_schol_scatter.html', auto_open=False)
iplot(fig)