# Imports

In [1]:
import os
import numpy as np
import pandas as pd

# Constants

In [2]:
PROCESSED_FILE_DIRECTORY = r"..\data\excel"

REPORT_FILE_DIRECTORY = r"..\data\report"

# Get Files

In [3]:
files = os.listdir(PROCESSED_FILE_DIRECTORY)

files = [f for f in files if os.path.isfile(PROCESSED_FILE_DIRECTORY + '/' + f)]

# Data Processing

## Default Table Schema

In [4]:
def get_empty_dataframe(file, sheet_name):
    if sheet_name == "0":
        data_df = pd.DataFrame({'2021-22': [0],
                                            '2020-21': [0],
                                            '2019-20': [0],
                                            '2018-19': [0],
                                            '2017-18': [0],
                                            '2016-17': [0],
                                            'Institution': file.split('.')[0]
                                           })
    elif sheet_name == "1":
        data_df = pd.DataFrame({'Total Students': [0], 
                                            'Institution': file.split('.')[0]                    
                    })
    elif sheet_name == "2":
        data_df = pd.DataFrame({'Median salary UG': [0], 
                                            'Institution': file.split('.')[0]                    
                    })
    elif sheet_name == "3":
        data_df = pd.DataFrame({'Median salary PG': [0], 
                                            'Institution': file.split('.')[0]                    
                    })   
    elif sheet_name == "4":
        data_df = pd.DataFrame({'Total': [0], 
                                            'Institution': file.split('.')[0]
                    })    
    elif sheet_name == "5":
        data_df = pd.DataFrame({
                        '2021-22': [0],
                        '2020-21': [0],
                        '2019-20': [0],
                        'Institution': file.split('.')[0],
                        'Capex1': [0],
                        'Capex2': [0],
                        'Capex3': [0]
                    }) 
    elif sheet_name == "6":
        data_df = pd.DataFrame({
                        '2021-22': [0],
                        '2020-21': [0],
                        '2019-20': [0],
                        'Institution': file.split('.')[0],
                        'Opex1': [0],
                        'Opex2': [0],
                        'Opex3': [0]
                    })
    elif sheet_name == "7":
        data_df = pd.DataFrame({
                        'Sponsored Average': [0], 
                        'Institution': file.split('.')[0]
                    })  
    elif sheet_name == "8":
        data_df = pd.DataFrame({
                        'Consultancy Average': [0], 
                        'Institution': file.split('.')[0]
                    })    
    elif sheet_name == "10":
        data_df = pd.DataFrame({
                        'Answer': [None], 
                        'Institution': file.split('.')[0]
                    })        
    return data_df

## Sheet 0

In [5]:
def process_sheet0():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            if "1" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:            
                #print(f"Processing the sheet 0 of the file: {file}")

                excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                data_df = pd.read_excel(excel_file, sheet_name="0")
                
                data_df = data_df.iloc[:, 1:]
                data_df.replace('-', 0, inplace=True)                

                data_df['2019-20'] = data_df['2019-20'].astype(np.int64)
                data_df['2018-19'] = data_df['2018-19'].astype(np.int64)
                data_df['2017-18'] = data_df['2017-18'].astype(np.int64)
                data_df['2016-17'] = data_df['2016-17'].astype(np.int64)

                data_df['Total'] = data_df['2021-22'] + data_df['2020-21'] + data_df['2019-20'] + \
                data_df['2018-19'] + data_df['2017-18'] + data_df['2016-17']    
            else:
                data_df = get_empty_dataframe(file, "0")
            
            data_df.fillna(0, inplace=True)
            nirf_df = pd.concat([nirf_df, data_df])           
    except Exception as ex:
        print(f"Error while processing the sheet 0 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "0")
    finally:
        nirf_df = nirf_df.groupby('Institution').sum('Total')[['Total']]
        nirf_df = nirf_df.transpose()
        nirf_df.rename(index={'Total': 'Total sanctioned approved intake for all programs'}, inplace=True)    

    return nirf_df

### Sheet 1

In [6]:
def process_sheet1():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            if "1" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                #print(f"Processing the sheet 1 of the file: {file}")
                excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                data_df = pd.read_excel(excel_file, sheet_name="1")

                data_df = data_df[['Total Students', 'Institution']]
            else:
                data_df = get_empty_dataframe(file, "1")
                
            data_df.fillna(0, inplace=True)        
            nirf_df = pd.concat([nirf_df, data_df])                        
    except Exception as ex:
        print(f"Error while processing the sheet 1 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "1") 
    finally:                
        nirf_df = nirf_df.groupby('Institution').sum('Total Students')
        nirf_df = nirf_df.transpose()
        nirf_df.rename(index={'Total Students': 'Total number of students enrolled in all programs'}, inplace=True)        
    
    return nirf_df

In [7]:
def process_sheet1_1():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            if "1" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                #print(f"Processing the sheet 1 of the file: {file}")
                excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                data_df = pd.read_excel(excel_file, sheet_name="1")

                data_df = data_df.iloc[:, [7, 8, 13]]
                
                data_df['Total Students'] = data_df['Economically\rBackward\r(Including male\r& female)'] + \
                data_df['Socially\rChallenged\r(SC+ST+OBC\rIncluding male\r& female)']
            else:
                data_df = get_empty_dataframe(file)
                
            data_df.fillna(0, inplace=True)        
            nirf_df = pd.concat([nirf_df, data_df])                        
    except Exception as ex:
        print(f"Error while processing the sheet 1 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file) 
    finally:                
        nirf_df = nirf_df.groupby('Institution').sum('Total Students')
        nirf_df = nirf_df.transpose()
        nirf_df = nirf_df.iloc[[-1], :]
        nirf_df.rename(index={'Total Students': 'Total number of economically socially challenged \
        students enrolled in all programs'}, inplace=True)        
    
    return nirf_df

In [8]:
def process_sheet1_2():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            if "1" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                #print(f"Processing the sheet 1 of the file: {file}")
                excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                data_df = pd.read_excel(excel_file, sheet_name="1")

                data_df = data_df.iloc[:, [9, 10, 11, 13]]
            else:
                data_df = get_empty_dataframe(file)
                
            data_df.fillna(0, inplace=True)        
            nirf_df = pd.concat([nirf_df, data_df])                        
    except Exception as ex:
        print(f"Error while processing the sheet 1 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file) 
    finally:                
        nirf_df = nirf_df.groupby('Institution').sum()
        nirf_df = nirf_df.transpose()
        #nirf_df.rename(index={'Total Students': 'Total number of economically socially challenged \
        #students enrolled in all programs'}, inplace=True)        
    
    return nirf_df

In [22]:
process_sheet1_2()

Institution,IR-P-C-10250,IR-P-C-10283,IR-P-C-11091,IR-P-C-18900,IR-P-C-19322,IR-P-C-19611,IR-P-C-19613,IR-P-C-24007,IR-P-C-24505,IR-P-C-30768,...,IR-P-U-0536,IR-P-U-0541,IR-P-U-0562,IR-P-U-0575,IR-P-U-0643,IR-P-U-0724,IR-P-U-0747,IR-P-U-0811,IR-P-U-0853,IR-P-U-0938
No. of students\rreceiving full\rtuition fee\rreimbursement\rfrom the State\rand Central\rGovernment,116,53,85,132,13,95,117,263,0,46,...,12,7,62,59,111,265,6,45,58,0
No. of students\rreceiving full\rtuition fee\rreimbursement\rfrom Institution\rFunds,56,30,0,32,5,45,32,13,1,58,...,0,2,5,127,21,0,102,0,154,8
No. of students\rreceiving full\rtuition fee\rreimbursement\rfrom the Private\rBodies,18,0,0,13,0,15,4,2,1,0,...,0,0,1,11,6,3,2,0,1,0


## Sheet 2

In [9]:
def process_sheet2():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            try:
                if "2" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet 2 of the file: {file}")
                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="2")
                    
                    columns = data_df.columns.to_list()
                    median_index = 0
                    institute_index = 0
                    
                    for index, element in enumerate(columns):
                        if element.split()[0] == 'Median':
                            median_index = index                            
                        elif element.split()[0] == 'Institution':
                            institute_index = index

                    data_df = pd.DataFrame({
                        'Median salary UG': data_df.iloc[:, median_index].values,
                        'Institution': data_df.iloc[:, institute_index].values
                    })

                    data_df['Median salary UG'] = data_df['Median salary UG'].str.split(r"(").str[0].astype(np.float64)
                else:
                    data_df = get_empty_dataframe(file, "2")
            except Exception as ex:
                data_df = get_empty_dataframe(file, "2")
            finally:  
                data_df.fillna(0, inplace=True)
                nirf_df = pd.concat([nirf_df, data_df])           
    except Exception as ex:
        print(f"Error while processing the sheet 2 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "2")
    finally:            
        nirf_df = nirf_df.groupby('Institution').mean('Median salary UG')
        nirf_df = nirf_df.transpose()           
    
    return nirf_df

## Sheet 3

In [10]:
def process_sheet3():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            try:
                if "3" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet 2 of the file: {file}")

                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="3")

                    columns = data_df.columns.to_list()
                    median_index = 0
                    institute_index = 0

                    for index, element in enumerate(columns):
                        if element.split()[0] == 'Median':
                            median_index = index
                        elif element.split()[0] == 'Institution':
                            institute_index = index

                    data_df = pd.DataFrame({
                                'Median salary PG': data_df.iloc[:, median_index].values,
                                'Institution': data_df.iloc[:, institute_index].values
                            })

                    data_df['Median salary PG'] = data_df['Median salary PG'].str.split(r"(").str[0].astype(np.float64)
                else:
                    data_df = get_empty_dataframe(file, "3")
            except Exception as ex:
                print(f"Error while processing the sheet 3 of the file: {file}")
                print(str(ex))
                data_df = get_empty_dataframe(file, "3")
            finally:    
                data_df.fillna(0, inplace=True)
                nirf_df = pd.concat([nirf_df, data_df])
    except Exception as ex:
        print(f"Error while processing the sheet 3 of the file: {file}")
        print(str(ex))
        data_df = get_empty_dataframe(file, "3")        
    finally:
        nirf_df.fillna(0, inplace=True)
        nirf_df = nirf_df.groupby('Institution').mean('Median salary PG')
        nirf_df = nirf_df.transpose() 
    
    return nirf_df

## Sheet 4

In [11]:
def process_sheet4():
    nirf_df = pd.DataFrame()
    
    try:    
        for file in files:
            try:
                if "4" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet 4 of the file: {file}")

                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="4")

                    data_df.rename(columns={'Unnamed: 0': 'Total'}, inplace=True)
                    data_df = data_df.iloc[1:3, [1, 4]]
                    data_df['Total'] = data_df['Total'].astype(np.int64) 
                else:
                    data_df = get_empty_dataframe(file, "4")
            except Exception as ex:
                print(f"Error while processing the sheet 3 of the file: {file}")
                print(str(ex))
                data_df = get_empty_dataframe(file, "3")
            finally:    
                data_df.fillna(0, inplace=True)
                nirf_df = pd.concat([nirf_df, data_df])                              
    except Exception as ex:
        print(f"Error while processing the sheet 4 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "4")
    finally:            
        nirf_df = nirf_df.groupby('Institution').sum('Total')
        nirf_df = nirf_df.transpose()
        nirf_df.rename(index={'Total': 'Total number of PhD students enrolled'}, inplace=True)
    
    return nirf_df

In [12]:
def process_sheet4_1():
    nirf_df = pd.DataFrame()
    
    try:    
        for file in files:
            try:
                if "4" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet 4 of the file: {file}")

                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="4")

                    data_df = data_df.iloc[[1], 1:]
                    data_df.fillna(0, inplace=True)
                    
                    data_df['Unnamed: 0'] = data_df['Unnamed: 0'].astype(np.int64)
                    data_df['Unnamed: 1'] = data_df['Unnamed: 1'].astype(np.int64)
                    data_df['Unnamed: 2'] = data_df['Unnamed: 2'].astype(np.int64)                    

                    data_df['Total'] = (data_df['Unnamed: 0'] + data_df['Unnamed: 1'] + data_df['Unnamed: 2'])/3              
                    data_df = data_df[['Total', 'Institution']]
                else:
                    data_df = get_empty_dataframe(file, "4")
            except Exception as ex:
                print(f"Error while processing the sheet 3 of the file: {file}")
                print(str(ex))
                data_df = get_empty_dataframe(file, "3")
            finally:    
                data_df.fillna(0, inplace=True)
                nirf_df = pd.concat([nirf_df, data_df])            
    except Exception as ex:
        print(f"Error while processing the sheet 4 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "4") 
    finally:            
        nirf_df = nirf_df.set_index('Institution')
        nirf_df.sort_index(inplace=True)
        nirf_df = nirf_df.transpose()
        nirf_df.rename(index={'Total': 'Average Full Time PhD students graduated'}, inplace=True)
    
    return nirf_df

In [13]:
def process_sheet4_2():
    nirf_df = pd.DataFrame()
    
    try:    
        for file in files:
            try:
                if "4" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet 4 of the file: {file}")

                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="4")

                    data_df = data_df.iloc[[6], 1:]
                    data_df.fillna(0, inplace=True)
                    
                    data_df['Unnamed: 0'] = data_df['Unnamed: 0'].astype(np.int64)
                    data_df['Unnamed: 1'] = data_df['Unnamed: 1'].astype(np.int64)
                    data_df['Unnamed: 2'] = data_df['Unnamed: 2'].astype(np.int64)

                    data_df['Total'] = (data_df['Unnamed: 0'] + data_df['Unnamed: 1'] + data_df['Unnamed: 2'])/3              
                    data_df = data_df[['Total', 'Institution']]
                    data_df.fillna(0, inplace=True)
                else:
                    data_df = get_empty_dataframe(file, "4")
            except Exception as ex:
                print(f"Error while processing the sheet 3 of the file: {file}")
                print(str(ex))
                data_df = get_empty_dataframe(file, "3")
            finally:    
                nirf_df = pd.concat([nirf_df, data_df])             
    except Exception as ex:
        print(f"Error while processing the sheet 4 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "4")    
    finally:            
        nirf_df = nirf_df.set_index('Institution')
        nirf_df.sort_index(inplace=True)
        nirf_df = nirf_df.transpose()
        nirf_df.rename(index={'Total': 'Average Part Time PhD students graduated'}, inplace=True)
    
    return nirf_df

## Sheet 5

In [14]:
def process_sheet5():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            try:
                if "5" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet 5 of the file: {file}")

                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="5")

                    data_df = data_df.iloc[2:, 1:]
                    data_df.fillna(0, inplace=True)

                    data_df['2021-22'] = data_df['2021-22'].str.split(r"(").str[0].astype(np.int64)

                    data_df['2020-21'] = data_df['2020-21'].str.split(r"(").str[0].astype(np.int64)

                    data_df['2019-20'] = data_df['2019-20'].str.split(r"(").str[0].astype(np.int64)

                    data_df['Capex1'] = round((np.sum(data_df['2021-22'])/100000),1)

                    data_df['Capex2'] = round((np.sum(data_df['2020-21'])/100000),1)

                    data_df['Capex3'] = round((np.sum(data_df['2019-20'])/100000),1)
                else:
                    data_df = get_empty_dataframe(file, "5")
            except Exception as ex:
                print(f"Error while processing the sheet 3 of the file: {file}")
                print(str(ex))
                data_df = get_empty_dataframe(file, "3")
            finally:
                nirf_df = pd.concat([nirf_df, data_df])       
    except Exception as ex:
        print(f"Error while processing the sheet 5 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "5")     
    finally:            
        nirf_df = nirf_df[['Capex1', 'Capex2', 'Capex3', 'Institution']].sort_values('Institution')
        nirf_df = nirf_df.set_index('Institution')
        nirf_df = nirf_df.groupby('Institution').head(1)
        nirf_df = nirf_df.transpose()      
    
    return nirf_df

## Sheet 6

In [15]:
def process_sheet6():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            try:
                if "6" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet 6 of the file: {file}")

                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="6")

                    data_df = data_df.iloc[2:, 1:]
                    data_df.fillna(0, inplace=True)
                    
                    data_df['2021-22'] = data_df['2021-22'].str.split(r"(").str[0].astype(np.int64)

                    data_df['2020-21'] = data_df['2020-21'].str.split(r"(").str[0].astype(np.int64)

                    data_df['2019-20'] = data_df['2019-20'].str.split(r"(").str[0].astype(np.int64)

                    data_df['Opex1'] = round((np.sum(data_df['2021-22'])/1000000),1)

                    data_df['Opex2'] = round((np.sum(data_df['2020-21'])/1000000),1)

                    data_df['Opex3'] = round((np.sum(data_df['2019-20'])/1000000),1)                
                else:
                    data_df = get_empty_dataframe(file, "6")
            except Exception as ex:
                print(f"Error while processing the sheet 3 of the file: {file}")
                print(str(ex))
                data_df = get_empty_dataframe(file, "3")
            finally:
                nirf_df = pd.concat([nirf_df, data_df])               
    except Exception as ex:
        print(f"Error while processing the sheet 6 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "6")
    finally:            
        nirf_df = nirf_df[['Opex1', 'Opex2', 'Opex3', 'Institution']].sort_values('Institution')
        nirf_df = nirf_df.set_index('Institution')
        nirf_df = nirf_df.groupby('Institution').head(1)
        nirf_df = nirf_df.transpose() 
    
    return nirf_df

## Sheet 7

In [16]:
def process_sheet7():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            try:
                if "7" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet 7 of the file: {file}")

                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="7")

                    data_df = data_df.iloc[[2], 1:]
                    data_df.fillna(0, inplace=True)
                    
                    data_df['2021-22'] = data_df['2021-22'].astype(np.float64)
                    data_df['2020-21'] = data_df['2020-21'].astype(np.float64)
                    data_df['2019-20'] = data_df['2019-20'].astype(np.float64)

                    data_df['Sponsored Average'] = (data_df['2021-22'] + data_df['2020-21'] + data_df['2019-20'])/3
                    data_df = data_df[['Sponsored Average', 'Institution']]
                else:
                    data_df = get_empty_dataframe(file, "7")
            except Exception as ex:
                print(f"Error while processing the sheet 3 of the file: {file}")
                print(str(ex))
                data_df = get_empty_dataframe(file, "3")
            finally:
                nirf_df = pd.concat([nirf_df, data_df])   
    except Exception as ex:
        print(f"Error while processing the sheet 7 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "7")
    finally:            
        nirf_df = nirf_df.groupby('Institution').mean('Sponsored Average')
        nirf_df = nirf_df.transpose() 
    
    return nirf_df

## Sheet 8

In [17]:
def process_sheet8():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            try:
                if "8" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet 8 of the file: {file}")

                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="8")

                    data_df = data_df.iloc[[2], 1:]
                    data_df.fillna(0, inplace=True)
                    
                    data_df['2021-22'] = data_df['2021-22'].astype(np.float64)
                    data_df['2020-21'] = data_df['2020-21'].astype(np.float64)
                    data_df['2019-20'] = data_df['2019-20'].astype(np.float64)

                    data_df['Consultancy Average'] = (data_df['2021-22'] + data_df['2020-21'] + data_df['2019-20'])/3
                    data_df = data_df[['Consultancy Average', 'Institution']]
                else:
                    data_df = get_empty_dataframe(file, "8")
            except Exception as ex:
                print(f"Error while processing the sheet 3 of the file: {file}")
                print(str(ex))
                data_df = get_empty_dataframe(file, "3")
            finally:
                nirf_df = pd.concat([nirf_df, data_df]) 
    except Exception as ex:
        print(f"Error while processing the sheet 8 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "8") 
    finally:        
        nirf_df = nirf_df.groupby('Institution').mean('Consultancy Average')
        nirf_df = nirf_df.transpose()
    
    return nirf_df

## Sheet 10

In [18]:
def process_sheet10():
    nirf_df = pd.DataFrame()
    
    try:
        for file in files:
            try:
                if "10" in pd.ExcelFile(PROCESSED_FILE_DIRECTORY + '\\' + file).sheet_names:
                    #print(f"Processing the sheet {sheet_name} of the file: {file}")

                    excel_file = PROCESSED_FILE_DIRECTORY + '\\' + file
                    data_df = pd.read_excel(excel_file, sheet_name="10")
                    data_df.fillna(0, inplace=True)
                else:
                    data_df = get_empty_dataframe(file, "10")
            except Exception as ex:
                print(f"Error while processing the sheet 3 of the file: {file}")
                print(str(ex))
                data_df = get_empty_dataframe(file, "3")
            finally:
                nirf_df = pd.concat([nirf_df, data_df])
    except Exception as ex:
        print(f"Error while processing the sheet 10 of the file: {file}")
        print(str(ex))
        nirf_df = get_empty_dataframe(file, "10")
    finally:
        nirf_df = nirf_df[['Answer', 'Institution']].sort_values('Institution')
        nirf_df = nirf_df.set_index('Institution')
        nirf_df.sort_index(inplace=True)
        nirf_df = nirf_df.transpose()
        nirf_df.rename(index={'Answer': 'Number of faculty members'}, inplace=True)
        
    return nirf_df

In [19]:
nirf_df = pd.DataFrame()

data = process_sheet0()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet1()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet1_1()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet1_2()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet2()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet3()
nirf_df = pd.concat([nirf_df, data])

nirf_df = nirf_df.transpose()
nirf_df['Median Salary'] = (nirf_df['Median salary UG'] + nirf_df['Median salary PG'])/2
nirf_df = nirf_df.transpose()

data = process_sheet4()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet4_1()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet4_2()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet5()
nirf_df = pd.concat([nirf_df, data])

nirf_df = nirf_df.transpose()
nirf_df['capexperstudavg'] = (nirf_df['Capex1'] + \
                              nirf_df['Capex2'] + nirf_df['Capex3'])/ \
(3 * (nirf_df['Total sanctioned approved intake for all programs'] + nirf_df['Total number of PhD students enrolled']))
nirf_df = nirf_df.transpose()

data = process_sheet6()
nirf_df = pd.concat([nirf_df, data])

nirf_df = nirf_df.transpose()
nirf_df['opexperstudavg'] = (nirf_df['Opex1'] + \
                              nirf_df['Opex2'] + nirf_df['Opex3'])/ \
(3 * (nirf_df['Total sanctioned approved intake for all programs'] + nirf_df['Total number of PhD students enrolled']))
nirf_df = nirf_df.transpose()

data = process_sheet7()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet8()
nirf_df = pd.concat([nirf_df, data])

data = process_sheet10()
nirf_df = pd.concat([nirf_df, data])

nirf_df

Error while processing the sheet 3 of the file: IR-P-C-19322.xlsx
could not convert string to float: 'Median salary of\rplaced graduates per\rannum'
Error while processing the sheet 3 of the file: IR-P-C-19611.xlsx
could not convert string to float: 'Median salary of\rplaced graduates per\rannum'
Error while processing the sheet 3 of the file: IR-P-C-24007.xlsx
could not convert string to float: 'Median salary of\rplaced graduates per\rannum'
Error while processing the sheet 3 of the file: IR-P-C-24505.xlsx
could not convert string to float: 'Median salary of\rplaced graduates per\rannum'
Error while processing the sheet 3 of the file: IR-P-C-35430.xlsx
could not convert string to float: 'Median salary of\rplaced graduates per\rannum'
Error while processing the sheet 3 of the file: IR-P-C-39486.xlsx
could not convert string to float: 'Median salary of\rplaced graduates per\rannum'
Error while processing the sheet 3 of the file: IR-P-C-40080.xlsx
could not convert string to float: 'Medi

Institution,IR-P-C-10250,IR-P-C-10283,IR-P-C-11091,IR-P-C-18900,IR-P-C-19322,IR-P-C-19611,IR-P-C-19613,IR-P-C-24007,IR-P-C-24505,IR-P-C-30768,...,IR-P-U-0536,IR-P-U-0541,IR-P-U-0562,IR-P-U-0575,IR-P-U-0643,IR-P-U-0724,IR-P-U-0747,IR-P-U-0811,IR-P-U-0853,IR-P-U-0938
Total sanctioned approved intake for all programs,358.0,454.0,360.0,495.0,745.0,670.0,484.0,748.0,680.0,260.0,...,261.0,280.0,213.0,336.0,427.0,718.0,520.0,276.0,800.0,410.0
Total number of students enrolled in all programs,336.0,487.0,412.0,577.0,740.0,638.0,477.0,645.0,676.0,274.0,...,237.0,290.0,218.0,305.0,398.0,664.0,519.0,276.0,883.0,437.0
Total number of economically socially challenged students enrolled in all programs,259.0,83.0,285.0,360.0,710.0,485.0,388.0,645.0,309.0,104.0,...,23.0,198.0,80.0,298.0,166.0,626.0,152.0,75.0,352.0,118.0
No. of students\rreceiving full\rtuition fee\rreimbursement\rfrom the State\rand Central\rGovernment,116.0,53.0,85.0,132.0,13.0,95.0,117.0,263.0,0.0,46.0,...,12.0,7.0,62.0,59.0,111.0,265.0,6.0,45.0,58.0,0.0
No. of students\rreceiving full\rtuition fee\rreimbursement\rfrom Institution\rFunds,56.0,30.0,0.0,32.0,5.0,45.0,32.0,13.0,1.0,58.0,...,0.0,2.0,5.0,127.0,21.0,0.0,102.0,0.0,154.0,8.0
No. of students\rreceiving full\rtuition fee\rreimbursement\rfrom the Private\rBodies,18.0,0.0,0.0,13.0,0.0,15.0,4.0,2.0,1.0,0.0,...,0.0,0.0,1.0,11.0,6.0,3.0,2.0,0.0,1.0,0.0
Median salary UG,316333.333333,370000.0,211333.333333,403333.333333,188000.0,258000.0,190000.0,198333.333333,231000.0,361666.666667,...,238866.666667,21621.621622,250000.0,396921.666667,247333.333333,301666.666667,382000.0,66666.666667,440000.0,236661.666667
Median salary PG,345333.333333,516666.666667,315028.0,403333.333333,0.0,0.0,212000.0,0.0,0.0,486666.666667,...,96000.0,0.0,400000.0,387066.666667,328666.666667,0.0,0.0,260000.0,617333.333333,326092.666667
Median Salary,330833.333333,443333.333333,263180.666667,403333.333333,94000.0,129000.0,201000.0,99166.666667,115500.0,424166.666667,...,167433.333333,10810.810811,325000.0,391994.166667,288000.0,150833.333333,191000.0,163333.333333,528666.666667,281377.166667
Total number of PhD students enrolled,36.0,15.0,32.0,8.0,39.0,2.0,9.0,31.0,30.0,8.0,...,39.0,0.0,31.0,66.0,25.0,15.0,77.0,19.0,141.0,20.0


In [20]:
nirf_df.to_excel(REPORT_FILE_DIRECTORY + "/PredictionReport.xlsx", freeze_panes=(1, 1),)