In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
def makeDF(tuples, header):
    '''Assumes tuples as Python tuples both empty or non empty; header as a tuple with a convention
       as (RollNumber, Name, Exam-[name]-[max-marks], ..., Lab-[name]-[max-marks], ...,
       Asgn-[name]-[max-marks], ..., Oth-[name]-[max-marks],)

       Returns a Pandas DataFrame with all NULL values replaced by Cipher, and adds a fraud column
       for figuring out cheating factor for later functions.'''

    # make rows, column IDs and marks as a list for DataFrame initialization

    row_index = [int(i) for i in range(1, len(tuples) + 1)]
    col_index = list(header)
    values = list(tuples)

    # DataFrame initialisation

    df = pd.DataFrame(tuples, row_index, col_index)

    # Handling of NULLs

    for col in list(df.columns):
        df[col] = df[col].fillna(value=0)

    # Make a copy of last given exams marks

    df['fraud'] = 0
    df['fraud'] = df[df.columns[-2]]

    return df


def scaleMarks(df):
    '''Assumes df as a Pandas DataFrame.

       Returns a Pandas DataFrame with marks scaled up according to the max-marks defined in the
       column headings'''

    # iterate through all columns and scale marks using apply() attribute of DataFrames

    for exam in list(df.columns):
        if len(exam.split('-')) > 2:
            df[exam] = df[exam].apply(lambda x: x * 100 \
                                                / int(exam.split('-')[2]))

    return df


def createAvg(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a DataFrame with added columns for overall weighted average, and individual exam,
       lab, assignments and other evaluations average'''

    # initilaize columns as zero

    marks['overall'] = 0
    marks['avgExam'] = 0
    marks['avgLab'] = 0
    marks['avgAsgn'] = 0
    marks['avgOth'] = 0

    # initialize count variables as zero

    exams = 0
    lab = 0
    asgn = 0
    oth = 0

    # iterate through the column list, filter and sum based on '-' as the additional columns do not have a '-'

    for exam in list(marks.columns):
        if exam.lower().startswith('exam'):
            marks['avgExam'] += marks[exam]
            exams += 1
        elif exam.lower().startswith('lab'):

            marks['avgLab'] += marks[exam]
            lab += 1
        elif exam.lower().startswith('asgn'):

            marks['avgAsgn'] += marks[exam]
            asgn += 1
        elif exam.lower().startswith('oth'):

            marks['avgOth'] += marks[exam]
            oth += 1
        else:

            continue

    # weight and scale marks and divide by total number of instances of similar type counted.
    # Weights based on the strictness  and students' interest in overall exam process

    marks['overall'] = 0.5 * marks['avgExam'] / exams + 0.3 \
                       * marks['avgLab'] / lab + 0.1 * marks['avgAsgn'] / asgn + 0.1 \
                       * marks['avgOth'] / oth

    return marks


def createChMarks(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a DataFrame with added column ChMarks which would be used further for overall cheating status'''

    # Not included marks for Assignments as they are done by students AT HOME

    marks['ChMarks'] = (marks['avgExam'] + marks['avgLab']
                        + marks['avgOth']) / 3
    return marks


def variance(df):
    '''Assumes df as a Pandas DataFrame.

       Returns the same DataFrame with added column for variance which has variance for all scores for a particular
       student'''

    # Figure out first the columns to be considered for variance calculation. Used '-' as an identifier again

    ls = list(df.columns)
    buffer = []
    for i in range(len(ls)):
        if len(ls[i].split('-')) > 2:
            buffer.append(ls[i])
        else:
            continue

    # initialise column var with iteration based indices so as to use the power of apply() attribute

    df['var'] = [int(i) for i in range(len(df[df.columns[0]]))]

    # make a dummy row index for slicing DataFrame for calculation

    row_index = [int(i) for i in range(1, 1 + len(df[df.columns[0]]))]

    # use the value in var as an indirect reference for the whole row and use the describe() attribute to get std

    df['var'] = df['var'].apply(lambda x: df.loc[row_index,
                                                 buffer].iloc[x].describe()['std'] ** 2)

    return df


def CI(marks, column):
    '''Assumes marks as a Pandas DataFrame and column and a string.

       Returns the 95% confidence interval for the given data as a tuple with entries as (low, high)'''

    column = str(column)

    # CI = mean +- 2*std_error; std_error = std_deviation/sqrt(total observations)

    std_error = marks[column].describe()['std'] / len(marks['avgExam']) \
                ** 0.5
    mean = marks[column].describe()['mean']

    return (mean - 2 * std_error, mean + 2 * std_error)


def width(tup):
    '''Assumes tup as tuple.

       Returns an integer as the difference of 2nd and 1st values of tuple'''

    return tup[1] - tup[0]


def CourseStats(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a tuple with values as : (course_difficulty, cheat_risk, list(cheat_flagged),
                                         avg_marks, quartile1, quartile2, quartile3,)

       course_difficulty (str) : HIGH/MODERATE/EASY based on the weighted average and cut-off marks
       cheat_risk (str) : HIGH/MODERATE/LOW based on the spread of Assignment and Other Exam marks
       cheat_flagged (list) : A list of 5 RollNumbers who we believe with some confidence are
                              indulged in academic malpractices in the class as a whole.
       avg_marks (str) : A range of marks where the most of students lie in between.
       quartile1, quartile2, quartile3 (int) : The stastical quartile scores for the overall analysis.'''

    # Calculate course difficulty based on 3rd Quartile scores of students.

    marker = marks['overall'].describe()['75%']
    if marker > 0 and marker < 40:
        course_difficulty = 'HIGH'
    elif marker > 40 and marker < 75:
        course_difficulty = 'MODERATE'
    else:
        course_difficulty = 'EASY'

    # Calculate the probability of cheating based on the width of assignment scores and other marks combined

    cheatProb = 1 - width(CI(marks, 'avgAsgn')) / width(CI(marks,
                                                           'ChMarks'))
    if cheatProb > 0.7 and cheatProb < 1:
        cheat_risk = 'HIGH'
    elif cheatProb > 0.4 and cheatProb < 0.7:
        cheat_risk = 'MODERATE'
    else:
        cheat_risk = 'LOW'

    # Flag out top 5 students whose overall scores and assignment socres tell two different stories

    marks['cheatflagged'] = 0

    marks['cheatflagged'] = marks['avgAsgn'] - marks['ChMarks']
    cheat_flagged = marks.sort_values('cheatflagged', ascending=False)['RollNumber'].iloc[1:6]

    # Calculate the range of marks for most students
    avg_marks = str(round(CI(marks, 'overall')[0], 2)) + '-' + str(round(CI(marks, 'overall')[1], 2))

    # Calculate quartile scores for weighted marks

    quartile1 = round(marks['overall'].describe()['25%'], 2)
    quartile2 = round(marks['overall'].describe()['50%'], 2)
    quartile3 = round(marks['overall'].describe()['75%'], 2)

    return (
        course_difficulty,
        cheat_risk,
        list(cheat_flagged),
        avg_marks,
        [quartile1,
         quartile2,
         quartile3]
    )


def ExamStats(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a tuple with values as : (exam_difficulty, cheat_risk, list(cheat_flagged),
                                         avg_marks, quartile1, quartile2, quartile3,)

       exam_difficulty (str) : HIGH/MODERATE/EASY based on the exam performance
       cheat_risk (str) : HIGH/MODERATE/LOW based on the unevenness in marks
       cheat_flagged (list) : A list of 5 RollNumbers who we believe with some confidence should
                              be re-evaluated
       avg_marks (str) : A range of marks where the most of students lie in between.
       quartile1, quartile2, quartile3 (int) : The stastical quartile scores for the overall analysis.'''

    # Figure out the name of last exam and store it in location

    temp = list(marks.columns)
    count = 1
    for i in range(len(temp)):
        if len(temp[i].split('-')) > 2:
            count += 1
    location = temp[count]

    # Calculate the difficulty based on 2nd quartile cut-offs

    marker = marks[location].describe()['50%']
    if marker > 0 and marker < 40:
        exam_difficulty = 'HIGH'
    elif marker > 40 and marker < 75:
        exam_difficulty = 'MODERATE'
    else:
        exam_difficulty = 'EASY'

    # Build the frequency table for digit occurences, add the numbers not present in DataFrame with zero occurence

    freq_df = marks['fraud'].apply(lambda x: int(x % 10)).value_counts()

    for i in range(10):
        try:
            if freq_df.loc[i] >= 0:
                continue
        except:
            freq_df.loc[i] = 0

    # Calculate the variance of the same Dataframe and figure out cheating risk

    cheat_var = freq_df.describe()['std'] ** 2
    if cheat_var < 15:
        cheat_risk = 'LOW'
    if cheat_var > 15 and cheat_var < 80:
        cheat_risk = 'MODERATE'
    else:
        cheat_risk = 'HIGH'

    # Find the number with most occurences, sample 5 random roll numbers with that number for re-evaluation

    max_repeat = freq_df.index[0]
    marks['fraud'] = marks['fraud'].apply(lambda x: int(x % 10))
    suspicious = marks[marks['fraud'] == max_repeat]['fraud']
    if len(suspicious) > 5:
        check_sheets_index = random.sample(list(range(0, len(suspicious))), 5)
        cheat_flagged = []
        for index in check_sheets_index:
            cheat_flagged.append(marks['RollNumber'].iloc[index])
    else:
        cheat_flagged = []

    # Calculate the range of marks for most students

    avg_marks = str(round(CI(marks, location)[0], 2)) + '-' + str(round(CI(marks, location)[1],2))

    # Calculate quartile scores for exam marks

    quartile1 = round(marks[location].describe()['25%'], 2)
    quartile2 = round(marks[location].describe()['50%'], 2)
    quartile3 = round(marks[location].describe()['75%'], 2)

    return (
        exam_difficulty,
        cheat_risk,
        cheat_flagged,
        avg_marks,
        [quartile1,
         quartile2,
         quartile3],
    )


def PersistentLabels(df):
    '''Assumes df as a Pandas DataFrame.

       Returns a tuple with values as (consistent, moderately_varying, highly_varying,)

       consistent (list) : RollNumbers have almost no variation in their marks obtained so far.
       moderately_varying (list) : RollNumbers have some variation in their marks obtained so far.
       highly_varying (list) : RollNumbers have a high variation in their marks obtained so far.'''

    # calculate and filter the roll number list

    consistent = list(df[df['var'] < 30]['RollNumber'])
    moderately_varying = list(df[(df['var'] > 30) & (df['var']
                                                     < 150)]['RollNumber'])
    highly_varying = list(df[df['var'] > 150]['RollNumber'])

    return (consistent, moderately_varying, highly_varying)


def PerformanceLabels(df):
    '''Assumes df as a Pandas DataFrame.

       Returns a tuple with values as (exceptional, promising, average, needy,)

       exceptional (list) : RollNumbers with really good performance overall.
       promising (list) : RollNumbers who can be pushed to top with a little efforts.
       average (list) : RollNumbers who are just a few steps from failing marks and need some attention.
       needy (list) : RollNumbers who are in an immediate need of attention.'''

    # Calculate and filter the roll number list

    exceptional = list(df[df['overall'] > 85]['RollNumber'])
    promising = list(df[(df['overall'] < 85) & (df['overall']
                                                > 50)]['RollNumber'])
    average = list(df[(df['overall'] < 50) & (df['overall']
                                              > 30)]['RollNumber'])
    needy = list(df[df['overall'] < 30]['RollNumber'])

    return (exceptional, promising, average, needy)


def mainFunc(df):
    '''Assumes df as a Pandas DataFrame.

       Returns the top needy students based on algo as a list.'''

    # initialise an empty column to save scores

    df['temp'] = 1 / df['overall'] + df['var']
    return list((df.sort_values('temp', ascending=False)['RollNumber'])[0:5])

def getRank(df, exam):
    '''Assumes df as a Pandas dataframe, amd exam as a string.
    
       Returns a dataframe with ranks according to roll number.'''
    
    #Sort values according to the particular exam on Roll number column. 
    examRank = df.sort_values(exam, ascending = False)['RollNumber']
    
    #initialise a dummy column later to be used as the rank.
    temp = [int(i) + 1 for i in range(len(df['RollNumber']))]
    
    #join the two columns in a dataframe and sort according to Roll number. 
    df1 = pd.DataFrame({exam : temp, 'RollNumber' : examRank})
    df1.sort_values('RollNumber', inplace=True)
    
    return df1

def getRankMatrix(df):
    '''Assumes df as a Pandas DataFrame.
    
        Returns a tuple of tuples with individual type of exam ranks.'''
    
    #find individual ranks for classes of exams
    df1 = getRank(df, 'avgExam')
    df2 = getRank(df, 'avgLab')
    df3 = getRank(df, 'avgAsgn')
    df4 = getRank(df, 'avgOth')
    df5 = getRank(df, 'overall')
    
    #make the combines dataframe
    temp = df['RollNumber']
    dfRank = pd.DataFrame({'RollNumber' : temp, 'ClassRank' : df5['overall'], 'ExamRank' : df1['avgExam'], 'LabRank' : df2['avgLab'], 'AsgnRank' : df3['avgAsgn'], 'OthRank' : df4['avgOth']})

    return tuple([tuple(x) for x in dfRank.to_records(index=False)])

def ExamDetails(df):
    '''Assumes df as a Pandas DataFrame.
    
    Returns a listof lists with individual exam analysis'''
    
    details = []
    
    #for all the exams entered, find CI, max marks and exam name.
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            avgCI = str(round(CI(df, exam)[0], 2)) + '-' + str(round(CI(df, exam)[1], 2))
            examName = exam.split('-')[1]
            maxMarks = round(max(df[exam]), 2) 
            details.append([examName, avgCI, maxMarks])
    
    return details

def findBestExam(i):
    '''Assumes i as an int.
    
       Returns a string with the value as the exam with maximum marks in any Dataframe record'''
    
    #find exams
    evals = []
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            evals.append(exam)
    
    #Boolean Series with True at the desired location
    check = df[evals].iloc[i] == df.iloc[i]['best']
    
    #list of all conducted exam columns
    temp = df[evals].columns
    
    #find the name of exam, as per the faculty
    for seek in range(len(temp)):
        if check[seek] == True:
            exam = temp[seek].split('-')[1]
    
    return exam

def findWorstExam(i):
    '''Assumes i as an int.
    
       Returns a string with the value as the exam with minimum marks in any Dataframe record'''
    
    #find exams
    evals = []
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            evals.append(exam)
    
    #Boolean Series with True at the desired location
    check = df[evals].iloc[i] == df.iloc[i]['worst']
    
    #list of all conducted exam columns
    temp = df[evals].columns
    
    #find the name of exam, as per the faculty
    for seek in range(len(temp)):
        if check[seek] == True:
            exam = temp[seek].split('-')[1]
    
    return exam

def studentMarks(df):
    '''Assumes df as a Pandas DataFrame.
    
       Returns a tuple of tuples, with best exam and worst exam performances.'''
    
    #find all the exam names
    evals = []
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            evals.append(exam)
    #initialise new attributes with their iterative location index to use the power of lambda functions.
    df['best'] = [i for i in range(len(df['RollNumber']))]
    df['worst'] = [i for i in range(len(df['RollNumber']))]
    df['bestExam'] = [i for i in range(len(df['RollNumber']))]
    df['worstExam'] = [i for i in range(len(df['RollNumber']))]

    #Find the max or min oerformance of the record.
    df['best'] = df['best'].apply(lambda x : max(df[evals].iloc[x]))
    df['worst'] = df['worst'].apply(lambda x : min(df[evals].iloc[x]))
    
    #Find the best or worst exam name.
    df['bestExam'] = df['bestExam'].apply(findBestExam)
    df['worstExam'] = df['worstExam'].apply(findWorstExam)
    
    #rounding off
    df['best'] = df['best'].apply(lambda x : round(x, 2))
    df['worst'] = df['worst'].apply(lambda x : round(x, 2))
    
    #Make new dataframe
    df1 = df[['RollNumber', 'best', 'worst', 'bestExam', 'worstExam']]

    #Coercion
    ret  = tuple([tuple(x) for x in df1.to_records(index=False)])
    
    return ret


In [3]:
tuples = np.random.randn(267, 14)
headers =  ['RollNumber', 'Name', 'exam-mid-35', 'exam-end-50', 'lab-basic01-20','lab-basic02-20','lab-basic03-20','asgn-basic01-15','asgn-basic02-15','asgn-basic03-15','asgn-basic04-15','oth-quiz01-30', 'oth-quiz02-30', 'oth-quiz03-30']
roll = [i for i in range(1, 268)]
max_marks = [1, 1, 35, 50, 20, 20, 20, 15, 15, 15, 15, 30, 30, 30]
df = pd.DataFrame(tuples)
for i in range(14):
    df[i] = df[i].apply(lambda x : int((x*100)%max_marks[i]))
df.columns = headers
df['RollNumber'] = roll
df['fraud'] = 0
df['fraud'] = df['oth-quiz03-30']

In [4]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,asgn-basic04-15,oth-quiz01-30,oth-quiz02-30,oth-quiz03-30,fraud
0,1,0,29,47,9,10,8,9,5,11,14,14,28,25,25
1,2,0,8,10,17,8,6,10,9,2,6,15,1,11,11
2,3,0,25,19,10,11,16,11,11,11,12,20,27,26,26
3,4,0,7,28,16,13,0,14,14,10,2,13,11,9,9
4,5,0,18,37,11,9,8,7,1,14,4,27,16,24,24


In [5]:
df = scaleMarks(df)
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,asgn-basic04-15,oth-quiz01-30,oth-quiz02-30,oth-quiz03-30,fraud
0,1,0,82.857143,94.0,45.0,50.0,40.0,60.0,33.333333,73.333333,93.333333,46.666667,93.333333,83.333333,25
1,2,0,22.857143,20.0,85.0,40.0,30.0,66.666667,60.0,13.333333,40.0,50.0,3.333333,36.666667,11
2,3,0,71.428571,38.0,50.0,55.0,80.0,73.333333,73.333333,73.333333,80.0,66.666667,90.0,86.666667,26
3,4,0,20.0,56.0,80.0,65.0,0.0,93.333333,93.333333,66.666667,13.333333,43.333333,36.666667,30.0,9
4,5,0,51.428571,74.0,55.0,45.0,40.0,46.666667,6.666667,93.333333,26.666667,90.0,53.333333,80.0,24


In [6]:
df = createAvg(df)
df = createChMarks(df)
df = variance(df)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


In [7]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,...,oth-quiz02-30,oth-quiz03-30,fraud,overall,avgExam,avgLab,avgAsgn,avgOth,ChMarks,var
0,1,0,82.857143,94.0,45.0,50.0,40.0,60.0,33.333333,73.333333,...,93.333333,83.333333,25,71.65873,176.857143,135.0,260.0,223.333333,178.396825,553.124356
1,2,0,22.857143,20.0,85.0,40.0,30.0,66.666667,60.0,13.333333,...,3.333333,36.666667,11,33.714286,42.857143,155.0,180.0,90.0,95.952381,233.895056
2,3,0,71.428571,38.0,50.0,55.0,80.0,73.333333,73.333333,73.333333,...,90.0,86.666667,26,61.468254,109.428571,185.0,300.0,243.333333,179.253968,960.049663
3,4,0,20.0,56.0,80.0,65.0,0.0,93.333333,93.333333,66.666667,...,36.666667,30.0,9,43.833333,76.0,145.0,266.666667,110.0,110.333333,657.176802
4,5,0,51.428571,74.0,55.0,45.0,40.0,46.666667,6.666667,93.333333,...,53.333333,80.0,24,57.134921,125.428571,140.0,173.333333,223.333333,162.920635,904.213152


In [8]:
CourseStats(df)

('MODERATE',
 'LOW',
 [250, 252, 172, 82, 132],
 '46.28-49.26',
 [38.55, 47.6, 56.57])

In [9]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,...,oth-quiz03-30,fraud,overall,avgExam,avgLab,avgAsgn,avgOth,ChMarks,var,cheatflagged
0,1,0,82.857143,94.0,45.0,50.0,40.0,60.0,33.333333,73.333333,...,83.333333,25,71.65873,176.857143,135.0,260.0,223.333333,178.396825,553.124356,81.603175
1,2,0,22.857143,20.0,85.0,40.0,30.0,66.666667,60.0,13.333333,...,36.666667,11,33.714286,42.857143,155.0,180.0,90.0,95.952381,233.895056,84.047619
2,3,0,71.428571,38.0,50.0,55.0,80.0,73.333333,73.333333,73.333333,...,86.666667,26,61.468254,109.428571,185.0,300.0,243.333333,179.253968,960.049663,120.746032
3,4,0,20.0,56.0,80.0,65.0,0.0,93.333333,93.333333,66.666667,...,30.0,9,43.833333,76.0,145.0,266.666667,110.0,110.333333,657.176802,156.333333
4,5,0,51.428571,74.0,55.0,45.0,40.0,46.666667,6.666667,93.333333,...,80.0,24,57.134921,125.428571,140.0,173.333333,223.333333,162.920635,904.213152,10.412698


In [10]:
ExamStats(df)

('MODERATE',
 'MODERATE',
 [23, 26, 24, 29, 32],
 '42.73-49.9',
 [20.0, 43.33, 73.33])

In [11]:
PersistentLabels(df)

([],
 [],
 [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122,
  123,
  124,
  125,
  126,
  127,
  128,
  129,
  130,
  131,
  132,
  133,
  134,
  135,
  136,
  137,
  138,
  139,
  140,
  141,
  142,
  143,
  144,
  145,
  146,
  147,
  148,
  149,
  150,
  151,
  152,
  153,
  154,
  155,
  156,
  157,

In [12]:
PerformanceLabels(df)

([],
 [1,
  3,
  5,
  9,
  11,
  13,
  14,
  15,
  22,
  23,
  24,
  25,
  31,
  33,
  35,
  37,
  39,
  40,
  41,
  42,
  51,
  55,
  56,
  58,
  60,
  63,
  65,
  66,
  68,
  70,
  74,
  76,
  77,
  79,
  82,
  85,
  88,
  89,
  94,
  96,
  97,
  99,
  104,
  106,
  108,
  109,
  110,
  112,
  117,
  125,
  127,
  129,
  131,
  132,
  138,
  140,
  141,
  145,
  146,
  147,
  151,
  154,
  158,
  159,
  160,
  162,
  165,
  166,
  168,
  170,
  171,
  173,
  175,
  177,
  178,
  182,
  186,
  188,
  189,
  191,
  193,
  194,
  195,
  196,
  199,
  200,
  201,
  203,
  205,
  208,
  210,
  216,
  218,
  221,
  222,
  223,
  224,
  227,
  232,
  236,
  237,
  238,
  239,
  242,
  243,
  247,
  248,
  249,
  253,
  259,
  260,
  261,
  262,
  263],
 [2,
  4,
  6,
  7,
  8,
  10,
  12,
  17,
  18,
  19,
  20,
  21,
  26,
  27,
  28,
  29,
  30,
  32,
  34,
  36,
  38,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  52,
  53,
  54,
  57,
  59,
  61,
  62,
  64,
  67,
  69,
  72,
  73,
 

In [13]:
mainFunc(df)

[165, 103, 102, 76, 92]

In [14]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,...,fraud,overall,avgExam,avgLab,avgAsgn,avgOth,ChMarks,var,cheatflagged,temp
0,1,0,82.857143,94.0,45.0,50.0,40.0,60.0,33.333333,73.333333,...,5,71.65873,176.857143,135.0,260.0,223.333333,178.396825,553.124356,81.603175,553.138311
1,2,0,22.857143,20.0,85.0,40.0,30.0,66.666667,60.0,13.333333,...,1,33.714286,42.857143,155.0,180.0,90.0,95.952381,233.895056,84.047619,233.924717
2,3,0,71.428571,38.0,50.0,55.0,80.0,73.333333,73.333333,73.333333,...,6,61.468254,109.428571,185.0,300.0,243.333333,179.253968,960.049663,120.746032,960.065932
3,4,0,20.0,56.0,80.0,65.0,0.0,93.333333,93.333333,66.666667,...,9,43.833333,76.0,145.0,266.666667,110.0,110.333333,657.176802,156.333333,657.199616
4,5,0,51.428571,74.0,55.0,45.0,40.0,46.666667,6.666667,93.333333,...,4,57.134921,125.428571,140.0,173.333333,223.333333,162.920635,904.213152,10.412698,904.230654


In [15]:
getRankMatrix(df)

((1, 8, 10, 143, 25, 17),
 (2, 232, 232, 101, 143, 224),
 (3, 38, 107, 53, 6, 6),
 (4, 163, 179, 122, 21, 192),
 (5, 63, 71, 128, 162, 16),
 (6, 130, 156, 48, 201, 201),
 (7, 186, 228, 87, 19, 134),
 (8, 248, 262, 151, 71, 29),
 (9, 88, 102, 51, 233, 129),
 (10, 122, 124, 120, 34, 215),
 (11, 96, 135, 89, 79, 48),
 (12, 198, 142, 249, 200, 137),
 (13, 42, 40, 50, 140, 260),
 (14, 43, 69, 68, 99, 57),
 (15, 93, 53, 214, 199, 71),
 (16, 251, 208, 240, 206, 240),
 (17, 158, 174, 88, 157, 220),
 (18, 216, 259, 17, 151, 104),
 (19, 141, 85, 238, 101, 160),
 (20, 127, 128, 219, 98, 3),
 (21, 184, 170, 233, 85, 81),
 (22, 85, 108, 164, 15, 15),
 (23, 33, 28, 166, 42, 119),
 (24, 105, 76, 193, 66, 187),
 (25, 34, 56, 14, 243, 142),
 (26, 170, 199, 117, 54, 53),
 (27, 230, 222, 229, 65, 66),
 (28, 201, 178, 185, 165, 225),
 (29, 247, 225, 218, 175, 184),
 (30, 153, 160, 181, 70, 108),
 (31, 64, 131, 5, 43, 249),
 (32, 223, 231, 160, 22, 144),
 (33, 60, 74, 61, 205, 115),
 (34, 194, 236, 36, 141

In [16]:
studentMarks(df)

((1, 94.0, 33.33, 'end', 'basic02'),
 (2, 85.0, 3.33, 'basic01', 'quiz02'),
 (3, 90.0, 38.0, 'quiz02', 'end'),
 (4, 93.33, 0.0, 'basic02', 'basic03'),
 (5, 93.33, 6.67, 'basic03', 'basic02'),
 (6, 95.0, 13.33, 'basic02', 'quiz03'),
 (7, 95.0, 0.0, 'basic02', 'mid'),
 (8, 90.0, 0.0, 'quiz02', 'end'),
 (9, 92.0, 3.33, 'end', 'quiz03'),
 (10, 90.0, 0.0, 'basic03', 'basic01'),
 (11, 93.33, 11.43, 'quiz01', 'mid'),
 (12, 73.33, 0.0, 'quiz01', 'basic03'),
 (13, 93.33, 0.0, 'basic04', 'quiz01'),
 (14, 93.33, 0.0, 'basic04', 'basic02'),
 (15, 90.0, 5.0, 'quiz02', 'basic03'),
 (16, 65.0, 0.0, 'basic01', 'basic03'),
 (17, 95.0, 10.0, 'basic02', 'basic03'),
 (18, 93.33, 0.0, 'basic04', 'quiz01'),
 (19, 82.86, 0.0, 'mid', 'basic04'),
 (20, 96.67, 0.0, 'quiz02', 'basic04'),
 (21, 93.33, 0.0, 'basic02', 'basic01'),
 (22, 96.67, 10.0, 'quiz01', 'basic03'),
 (23, 86.67, 10.0, 'basic04', 'quiz02'),
 (24, 93.33, 0.0, 'basic04', 'basic01'),
 (25, 85.0, 6.67, 'basic03', 'basic04'),
 (26, 96.67, 0.0, 'quiz

In [17]:
ExamDetails(df)

[['mid', '45.97-52.89', 97.14],
 ['end', '44.37-51.36', 98.0],
 ['basic01', '41.03-48.15', 95.0],
 ['basic02', '45.23-52.41', 95.0],
 ['basic03', '44.82-51.92', 95.0],
 ['basic01', '44.19-51.29', 93.33],
 ['basic02', '43.05-49.93', 93.33],
 ['basic03', '43.16-49.87', 93.33],
 ['basic04', '41.07-48.32', 93.33],
 ['quiz01', '40.31-47.45', 96.67],
 ['quiz02', '45.18-52.62', 96.67],
 ['quiz03', '42.73-49.9', 96.67]]