In [9]:
import numpy as np
import pandas as pd
import random

In [2]:
def makeDF(tuples, header):
    '''Assumes tuples as Python tuples both empty or non empty; header as a tuple with a convention
       as (RollNumber, Name, Exam-[name]-[max-marks], ..., Lab-[name]-[max-marks], ...,
       Asgn-[name]-[max-marks], ..., Oth-[name]-[max-marks],)

       Returns a Pandas DataFrame with all NULL values replaced by Cipher, and adds a fraud column
       for figuring out cheating factor for later functions.'''

    # make rows, column IDs and marks as a list for DataFrame initialization

    row_index = [int(i) for i in range(1, len(tuples) + 1)]
    col_index = list(header)
    values = list(tuples)

    # DataFrame initialisation

    df = pd.DataFrame(tuples, row_index, col_index)

    # Handling of NULLs

    for col in list(df.columns):
        df[col] = df[col].fillna(value=0)

    # Make a copy of last given exams marks

    df['fraud'] = 0
    df['fraud'] = df[df.columns[-2]]

    return df


def scaleMarks(df):
    '''Assumes df as a Pandas DataFrame.

       Returns a Pandas DataFrame with marks scaled up according to the max-marks defined in the
       column headings'''

    # iterate through all columns and scale marks using apply() attribute of DataFrames

    for exam in list(df.columns):
        if len(exam.split('-')) > 2:
            df[exam] = df[exam].apply(lambda x: x * 100 \
                                                / int(exam.split('-')[2]))

    return df


def createAvg(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a DataFrame with added columns for overall weighted average, and individual exam,
       lab, assignments and other evaluations average'''

    # initilaize columns as zero

    marks['overall'] = 0
    marks['avgExam'] = 0
    marks['avgLab'] = 0
    marks['avgAsgn'] = 0
    marks['avgOth'] = 0

    # initialize count variables as zero

    exams = 0
    lab = 0
    asgn = 0
    oth = 0

    # iterate through the column list, filter and sum based on '-' as the additional columns do not have a '-'

    for exam in list(marks.columns):
        if exam.lower().startswith('exam'):
            marks['avgExam'] += marks[exam]
            exams += 1
        elif exam.lower().startswith('lab'):

            marks['avgLab'] += marks[exam]
            lab += 1
        elif exam.lower().startswith('asgn'):

            marks['avgAsgn'] += marks[exam]
            asgn += 1
        elif exam.lower().startswith('oth'):

            marks['avgOth'] += marks[exam]
            oth += 1
        else:

            continue

    # weight and scale marks and divide by total number of instances of similar type counted.
    # Weights based on the strictness  and students' interest in overall exam process

    marks['overall'] = 0.5 * marks['avgExam'] / exams + 0.3 \
                       * marks['avgLab'] / lab + 0.1 * marks['avgAsgn'] / asgn + 0.1 \
                       * marks['avgOth'] / oth

    return marks


def createChMarks(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a DataFrame with added column ChMarks which would be used further for overall cheating status'''

    # Not included marks for Assignments as they are done by students AT HOME

    marks['ChMarks'] = (marks['avgExam'] + marks['avgLab']
                        + marks['avgOth']) / 3
    return marks


def variance(df):
    '''Assumes df as a Pandas DataFrame.

       Returns the same DataFrame with added column for variance which has variance for all scores for a particular
       student'''

    # Figure out first the columns to be considered for variance calculation. Used '-' as an identifier again

    ls = list(df.columns)
    buffer = []
    for i in range(len(ls)):
        if len(ls[i].split('-')) > 2:
            buffer.append(ls[i])
        else:
            continue

    # initialise column var with iteration based indices so as to use the power of apply() attribute

    df['var'] = [int(i) for i in range(len(df[df.columns[0]]))]

    # make a dummy row index for slicing DataFrame for calculation

    row_index = [int(i) for i in range(1, 1 + len(df[df.columns[0]]))]

    # use the value in var as an indirect reference for the whole row and use the describe() attribute to get std

    df['var'] = df['var'].apply(lambda x: df.loc[row_index,
                                                 buffer].iloc[x].describe()['std'] ** 2)

    return df


def CI(marks, column):
    '''Assumes marks as a Pandas DataFrame and column and a string.

       Returns the 95% confidence interval for the given data as a tuple with entries as (low, high)'''

    column = str(column)

    # CI = mean +- 2*std_error; std_error = std_deviation/sqrt(total observations)

    std_error = marks[column].describe()['std'] / len(marks['avgExam']) \
                ** 0.5
    mean = marks[column].describe()['mean']

    return (mean - 2 * std_error, mean + 2 * std_error)


def width(tup):
    '''Assumes tup as tuple.

       Returns an integer as the difference of 2nd and 1st values of tuple'''

    return tup[1] - tup[0]


def CourseStats(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a tuple with values as : (course_difficulty, cheat_risk, list(cheat_flagged),
                                         avg_marks, quartile1, quartile2, quartile3,)

       course_difficulty (str) : HIGH/MODERATE/EASY based on the weighted average and cut-off marks
       cheat_risk (str) : HIGH/MODERATE/LOW based on the spread of Assignment and Other Exam marks
       cheat_flagged (list) : A list of 5 RollNumbers who we believe with some confidence are
                              indulged in academic malpractices in the class as a whole.
       avg_marks (str) : A range of marks where the most of students lie in between.
       quartile1, quartile2, quartile3 (int) : The stastical quartile scores for the overall analysis.'''

    # Calculate course difficulty based on 3rd Quartile scores of students.

    marker = marks['overall'].describe()['75%']
    if marker > 0 and marker < 40:
        course_difficulty = 'HIGH'
    elif marker > 40 and marker < 75:
        course_difficulty = 'MODERATE'
    else:
        course_difficulty = 'EASY'

    # Calculate the probability of cheating based on the width of assignment scores and other marks combined

    cheatProb = 1 - width(CI(marks, 'avgAsgn')) / width(CI(marks,
                                                           'ChMarks'))
    if cheatProb > 0.7 and cheatProb < 1:
        cheat_risk = 'HIGH'
    elif cheatProb > 0.4 and cheatProb < 0.7:
        cheat_risk = 'MODERATE'
    else:
        cheat_risk = 'LOW'

    # Flag out top 5 students whose overall scores and assignment socres tell two different stories

    marks['cheatflagged'] = 0

    marks['cheatflagged'] = marks['avgAsgn'] - marks['ChMarks']
    cheat_flagged = marks.sort_values('cheatflagged', ascending=False)['RollNumber'].iloc[1:6]

    # Calculate the range of marks for most students
    avg_marks = str(round(CI(marks, 'overall')[0], 2)) + '-' + str(round(CI(marks, 'overall')[1], 2))

    # Calculate quartile scores for weighted marks

    quartile1 = round(marks['overall'].describe()['25%'], 2)
    quartile2 = round(marks['overall'].describe()['50%'], 2)
    quartile3 = round(marks['overall'].describe()['75%'], 2)

    return (
        course_difficulty,
        cheat_risk,
        list(cheat_flagged),
        avg_marks,
        [quartile1,
         quartile2,
         quartile3]
    )


def ExamStats(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a tuple with values as : (exam_difficulty, cheat_risk, list(cheat_flagged),
                                         avg_marks, quartile1, quartile2, quartile3,)

       exam_difficulty (str) : HIGH/MODERATE/EASY based on the exam performance
       cheat_risk (str) : HIGH/MODERATE/LOW based on the unevenness in marks
       cheat_flagged (list) : A list of 5 RollNumbers who we believe with some confidence should
                              be re-evaluated
       avg_marks (str) : A range of marks where the most of students lie in between.
       quartile1, quartile2, quartile3 (int) : The stastical quartile scores for the overall analysis.'''

    # Figure out the name of last exam and store it in location

    temp = list(marks.columns)
    count = 1
    for i in range(len(temp)):
        if len(temp[i].split('-')) > 2:
            count += 1
    location = temp[count]

    # Calculate the difficulty based on 2nd quartile cut-offs

    marker = marks[location].describe()['50%']
    if marker > 0 and marker < 40:
        exam_difficulty = 'HIGH'
    elif marker > 40 and marker < 75:
        exam_difficulty = 'MODERATE'
    else:
        exam_difficulty = 'EASY'

    # Build the frequency table for digit occurences, add the numbers not present in DataFrame with zero occurence

    freq_df = marks['fraud'].apply(lambda x: int(x % 10)).value_counts()

    for i in range(10):
        try:
            if freq_df.loc[i] >= 0:
                continue
        except:
            freq_df.loc[i] = 0

    # Calculate the variance of the same Dataframe and figure out cheating risk

    cheat_var = freq_df.describe()['std'] ** 2
    if cheat_var < 15:
        cheat_risk = 'LOW'
    if cheat_var > 15 and cheat_var < 80:
        cheat_risk = 'MODERATE'
    else:
        cheat_risk = 'HIGH'

    # Find the number with most occurences, sample 5 random roll numbers with that number for re-evaluation

    max_repeat = freq_df.index[0]
    marks['fraud'] = marks['fraud'].apply(lambda x: int(x % 10))
    suspicious = marks[marks['fraud'] == max_repeat]['fraud']
    if len(suspicious) > 5:
        check_sheets_index = random.sample(list(range(0, len(suspicious))), 5)
        cheat_flagged = []
        for index in check_sheets_index:
            cheat_flagged.append(marks['RollNumber'].iloc[index])
    else:
        cheat_flagged = []

    # Calculate the range of marks for most students

    avg_marks = str(round(CI(marks, location)[0], 2)) + '-' + str(round(CI(marks, location)[1],2))

    # Calculate quartile scores for exam marks

    quartile1 = round(marks[location].describe()['25%'], 2)
    quartile2 = round(marks[location].describe()['50%'], 2)
    quartile3 = round(marks[location].describe()['75%'], 2)

    return (
        exam_difficulty,
        cheat_risk,
        cheat_flagged,
        avg_marks,
        [quartile1,
         quartile2,
         quartile3],
    )


def PersistentLabels(df):
    '''Assumes df as a Pandas DataFrame.

       Returns a tuple with values as (consistent, moderately_varying, highly_varying,)

       consistent (list) : RollNumbers have almost no variation in their marks obtained so far.
       moderately_varying (list) : RollNumbers have some variation in their marks obtained so far.
       highly_varying (list) : RollNumbers have a high variation in their marks obtained so far.'''

    # calculate and filter the roll number list

    consistent = list(df[df['var'] < 30]['RollNumber'])
    moderately_varying = list(df[(df['var'] > 30) & (df['var']
                                                     < 150)]['RollNumber'])
    highly_varying = list(df[df['var'] > 150]['RollNumber'])

    return (consistent, moderately_varying, highly_varying)


def PerformanceLabels(df):
    '''Assumes df as a Pandas DataFrame.

       Returns a tuple with values as (exceptional, promising, average, needy,)

       exceptional (list) : RollNumbers with really good performance overall.
       promising (list) : RollNumbers who can be pushed to top with a little efforts.
       average (list) : RollNumbers who are just a few steps from failing marks and need some attention.
       needy (list) : RollNumbers who are in an immediate need of attention.'''

    # Calculate and filter the roll number list

    exceptional = list(df[df['overall'] > 85]['RollNumber'])
    promising = list(df[(df['overall'] < 85) & (df['overall']
                                                > 50)]['RollNumber'])
    average = list(df[(df['overall'] < 50) & (df['overall']
                                              > 30)]['RollNumber'])
    needy = list(df[df['overall'] < 30]['RollNumber'])

    return (exceptional, promising, average, needy)


def mainFunc(df):
    '''Assumes df as a Pandas DataFrame.

       Returns the top needy students based on algo as a list.'''

    # initialise an empty column to save scores

    df['temp'] = 1 / df['overall'] + df['var']
    return list((df.sort_values('temp', ascending=False)['RollNumber'])[0:5])

def getRank(df, exam):
    '''Assumes df as a Pandas dataframe, amd exam as a string.
    
       Returns a dataframe with ranks according to roll number.'''
    
    #Sort values according to the particular exam on Roll number column. 
    examRank = df.sort_values(exam, ascending = False)['RollNumber']
    
    #initialise a dummy column later to be used as the rank.
    temp = [int(i) + 1 for i in range(len(df['RollNumber']))]
    
    #join the two columns in a dataframe and sort according to Roll number. 
    df1 = pd.DataFrame({exam : temp, 'RollNumber' : examRank})
    df1.sort_values('RollNumber', inplace=True)
    
    return df1

def getRankMatrix(df):
    '''Assumes df as a Pandas DataFrame.
    
        Returns a tuple of tuples with individual type of exam ranks.'''
    
    #find individual ranks for classes of exams
    df1 = getRank(df, 'avgExam')
    df2 = getRank(df, 'avgLab')
    df3 = getRank(df, 'avgAsgn')
    df4 = getRank(df, 'avgOth')
    df5 = getRank(df, 'overall')
    
    #make the combines dataframe
    temp = df['RollNumber']
    dfRank = pd.DataFrame({'RollNumber' : temp, 'ClassRank' : df5['overall'], 'ExamRank' : df1['avgExam'], 'LabRank' : df2['avgLab'], 'AsgnRank' : df3['avgAsgn'], 'OthRank' : df4['avgOth']})

    return tuple([tuple(x) for x in dfRank.to_records(index=False)])

def ExamDetails(df):
    '''Assumes df as a Pandas DataFrame.
    
    Returns a listof lists with individual exam analysis'''
    
    details = []
    
    #for all the exams entered, find CI, max marks and exam name.
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            avgCI = str(round(CI(df, exam)[0], 2)) + '-' + str(round(CI(df, exam)[1], 2))
            examName = exam.split('-')[1]
            maxMarks = round(max(df[exam]), 2) 
            details.append([examName, avgCI, maxMarks])
    
    return details

def findBestExam(i):
    '''Assumes i as an int.
    
       Returns a string with the value as the exam with maximum marks in any Dataframe record'''
    
    #find exams
    evals = []
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            evals.append(exam)
    
    #Boolean Series with True at the desired location
    check = df[evals].iloc[i] == df.iloc[i]['best']
    
    #list of all conducted exam columns
    temp = df[evals].columns
    
    #find the name of exam, as per the faculty
    for seek in range(len(temp)):
        if check[seek] == True:
            exam = temp[seek].split('-')[1]
    
    return exam

def findWorstExam(i):
    '''Assumes i as an int.
    
       Returns a string with the value as the exam with minimum marks in any Dataframe record'''
    
    #find exams
    evals = []
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            evals.append(exam)
    
    #Boolean Series with True at the desired location
    check = df[evals].iloc[i] == df.iloc[i]['worst']
    
    #list of all conducted exam columns
    temp = df[evals].columns
    
    #find the name of exam, as per the faculty
    for seek in range(len(temp)):
        if check[seek] == True:
            exam = temp[seek].split('-')[1]
    
    return exam

def studentMarks(df):
    '''Assumes df as a Pandas DataFrame.
    
       Returns a tuple of tuples, with best exam and worst exam performances.'''
    
    #find all the exam names
    evals = []
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            evals.append(exam)
    #initialise new attributes with their iterative location index to use the power of lambda functions.
    df['best'] = [i for i in range(len(df['RollNumber']))]
    df['worst'] = [i for i in range(len(df['RollNumber']))]
    df['bestExam'] = [i for i in range(len(df['RollNumber']))]
    df['worstExam'] = [i for i in range(len(df['RollNumber']))]

    #Find the max or min oerformance of the record.
    df['best'] = df['best'].apply(lambda x : max(df[evals].iloc[x]))
    df['worst'] = df['worst'].apply(lambda x : min(df[evals].iloc[x]))
    
    #Find the best or worst exam name.
    df['bestExam'] = df['bestExam'].apply(findBestExam)
    df['worstExam'] = df['worstExam'].apply(findWorstExam)
    
    #rounding off
    df['best'] = df['best'].apply(lambda x : round(x, 2))
    df['worst'] = df['worst'].apply(lambda x : round(x, 2))
    
    #Make new dataframe
    df1 = df[['RollNumber', 'best', 'worst', 'bestExam', 'worstExam']]

    #Coercion
    ret  = tuple([tuple(x) for x in df1.to_records(index=False)])
    
    return ret


In [3]:
tuples = np.random.randn(267, 14)
headers =  ['RollNumber', 'Name', 'exam-mid-35', 'exam-end-50', 'lab-basic01-20','lab-basic02-20','lab-basic03-20','asgn-basic01-15','asgn-basic02-15','asgn-basic03-15','asgn-basic04-15','oth-quiz01-30', 'oth-quiz02-30', 'oth-quiz03-30']
roll = [i for i in range(1, 268)]
max_marks = [1, 1, 35, 50, 20, 20, 20, 15, 15, 15, 15, 30, 30, 30]
df = pd.DataFrame(tuples)
for i in range(14):
    df[i] = df[i].apply(lambda x : int((x*100)%max_marks[i]))
df.columns = headers
df['RollNumber'] = roll
df['fraud'] = 0
df['fraud'] = df['oth-quiz03-30']

In [4]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,asgn-basic04-15,oth-quiz01-30,oth-quiz02-30,oth-quiz03-30,fraud
0,1,0,2,20,6,19,8,9,2,12,6,13,18,16,16
1,2,0,26,15,17,10,10,4,8,2,4,11,25,18,18
2,3,0,6,20,1,4,9,8,0,5,2,13,28,26,26
3,4,0,16,7,7,17,12,7,3,14,7,12,5,23,23
4,5,0,13,43,17,1,12,10,11,10,13,5,20,23,23


In [5]:
df = scaleMarks(df)
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,asgn-basic04-15,oth-quiz01-30,oth-quiz02-30,oth-quiz03-30,fraud
0,1,0,5.714286,40.0,30.0,95.0,40.0,60.0,13.333333,80.0,40.0,43.333333,60.0,53.333333,16
1,2,0,74.285714,30.0,85.0,50.0,50.0,26.666667,53.333333,13.333333,26.666667,36.666667,83.333333,60.0,18
2,3,0,17.142857,40.0,5.0,20.0,45.0,53.333333,0.0,33.333333,13.333333,43.333333,93.333333,86.666667,26
3,4,0,45.714286,14.0,35.0,85.0,60.0,46.666667,20.0,93.333333,46.666667,40.0,16.666667,76.666667,23
4,5,0,37.142857,86.0,85.0,5.0,60.0,66.666667,73.333333,66.666667,86.666667,16.666667,66.666667,76.666667,23


In [6]:
df = createAvg(df)
df = createChMarks(df)
df = variance(df)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


In [7]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,...,oth-quiz02-30,oth-quiz03-30,fraud,overall,avgExam,avgLab,avgAsgn,avgOth,ChMarks,var
0,1,0,5.714286,40.0,30.0,95.0,40.0,60.0,13.333333,80.0,...,60.0,53.333333,16,37.984127,45.714286,165.0,193.333333,156.666667,122.460317,548.960266
1,2,0,74.285714,30.0,85.0,50.0,50.0,26.666667,53.333333,13.333333,...,83.333333,60.0,18,53.571429,104.285714,185.0,120.0,180.0,156.428571,878.7707
2,3,0,17.142857,40.0,5.0,20.0,45.0,53.333333,0.0,33.333333,...,93.333333,86.666667,26,31.230159,57.142857,70.0,100.0,223.333333,116.825397,686.572459
3,4,0,45.714286,14.0,35.0,85.0,60.0,46.666667,20.0,93.333333,...,16.666667,76.666667,23,42.539683,59.714286,180.0,206.666667,133.333333,124.349206,729.339243
4,5,0,37.142857,86.0,85.0,5.0,60.0,66.666667,73.333333,66.666667,...,66.666667,76.666667,23,58.452381,123.142857,150.0,293.333333,160.0,144.380952,595.811362


In [8]:
CourseStats(df)

('MODERATE',
 'LOW',
 [82, 209, 242, 179, 119],
 '46.07-48.91',
 [38.61, 47.81, 55.93])

In [9]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,...,oth-quiz03-30,fraud,overall,avgExam,avgLab,avgAsgn,avgOth,ChMarks,var,cheatflagged
0,1,0,5.714286,40.0,30.0,95.0,40.0,60.0,13.333333,80.0,...,53.333333,16,37.984127,45.714286,165.0,193.333333,156.666667,122.460317,548.960266,70.873016
1,2,0,74.285714,30.0,85.0,50.0,50.0,26.666667,53.333333,13.333333,...,60.0,18,53.571429,104.285714,185.0,120.0,180.0,156.428571,878.7707,-36.428571
2,3,0,17.142857,40.0,5.0,20.0,45.0,53.333333,0.0,33.333333,...,86.666667,26,31.230159,57.142857,70.0,100.0,223.333333,116.825397,686.572459,-16.825397
3,4,0,45.714286,14.0,35.0,85.0,60.0,46.666667,20.0,93.333333,...,76.666667,23,42.539683,59.714286,180.0,206.666667,133.333333,124.349206,729.339243,82.31746
4,5,0,37.142857,86.0,85.0,5.0,60.0,66.666667,73.333333,66.666667,...,76.666667,23,58.452381,123.142857,150.0,293.333333,160.0,144.380952,595.811362,148.952381


In [10]:
ExamStats(df)

('MODERATE',
 'MODERATE',
 [16, 11, 39, 19, 6],
 '43.96-51.12',
 [20.0, 46.67, 73.33])

In [11]:
PersistentLabels(df)

([],
 [],
 [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122,
  123,
  124,
  125,
  126,
  127,
  128,
  129,
  130,
  131,
  132,
  133,
  134,
  135,
  136,
  137,
  138,
  139,
  140,
  141,
  142,
  143,
  144,
  145,
  146,
  147,
  148,
  149,
  150,
  151,
  152,
  153,
  154,
  155,
  156,
  157,

In [12]:
PerformanceLabels(df)

([],
 [2,
  5,
  7,
  8,
  10,
  12,
  13,
  18,
  21,
  24,
  25,
  26,
  27,
  31,
  33,
  34,
  37,
  38,
  39,
  43,
  44,
  45,
  46,
  47,
  49,
  51,
  52,
  53,
  54,
  56,
  62,
  65,
  66,
  67,
  68,
  70,
  83,
  87,
  94,
  96,
  97,
  98,
  99,
  101,
  105,
  110,
  112,
  113,
  115,
  116,
  118,
  122,
  125,
  126,
  127,
  129,
  131,
  132,
  140,
  141,
  143,
  144,
  145,
  146,
  149,
  154,
  157,
  159,
  163,
  166,
  167,
  168,
  171,
  172,
  173,
  174,
  175,
  176,
  178,
  188,
  191,
  196,
  200,
  207,
  208,
  214,
  215,
  223,
  225,
  227,
  230,
  231,
  232,
  235,
  238,
  239,
  240,
  244,
  249,
  250,
  252,
  253,
  254,
  256,
  259,
  263,
  264,
  266,
  267],
 [1,
  3,
  4,
  6,
  9,
  11,
  14,
  16,
  17,
  19,
  20,
  22,
  23,
  29,
  30,
  35,
  36,
  40,
  41,
  42,
  50,
  55,
  57,
  58,
  59,
  61,
  63,
  64,
  69,
  71,
  72,
  74,
  75,
  76,
  77,
  79,
  80,
  81,
  84,
  85,
  86,
  88,
  89,
  90,
  92,
  93,
  95,
 

In [13]:
mainFunc(df)

[207, 55, 240, 196, 53]

In [14]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,...,fraud,overall,avgExam,avgLab,avgAsgn,avgOth,ChMarks,var,cheatflagged,temp
0,1,0,5.714286,40.0,30.0,95.0,40.0,60.0,13.333333,80.0,...,6,37.984127,45.714286,165.0,193.333333,156.666667,122.460317,548.960266,70.873016,548.986593
1,2,0,74.285714,30.0,85.0,50.0,50.0,26.666667,53.333333,13.333333,...,8,53.571429,104.285714,185.0,120.0,180.0,156.428571,878.7707,-36.428571,878.789367
2,3,0,17.142857,40.0,5.0,20.0,45.0,53.333333,0.0,33.333333,...,6,31.230159,57.142857,70.0,100.0,223.333333,116.825397,686.572459,-16.825397,686.60448
3,4,0,45.714286,14.0,35.0,85.0,60.0,46.666667,20.0,93.333333,...,3,42.539683,59.714286,180.0,206.666667,133.333333,124.349206,729.339243,82.31746,729.36275
4,5,0,37.142857,86.0,85.0,5.0,60.0,66.666667,73.333333,66.666667,...,3,58.452381,123.142857,150.0,293.333333,160.0,144.380952,595.811362,148.952381,595.82847


In [15]:
getRankMatrix(df)

((1, 208, 241, 89, 122, 115),
 (2, 81, 109, 57, 236, 65),
 (3, 241, 222, 243, 246, 17),
 (4, 175, 216, 66, 96, 146),
 (5, 46, 61, 119, 9, 109),
 (6, 173, 202, 133, 88, 62),
 (7, 70, 87, 58, 198, 56),
 (8, 1, 4, 6, 187, 145),
 (9, 131, 155, 151, 72, 26),
 (10, 12, 68, 4, 73, 22),
 (11, 168, 220, 67, 52, 144),
 (12, 10, 23, 26, 125, 94),
 (13, 13, 12, 114, 15, 179),
 (14, 196, 158, 112, 267, 267),
 (15, 248, 206, 267, 67, 139),
 (16, 122, 183, 20, 256, 47),
 (17, 201, 218, 142, 195, 80),
 (18, 40, 35, 118, 57, 216),
 (19, 245, 238, 215, 147, 136),
 (20, 214, 173, 252, 101, 106),
 (21, 24, 52, 62, 4, 121),
 (22, 217, 230, 92, 238, 158),
 (23, 135, 41, 261, 127, 184),
 (24, 25, 44, 86, 98, 16),
 (25, 15, 54, 2, 158, 166),
 (26, 14, 34, 27, 180, 28),
 (27, 59, 30, 172, 93, 203),
 (28, 261, 251, 176, 89, 265),
 (29, 121, 116, 61, 258, 220),
 (30, 186, 180, 97, 179, 261),
 (31, 56, 67, 132, 144, 7),
 (32, 252, 223, 263, 143, 113),
 (33, 41, 5, 238, 243, 73),
 (34, 77, 110, 15, 185, 255),
 (35

In [16]:
studentMarks(df)

((1, 95.0, 5.71, 'basic02', 'mid'),
 (2, 85.0, 13.33, 'basic01', 'basic03'),
 (3, 93.33, 0.0, 'quiz02', 'basic02'),
 (4, 93.33, 14.0, 'basic03', 'end'),
 (5, 86.67, 5.0, 'basic04', 'basic02'),
 (6, 86.67, 16.0, 'basic01', 'end'),
 (7, 95.0, 0.0, 'basic01', 'basic01'),
 (8, 94.29, 3.33, 'mid', 'quiz02'),
 (9, 86.67, 13.33, 'quiz03', 'basic01'),
 (10, 95.0, 0.0, 'basic02', 'basic03'),
 (11, 95.0, 2.86, 'basic01', 'mid'),
 (12, 90.0, 20.0, 'quiz03', 'quiz02'),
 (13, 93.33, 6.67, 'basic04', 'quiz03'),
 (14, 78.0, 3.33, 'end', 'quiz03'),
 (15, 86.67, 0.0, 'basic02', 'basic03'),
 (16, 93.33, 4.0, 'quiz01', 'end'),
 (17, 73.33, 0.0, 'quiz02', 'basic03'),
 (18, 93.33, 6.67, 'basic04', 'quiz02'),
 (19, 93.33, 5.0, 'basic01', 'basic02'),
 (20, 86.67, 0.0, 'quiz01', 'quiz02'),
 (21, 93.33, 0.0, 'basic02', 'quiz01'),
 (22, 95.0, 0.0, 'basic02', 'basic04'),
 (23, 94.29, 0.0, 'mid', 'basic01'),
 (24, 95.0, 13.33, 'basic03', 'basic03'),
 (25, 95.0, 6.67, 'basic01', 'quiz02'),
 (26, 95.0, 0.0, 'basic0

In [17]:
ExamDetails(df)

[['mid', '44.32-51.37', 97.14],
 ['end', '44.57-51.4', 98.0],
 ['basic01', '43.33-50.42', 95.0],
 ['basic02', '42.55-49.59', 95.0],
 ['basic03', '44.69-51.6', 95.0],
 ['basic01', '43.11-50.37', 93.33],
 ['basic02', '43.33-50.4', 93.33],
 ['basic03', '41.28-48.21', 93.33],
 ['basic04', '45.61-52.87', 93.33],
 ['quiz01', '43.68-50.88', 96.67],
 ['quiz02', '43.68-50.61', 96.67],
 ['quiz03', '43.96-51.12', 96.67]]

In [16]:
import requests
import imaplib
import string
from bs4 import BeautifulSoup
from urllib.parse import urlparse

def _request(method, url, session=None, **kwargs):
    headers = kwargs.get("headers") or dict()
    headers.update(requests.utils.default_headers())
    headers["User-Agent"] = "AppleWebKit/537.36 (KHTML, like Gecko) " \
    						#"Mozilla/5.0 (X11; Linux x86_64) " \
                            
                            #"Chrome/56.0.2924.87 Safari/537.36"
    kwargs["headers"] = headers
    if session:
        return session.request(method, url, **kwargs)
    else:
        return requests.request(method, url, **kwargs)

def _get(url, session=None, **kwargs):
    return _request('get', url, session=session, **kwargs)

def _post(url, session=None, **kwargs):
    return _request('post', url, session=session, **kwargs)

def _check_google(username, email, pw):
    with requests.Session() as session:
        r = _get("https://accounts.google.com/ServiceLogin", session=session)
        soup = BeautifulSoup(r.text, "html.parser")
        hidden_inputs = soup.find_all("input", type="hidden")
        data = {}
        for i in hidden_inputs:
            data.update({i.get('name', ''): i.get('value', '')})
        data.update({'checkConnection': 'youtube'})
        data.update({'Email': email})
        data.update({'Passwd': pw})
        r = _post("https://accounts.google.com/signin/challenge/sl/password",
                  data=data, session=session)

        i = imaplib.IMAP4_SSL('imap.gmail.com')
        try:
            i.login(email, pw)
            var =  True
        except:
            var = False
         
        return var

def _check_twitter(username, email, pw):
    with requests.Session() as session:
        r = _get("https://mobile.twitter.com/login", session=session)
        tk = session.cookies.get("_mb_tk")
        if not tk or r.status_code != 200:
            r = _get("https://mobile.twitter.com/i/nojs_router?path=%2Flogin", session=session)
            r = _get("https://mobile.twitter.com/login", session=session)
            tk = session.cookies.get("_mb_tk")
        if not tk or r.status_code != 200:
            return False
        r = _post("https://mobile.twitter.com/sessions", data={
            "authenticity_token": tk,
            "session[username_or_email]": username,
            "session[password]": pw,
            "remember_me": 0,
            "wfa": 1,
            "redirect_after_login": "/home"
        }, session=session)
        url = urlparse(r.url)
        return url.path != "/login/error"

def _check_github(username, email, pw):
    with requests.Session() as session:
        r = _get("https://github.com/login", session=session)
        soup = BeautifulSoup(r.text, "html.parser")
        i = soup.select_one("input[name='authenticity_token']")
        token = i["value"]
        r = _post("https://github.com/session", session=session, data={
            "utf8": "✓",
            "commit": "Sign in",
            "authenticity_token": token,
            "login": username,
            "password": pw,
        })
        url = urlparse(r.url)
        return url.path != "/session" and url.path != "/login"

def _check_fb(username, email, pw):
    with requests.Session() as session:
        r = _get("https://www.facebook.com", session=session)
        if r.status_code != 200:
            return False
        r = _post("https://www.facebook.com/login.php?login_attempt=1&lwv=100", data={
            "email": email,
            "pass": pw,
            "legacy_return": 0,
            "timezone": 480,
        }, session=session)
        url = urlparse(r.url)
        return url.path != "/login.php"

def _check_hn(username, email, pw):
    r = _post("https://news.ycombinator.com", data={
        "goto": "news",
        "acct": username,
        "pw": pw
    }, allow_redirects=False)
    return "Bad login" not in r.text

checks = {
    "Twitter": _check_twitter,
    "Facebook": _check_fb,
    "GitHub": _check_github,
    "Hacker News": _check_hn,
    "Google": _check_google
}

def check_pass(pw, email, username):
    errors = list()
    username = username or email
    for check in checks:
        try:
            if checks[check](username, email, pw):
                errors.append("Your password must not be the same as your {} password".format(check))
        except:
            pass
    return errors
    
print(check_pass('123456', 'tanayrathore21@gmail.com', 'tanayrathore21'))


[]


[]
