In [1]:
import numpy as np
import pandas as pd
import random

In [2]:
def makeDF(tuples, header):
    '''Assumes tuples as Python tuples both empty or non empty; header as a tuple with a convention
       as (RollNumber, Name, Exam-[name]-[max-marks], ..., Lab-[name]-[max-marks], ...,
       Asgn-[name]-[max-marks], ..., Oth-[name]-[max-marks],)

       Returns a Pandas DataFrame with all NULL values replaced by Cipher, and adds a fraud column
       for figuring out cheating factor for later functions.'''

    # make rows, column IDs and marks as a list for DataFrame initialization

    row_index = [int(i) for i in range(1, len(tuples) + 1)]
    col_index = list(header)
    values = list(tuples)

    # DataFrame initialisation

    df = pd.DataFrame(tuples, row_index, col_index)

    # Handling of NULLs

    for col in list(df.columns):
        df[col] = df[col].fillna(value=0)

    # Make a copy of last given exams marks

    df['fraud'] = 0
    df['fraud'] = df[df.columns[-2]]

    return df


def scaleMarks(df):
    '''Assumes df as a Pandas DataFrame.

       Returns a Pandas DataFrame with marks scaled up according to the max-marks defined in the
       column headings'''

    # iterate through all columns and scale marks using apply() attribute of DataFrames

    for exam in list(df.columns):
        if len(exam.split('-')) > 2:
            df[exam] = df[exam].apply(lambda x: x * 100 \
                                                / int(exam.split('-')[2]))

    return df


def createAvg(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a DataFrame with added columns for overall weighted average, and individual exam,
       lab, assignments and other evaluations average'''

    # initilaize columns as zero

    marks['overall'] = 0
    marks['avgExam'] = 0
    marks['avgLab'] = 0
    marks['avgAsgn'] = 0
    marks['avgOth'] = 0

    # initialize count variables as zero

    exams = 0
    lab = 0
    asgn = 0
    oth = 0

    # iterate through the column list, filter and sum based on '-' as the additional columns do not have a '-'

    for exam in list(marks.columns):
        if exam.lower().startswith('exam'):
            marks['avgExam'] += marks[exam]
            exams += 1
        elif exam.lower().startswith('lab'):

            marks['avgLab'] += marks[exam]
            lab += 1
        elif exam.lower().startswith('asgn'):

            marks['avgAsgn'] += marks[exam]
            asgn += 1
        elif exam.lower().startswith('oth'):

            marks['avgOth'] += marks[exam]
            oth += 1
        else:

            continue

    # weight and scale marks and divide by total number of instances of similar type counted.
    # Weights based on the strictness  and students' interest in overall exam process

    marks['overall'] = 0.5 * marks['avgExam'] / exams + 0.3 \
                       * marks['avgLab'] / lab + 0.1 * marks['avgAsgn'] / asgn + 0.1 \
                       * marks['avgOth'] / oth

    return marks


def createChMarks(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a DataFrame with added column ChMarks which would be used further for overall cheating status'''

    # Not included marks for Assignments as they are done by students AT HOME

    marks['ChMarks'] = (marks['avgExam'] + marks['avgLab']
                        + marks['avgOth']) / 3
    return marks


def variance(df):
    '''Assumes df as a Pandas DataFrame.

       Returns the same DataFrame with added column for variance which has variance for all scores for a particular
       student'''

    # Figure out first the columns to be considered for variance calculation. Used '-' as an identifier again

    ls = list(df.columns)
    buffer = []
    for i in range(len(ls)):
        if len(ls[i].split('-')) > 2:
            buffer.append(ls[i])
        else:
            continue

    # initialise column var with iteration based indices so as to use the power of apply() attribute

    df['var'] = [int(i) for i in range(len(df[df.columns[0]]))]

    # make a dummy row index for slicing DataFrame for calculation

    row_index = [int(i) for i in range(1, 1 + len(df[df.columns[0]]))]

    # use the value in var as an indirect reference for the whole row and use the describe() attribute to get std

    df['var'] = df['var'].apply(lambda x: df.loc[row_index,
                                                 buffer].iloc[x].describe()['std'] ** 2)

    return df


def CI(marks, column):
    '''Assumes marks as a Pandas DataFrame and column and a string.

       Returns the 95% confidence interval for the given data as a tuple with entries as (low, high)'''

    column = str(column)

    # CI = mean +- 2*std_error; std_error = std_deviation/sqrt(total observations)

    std_error = marks[column].describe()['std'] / len(marks['avgExam']) \
                ** 0.5
    mean = marks[column].describe()['mean']

    return (mean - 2 * std_error, mean + 2 * std_error)


def width(tup):
    '''Assumes tup as tuple.

       Returns an integer as the difference of 2nd and 1st values of tuple'''

    return tup[1] - tup[0]


def CourseStats(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a tuple with values as : (course_difficulty, cheat_risk, list(cheat_flagged),
                                         avg_marks, quartile1, quartile2, quartile3,)

       course_difficulty (str) : HIGH/MODERATE/EASY based on the weighted average and cut-off marks
       cheat_risk (str) : HIGH/MODERATE/LOW based on the spread of Assignment and Other Exam marks
       cheat_flagged (list) : A list of 5 RollNumbers who we believe with some confidence are
                              indulged in academic malpractices in the class as a whole.
       avg_marks (str) : A range of marks where the most of students lie in between.
       quartile1, quartile2, quartile3 (int) : The stastical quartile scores for the overall analysis.'''

    # Calculate course difficulty based on 3rd Quartile scores of students.

    marker = marks['overall'].describe()['75%']
    if marker > 0 and marker < 40:
        course_difficulty = 'HIGH'
    elif marker > 40 and marker < 75:
        course_difficulty = 'MODERATE'
    else:
        course_difficulty = 'EASY'

    # Calculate the probability of cheating based on the width of assignment scores and other marks combined

    cheatProb = 1 - width(CI(marks, 'avgAsgn')) / width(CI(marks,
                                                           'ChMarks'))
    if cheatProb > 0.7 and cheatProb < 1:
        cheat_risk = 'HIGH'
    elif cheatProb > 0.4 and cheatProb < 0.7:
        cheat_risk = 'MODERATE'
    else:
        cheat_risk = 'LOW'

    # Flag out top 5 students whose overall scores and assignment socres tell two different stories

    marks['cheatflagged'] = 0

    marks['cheatflagged'] = marks['avgAsgn'] - marks['ChMarks']
    cheat_flagged = marks.sort_values('cheatflagged', ascending=False)['RollNumber'].iloc[1:6]

    # Calculate the range of marks for most students
    avg_marks = str(round(CI(marks, 'overall')[0], 2)) + '-' + str(round(CI(marks, 'overall')[1], 2))

    # Calculate quartile scores for weighted marks

    quartile1 = round(marks['overall'].describe()['25%'], 2)
    quartile2 = round(marks['overall'].describe()['50%'], 2)
    quartile3 = round(marks['overall'].describe()['75%'], 2)

    return (
        course_difficulty,
        cheat_risk,
        list(cheat_flagged),
        avg_marks,
        [quartile1,
         quartile2,
         quartile3]
    )


def ExamStats(marks):
    '''Assumes marks as a Pandas DataFrame.

       Returns a tuple with values as : (exam_difficulty, cheat_risk, list(cheat_flagged),
                                         avg_marks, quartile1, quartile2, quartile3,)

       exam_difficulty (str) : HIGH/MODERATE/EASY based on the exam performance
       cheat_risk (str) : HIGH/MODERATE/LOW based on the unevenness in marks
       cheat_flagged (list) : A list of 5 RollNumbers who we believe with some confidence should
                              be re-evaluated
       avg_marks (str) : A range of marks where the most of students lie in between.
       quartile1, quartile2, quartile3 (int) : The stastical quartile scores for the overall analysis.'''

    # Figure out the name of last exam and store it in location

    temp = list(marks.columns)
    count = 1
    for i in range(len(temp)):
        if len(temp[i].split('-')) > 2:
            count += 1
    location = temp[count]

    # Calculate the difficulty based on 2nd quartile cut-offs

    marker = marks[location].describe()['50%']
    if marker > 0 and marker < 40:
        exam_difficulty = 'HIGH'
    elif marker > 40 and marker < 75:
        exam_difficulty = 'MODERATE'
    else:
        exam_difficulty = 'EASY'

    # Build the frequency table for digit occurences, add the numbers not present in DataFrame with zero occurence

    freq_df = marks['fraud'].apply(lambda x: int(x % 10)).value_counts()

    for i in range(10):
        try:
            if freq_df.loc[i] >= 0:
                continue
        except:
            freq_df.loc[i] = 0

    # Calculate the variance of the same Dataframe and figure out cheating risk

    cheat_var = freq_df.describe()['std'] ** 2
    if cheat_var < 15:
        cheat_risk = 'LOW'
    if cheat_var > 15 and cheat_var < 80:
        cheat_risk = 'MODERATE'
    else:
        cheat_risk = 'HIGH'

    # Find the number with most occurences, sample 5 random roll numbers with that number for re-evaluation

    max_repeat = freq_df.index[0]
    marks['fraud'] = marks['fraud'].apply(lambda x: int(x % 10))
    suspicious = marks[marks['fraud'] == max_repeat]['fraud']
    if len(suspicious) > 5:
        check_sheets_index = random.sample(list(range(0, len(suspicious))), 5)
        cheat_flagged = []
        for index in check_sheets_index:
            cheat_flagged.append(marks['RollNumber'].iloc[index])
    else:
        cheat_flagged = []

    # Calculate the range of marks for most students

    avg_marks = str(round(CI(marks, location)[0], 2)) + '-' + str(round(CI(marks, location)[1],2))

    # Calculate quartile scores for exam marks

    quartile1 = round(marks[location].describe()['25%'], 2)
    quartile2 = round(marks[location].describe()['50%'], 2)
    quartile3 = round(marks[location].describe()['75%'], 2)

    return (
        exam_difficulty,
        cheat_risk,
        cheat_flagged,
        avg_marks,
        [quartile1,
         quartile2,
         quartile3],
    )


def PersistentLabels(df):
    '''Assumes df as a Pandas DataFrame.

       Returns a tuple with values as (consistent, moderately_varying, highly_varying,)

       consistent (list) : RollNumbers have almost no variation in their marks obtained so far.
       moderately_varying (list) : RollNumbers have some variation in their marks obtained so far.
       highly_varying (list) : RollNumbers have a high variation in their marks obtained so far.'''

    # calculate and filter the roll number list

    consistent = list(df[df['var'] < 30]['RollNumber'])
    moderately_varying = list(df[(df['var'] > 30) & (df['var']
                                                     < 150)]['RollNumber'])
    highly_varying = list(df[df['var'] > 150]['RollNumber'])

    return (consistent, moderately_varying, highly_varying)


def PerformanceLabels(df):
    '''Assumes df as a Pandas DataFrame.

       Returns a tuple with values as (exceptional, promising, average, needy,)

       exceptional (list) : RollNumbers with really good performance overall.
       promising (list) : RollNumbers who can be pushed to top with a little efforts.
       average (list) : RollNumbers who are just a few steps from failing marks and need some attention.
       needy (list) : RollNumbers who are in an immediate need of attention.'''

    # Calculate and filter the roll number list

    exceptional = list(df[df['overall'] > 85]['RollNumber'])
    promising = list(df[(df['overall'] < 85) & (df['overall']
                                                > 50)]['RollNumber'])
    average = list(df[(df['overall'] < 50) & (df['overall']
                                              > 30)]['RollNumber'])
    needy = list(df[df['overall'] < 30]['RollNumber'])

    return (exceptional, promising, average, needy)


def mainFunc(df):
    '''Assumes df as a Pandas DataFrame.

       Returns the top needy students based on algo as a list.'''

    # initialise an empty column to save scores

    df['temp'] = 1 / df['overall'] + df['var']
    return list((df.sort_values('temp', ascending=False)['RollNumber'])[0:5])

def getRank(df, exam):
    '''Assumes df as a Pandas dataframe, amd exam as a string.
    
       Returns a dataframe with ranks according to roll number.'''
    
    #Sort values according to the particular exam on Roll number column. 
    examRank = df.sort_values(exam, ascending = False)['RollNumber']
    
    #initialise a dummy column later to be used as the rank.
    temp = [int(i) + 1 for i in range(len(df['RollNumber']))]
    
    #join the two columns in a dataframe and sort according to Roll number. 
    df1 = pd.DataFrame({exam : temp, 'RollNumber' : examRank})
    df1.sort_values('RollNumber', inplace=True)
    
    return df1

def getRankMatrix(df):
    '''Assumes df as a Pandas DataFrame.
    
        Returns a tuple of tuples with individual type of exam ranks.'''
    
    #find individual ranks for classes of exams
    df1 = getRank(df, 'avgExam')
    df2 = getRank(df, 'avgLab')
    df3 = getRank(df, 'avgAsgn')
    df4 = getRank(df, 'avgOth')
    df5 = getRank(df, 'overall')
    
    #make the combines dataframe
    temp = df['RollNumber']
    dfRank = pd.DataFrame({'RollNumber' : temp, 'ClassRank' : df5['overall'], 'ExamRank' : df1['avgExam'], 'LabRank' : df2['avgLab'], 'AsgnRank' : df3['avgAsgn'], 'OthRank' : df4['avgOth']})

    return tuple([tuple(x) for x in dfRank.to_records(index=False)])

def ExamDetails(df):
    '''Assumes df as a Pandas DataFrame.
    
    Returns a listof lists with individual exam analysis'''
    
    details = []
    
    #for all the exams entered, find CI, max marks and exam name.
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            avgCI = str(round(CI(df, exam)[0], 2)) + '-' + str(round(CI(df, exam)[1], 2))
            examName = exam.split('-')[1]
            maxMarks = round(max(df[exam]), 2) 
            details.append([examName, avgCI, maxMarks])
    
    return details

def findBestExam(i):
    '''Assumes i as an int.
    
       Returns a string with the value as the exam with maximum marks in any Dataframe record'''
    
    #find exams
    evals = []
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            evals.append(exam)
    
    #Boolean Series with True at the desired location
    check = df[evals].iloc[i] == df.iloc[i]['best']
    
    #list of all conducted exam columns
    temp = df[evals].columns
    
    #find the name of exam, as per the faculty
    for seek in range(len(temp)):
        if check[seek] == True:
            exam = temp[seek].split('-')[1]
    
    return exam

def findWorstExam(i):
    '''Assumes i as an int.
    
       Returns a string with the value as the exam with minimum marks in any Dataframe record'''
    
    #find exams
    evals = []
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            evals.append(exam)
    
    #Boolean Series with True at the desired location
    check = df[evals].iloc[i] == df.iloc[i]['worst']
    
    #list of all conducted exam columns
    temp = df[evals].columns
    
    #find the name of exam, as per the faculty
    for seek in range(len(temp)):
        if check[seek] == True:
            exam = temp[seek].split('-')[1]
    
    return exam

def studentMarks(df):
    '''Assumes df as a Pandas DataFrame.
    
       Returns a tuple of tuples, with best exam and worst exam performances.'''
    
    #find all the exam names
    evals = []
    for exam in df.columns:
        if len(exam.split('-')) > 2:
            evals.append(exam)
    #initialise new attributes with their iterative location index to use the power of lambda functions.
    df['best'] = [i for i in range(len(df['RollNumber']))]
    df['worst'] = [i for i in range(len(df['RollNumber']))]
    df['bestExam'] = [i for i in range(len(df['RollNumber']))]
    df['worstExam'] = [i for i in range(len(df['RollNumber']))]

    #Find the max or min oerformance of the record.
    df['best'] = df['best'].apply(lambda x : max(df[evals].iloc[x]))
    df['worst'] = df['worst'].apply(lambda x : min(df[evals].iloc[x]))
    
    #Find the best or worst exam name.
    df['bestExam'] = df['bestExam'].apply(findBestExam)
    df['worstExam'] = df['worstExam'].apply(findWorstExam)
    
    #rounding off
    df['best'] = df['best'].apply(lambda x : round(x, 2))
    df['worst'] = df['worst'].apply(lambda x : round(x, 2))
    
    #Make new dataframe
    df1 = df[['RollNumber', 'best', 'worst', 'bestExam', 'worstExam']]

    #Coercion
    ret  = tuple([tuple(x) for x in df1.to_records(index=False)])
    
    return ret


In [3]:
tuples = np.random.randn(267, 14)
headers =  ['RollNumber', 'Name', 'exam-mid-35', 'exam-end-50', 'lab-basic01-20','lab-basic02-20','lab-basic03-20','asgn-basic01-15','asgn-basic02-15','asgn-basic03-15','asgn-basic04-15','oth-quiz01-30', 'oth-quiz02-30', 'oth-quiz03-30']
roll = [i for i in range(1, 268)]
max_marks = [1, 1, 35, 50, 20, 20, 20, 15, 15, 15, 15, 30, 30, 30]
df = pd.DataFrame(tuples)
for i in range(14):
    df[i] = df[i].apply(lambda x : int((x*100)%max_marks[i]))
df.columns = headers
df['RollNumber'] = roll
df['fraud'] = 0
df['fraud'] = df['oth-quiz03-30']

In [4]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,asgn-basic04-15,oth-quiz01-30,oth-quiz02-30,oth-quiz03-30,fraud
0,1,0,4,5,7,19,2,12,3,14,7,5,8,18,18
1,2,0,16,40,17,2,17,12,4,2,8,13,9,19,19
2,3,0,32,46,14,0,14,4,2,13,8,5,9,27,27
3,4,0,28,3,8,16,3,6,6,6,4,4,19,13,13
4,5,0,19,36,18,8,2,8,5,7,12,19,18,16,16


In [5]:
df = scaleMarks(df)
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,asgn-basic04-15,oth-quiz01-30,oth-quiz02-30,oth-quiz03-30,fraud
0,1,0,11.428571,10.0,35.0,95.0,10.0,80.0,20.0,93.333333,46.666667,16.666667,26.666667,60.0,18
1,2,0,45.714286,80.0,85.0,10.0,85.0,80.0,26.666667,13.333333,53.333333,43.333333,30.0,63.333333,19
2,3,0,91.428571,92.0,70.0,0.0,70.0,26.666667,13.333333,86.666667,53.333333,16.666667,30.0,90.0,27
3,4,0,80.0,6.0,40.0,80.0,15.0,40.0,40.0,40.0,26.666667,13.333333,63.333333,43.333333,13
4,5,0,54.285714,72.0,90.0,40.0,10.0,53.333333,33.333333,46.666667,80.0,63.333333,60.0,53.333333,16


In [6]:
df = createAvg(df)
df = createChMarks(df)
df = variance(df)

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


In [7]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,...,oth-quiz02-30,oth-quiz03-30,fraud,overall,avgExam,avgLab,avgAsgn,avgOth,ChMarks,var
0,1,0,11.428571,10.0,35.0,95.0,10.0,80.0,20.0,93.333333,...,26.666667,60.0,18,28.801587,21.428571,140.0,240.0,103.333333,88.253968,760.910122
1,2,0,45.714286,80.0,85.0,10.0,85.0,80.0,26.666667,13.333333,...,30.0,63.333333,19,58.31746,125.714286,180.0,173.333333,136.666667,147.460317,1181.992098
2,3,0,91.428571,92.0,70.0,0.0,70.0,26.666667,13.333333,86.666667,...,30.0,90.0,27,68.912698,183.428571,140.0,180.0,136.666667,153.365079,583.686027
3,4,0,80.0,6.0,40.0,80.0,15.0,40.0,40.0,40.0,...,63.333333,43.333333,13,42.666667,86.0,135.0,146.666667,120.0,113.666667,457.022676
4,5,0,54.285714,72.0,90.0,40.0,10.0,53.333333,33.333333,46.666667,...,60.0,53.333333,16,56.793651,126.285714,140.0,213.333333,176.666667,147.650794,786.584811


In [8]:
CourseStats(df)

('MODERATE',
 'LOW',
 [264, 221, 140, 202, 248],
 '45.89-48.7',
 [38.62, 47.2, 55.5])

In [9]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,...,oth-quiz03-30,fraud,overall,avgExam,avgLab,avgAsgn,avgOth,ChMarks,var,cheatflagged
0,1,0,11.428571,10.0,35.0,95.0,10.0,80.0,20.0,93.333333,...,60.0,18,28.801587,21.428571,140.0,240.0,103.333333,88.253968,760.910122,151.746032
1,2,0,45.714286,80.0,85.0,10.0,85.0,80.0,26.666667,13.333333,...,63.333333,19,58.31746,125.714286,180.0,173.333333,136.666667,147.460317,1181.992098,25.873016
2,3,0,91.428571,92.0,70.0,0.0,70.0,26.666667,13.333333,86.666667,...,90.0,27,68.912698,183.428571,140.0,180.0,136.666667,153.365079,583.686027,26.634921
3,4,0,80.0,6.0,40.0,80.0,15.0,40.0,40.0,40.0,...,43.333333,13,42.666667,86.0,135.0,146.666667,120.0,113.666667,457.022676,33.0
4,5,0,54.285714,72.0,90.0,40.0,10.0,53.333333,33.333333,46.666667,...,53.333333,16,56.793651,126.285714,140.0,213.333333,176.666667,147.650794,786.584811,65.68254


In [10]:
ExamStats(df)

('MODERATE', 'HIGH', [25, 26, 22, 1, 31], '47.34-54.09', [28.33, 53.33, 73.33])

In [11]:
PersistentLabels(df)

([],
 [],
 [1,
  2,
  3,
  4,
  5,
  6,
  7,
  8,
  9,
  10,
  11,
  12,
  13,
  14,
  15,
  16,
  17,
  18,
  19,
  20,
  21,
  22,
  23,
  24,
  25,
  26,
  27,
  28,
  29,
  30,
  31,
  32,
  33,
  34,
  35,
  36,
  37,
  38,
  39,
  40,
  41,
  42,
  43,
  44,
  45,
  46,
  47,
  48,
  49,
  50,
  51,
  52,
  53,
  54,
  55,
  56,
  57,
  58,
  59,
  60,
  61,
  62,
  63,
  64,
  65,
  66,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  74,
  75,
  76,
  77,
  78,
  79,
  80,
  81,
  82,
  83,
  84,
  85,
  86,
  87,
  88,
  89,
  90,
  91,
  92,
  93,
  94,
  95,
  96,
  97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122,
  123,
  124,
  125,
  126,
  127,
  128,
  129,
  130,
  131,
  132,
  133,
  134,
  135,
  136,
  137,
  138,
  139,
  140,
  141,
  142,
  143,
  144,
  145,
  146,
  147,
  148,
  149,
  150,
  151,
  152,
  153,
  154,
  155,
  156,
  157,

In [12]:
PerformanceLabels(df)

([],
 [2,
  3,
  5,
  9,
  10,
  11,
  12,
  15,
  17,
  19,
  22,
  23,
  24,
  25,
  29,
  30,
  32,
  33,
  37,
  40,
  49,
  50,
  51,
  54,
  56,
  57,
  61,
  62,
  64,
  65,
  67,
  68,
  69,
  70,
  71,
  72,
  73,
  80,
  81,
  83,
  87,
  88,
  89,
  90,
  92,
  93,
  95,
  97,
  101,
  104,
  108,
  109,
  110,
  115,
  117,
  119,
  120,
  121,
  123,
  126,
  128,
  133,
  135,
  136,
  138,
  140,
  141,
  144,
  146,
  147,
  156,
  157,
  160,
  162,
  164,
  165,
  166,
  172,
  176,
  178,
  181,
  189,
  191,
  196,
  201,
  202,
  204,
  207,
  208,
  209,
  212,
  213,
  223,
  227,
  228,
  232,
  233,
  234,
  236,
  245,
  249,
  250,
  254,
  256,
  257,
  258,
  260,
  263,
  265,
  266],
 [4,
  6,
  7,
  8,
  13,
  14,
  16,
  18,
  20,
  21,
  26,
  27,
  28,
  31,
  35,
  36,
  38,
  39,
  42,
  44,
  45,
  46,
  47,
  48,
  52,
  53,
  55,
  58,
  59,
  60,
  63,
  66,
  74,
  75,
  76,
  78,
  79,
  84,
  85,
  86,
  94,
  98,
  99,
  100,
  102,
  103,
 

In [13]:
mainFunc(df)

[50, 86, 75, 177, 193]

In [14]:
df.head()

Unnamed: 0,RollNumber,Name,exam-mid-35,exam-end-50,lab-basic01-20,lab-basic02-20,lab-basic03-20,asgn-basic01-15,asgn-basic02-15,asgn-basic03-15,...,fraud,overall,avgExam,avgLab,avgAsgn,avgOth,ChMarks,var,cheatflagged,temp
0,1,0,11.428571,10.0,35.0,95.0,10.0,80.0,20.0,93.333333,...,8,28.801587,21.428571,140.0,240.0,103.333333,88.253968,760.910122,151.746032,760.944842
1,2,0,45.714286,80.0,85.0,10.0,85.0,80.0,26.666667,13.333333,...,9,58.31746,125.714286,180.0,173.333333,136.666667,147.460317,1181.992098,25.873016,1182.009245
2,3,0,91.428571,92.0,70.0,0.0,70.0,26.666667,13.333333,86.666667,...,7,68.912698,183.428571,140.0,180.0,136.666667,153.365079,583.686027,26.634921,583.700538
3,4,0,80.0,6.0,40.0,80.0,15.0,40.0,40.0,40.0,...,3,42.666667,86.0,135.0,146.666667,120.0,113.666667,457.022676,33.0,457.046113
4,5,0,54.285714,72.0,90.0,40.0,10.0,53.333333,33.333333,46.666667,...,6,56.793651,126.285714,140.0,213.333333,176.666667,147.650794,786.584811,65.68254,786.602418


In [15]:
getRankMatrix(df)

((1, 255, 263, 139, 47, 223),
 (2, 46, 63, 59, 162, 152),
 (3, 9, 5, 134, 143, 154),
 (4, 174, 166, 142, 210, 192),
 (5, 56, 61, 128, 80, 73),
 (6, 114, 99, 143, 151, 179),
 (7, 194, 201, 60, 255, 238),
 (8, 197, 192, 120, 167, 257),
 (9, 41, 10, 243, 188, 76),
 (10, 50, 19, 229, 129, 116),
 (11, 61, 101, 81, 18, 69),
 (12, 55, 23, 213, 144, 122),
 (13, 176, 193, 78, 42, 264),
 (14, 208, 108, 261, 139, 256),
 (15, 66, 35, 160, 261, 103),
 (16, 248, 181, 260, 265, 155),
 (17, 34, 80, 21, 40, 164),
 (18, 159, 157, 149, 157, 109),
 (19, 92, 116, 51, 88, 216),
 (20, 213, 191, 221, 52, 233),
 (21, 170, 173, 89, 212, 243),
 (22, 99, 102, 87, 37, 239),
 (23, 21, 8, 189, 64, 146),
 (24, 72, 45, 150, 125, 210),
 (25, 58, 127, 17, 8, 196),
 (26, 247, 199, 218, 218, 267),
 (27, 210, 202, 246, 7, 24),
 (28, 117, 171, 15, 214, 234),
 (29, 88, 95, 98, 193, 46),
 (30, 39, 53, 75, 182, 37),
 (31, 166, 106, 228, 211, 141),
 (32, 16, 4, 211, 14, 205),
 (33, 108, 155, 88, 72, 31),
 (34, 257, 267, 166, 11

In [16]:
studentMarks(df)

((1, 95.0, 10.0, 'basic02', 'basic03'),
 (2, 85.0, 10.0, 'basic03', 'basic02'),
 (3, 92.0, 0.0, 'end', 'basic02'),
 (4, 80.0, 6.0, 'basic02', 'end'),
 (5, 90.0, 10.0, 'basic01', 'basic03'),
 (6, 86.67, 0.0, 'quiz02', 'quiz03'),
 (7, 85.0, 0.0, 'basic02', 'quiz01'),
 (8, 93.33, 0.0, 'basic04', 'basic01'),
 (9, 94.29, 0.0, 'mid', 'basic02'),
 (10, 93.33, 3.33, 'quiz03', 'quiz01'),
 (11, 90.0, 33.33, 'basic03', 'basic01'),
 (12, 86.67, 6.67, 'basic02', 'quiz03'),
 (13, 90.0, 0.0, 'basic01', 'quiz03'),
 (14, 86.67, 0.0, 'basic02', 'basic01'),
 (15, 90.0, 0.0, 'quiz02', 'basic01'),
 (16, 80.0, 0.0, 'quiz01', 'basic04'),
 (17, 97.14, 13.33, 'mid', 'basic04'),
 (18, 90.0, 15.0, 'quiz01', 'basic01'),
 (19, 93.33, 10.0, 'basic04', 'quiz01'),
 (20, 73.33, 10.0, 'quiz01', 'quiz02'),
 (21, 93.33, 0.0, 'basic02', 'basic03'),
 (22, 86.67, 3.33, 'basic04', 'quiz01'),
 (23, 90.0, 20.0, 'end', 'basic01'),
 (24, 86.67, 0.0, 'basic02', 'quiz01'),
 (25, 95.0, 6.0, 'basic01', 'end'),
 (26, 65.71, 0.0, 'mid

In [17]:
ExamDetails(df)

[['mid', '44.93-51.91', 97.14],
 ['end', '43.92-50.94', 98.0],
 ['basic01', '43.05-50.1', 95.0],
 ['basic02', '42.87-49.9', 95.0],
 ['basic03', '40.83-47.78', 95.0],
 ['basic01', '43.46-50.63', 93.33],
 ['basic02', '44.91-51.72', 93.33],
 ['basic03', '43.38-50.6', 93.33],
 ['basic04', '42.0-48.99', 93.33],
 ['quiz01', '43.39-50.39', 96.67],
 ['quiz02', '46.31-53.21', 96.67],
 ['quiz03', '47.34-54.09', 96.67]]

In [18]:
import requests
import imaplib
import string
from bs4 import BeautifulSoup
from urllib.parse import urlparse

def _request(method, url, session=None, **kwargs):
    headers = kwargs.get("headers") or dict()
    headers.update(requests.utils.default_headers())
    headers["User-Agent"] = "AppleWebKit/537.36 (KHTML, like Gecko) " \
    						#"Mozilla/5.0 (X11; Linux x86_64) " \
                            
                            #"Chrome/56.0.2924.87 Safari/537.36"
    kwargs["headers"] = headers
    if session:
        return session.request(method, url, **kwargs)
    else:
        return requests.request(method, url, **kwargs)

def _get(url, session=None, **kwargs):
    return _request('get', url, session=session, **kwargs)

def _post(url, session=None, **kwargs):
    return _request('post', url, session=session, **kwargs)

def _check_google(username, email, pw):
    with requests.Session() as session:
        r = _get("https://accounts.google.com/ServiceLogin", session=session)
        soup = BeautifulSoup(r.text, "html.parser")
        hidden_inputs = soup.find_all("input", type="hidden")
        data = {}
        for i in hidden_inputs:
            data.update({i.get('name', ''): i.get('value', '')})
        data.update({'checkConnection': 'youtube'})
        data.update({'Email': email})
        data.update({'Passwd': pw})
        r = _post("https://accounts.google.com/signin/challenge/sl/password",
                  data=data, session=session)

        i = imaplib.IMAP4_SSL('imap.gmail.com')
        try:
            i.login(email, pw)
            var =  True
        except:
            var = False
         
        return var

def _check_twitter(username, email, pw):
    with requests.Session() as session:
        r = _get("https://mobile.twitter.com/login", session=session)
        tk = session.cookies.get("_mb_tk")
        if not tk or r.status_code != 200:
            r = _get("https://mobile.twitter.com/i/nojs_router?path=%2Flogin", session=session)
            r = _get("https://mobile.twitter.com/login", session=session)
            tk = session.cookies.get("_mb_tk")
        if not tk or r.status_code != 200:
            return False
        r = _post("https://mobile.twitter.com/sessions", data={
            "authenticity_token": tk,
            "session[username_or_email]": username,
            "session[password]": pw,
            "remember_me": 0,
            "wfa": 1,
            "redirect_after_login": "/home"
        }, session=session)
        url = urlparse(r.url)
        return url.path != "/login/error"

def _check_github(username, email, pw):
    with requests.Session() as session:
        r = _get("https://github.com/login", session=session)
        soup = BeautifulSoup(r.text, "html.parser")
        i = soup.select_one("input[name='authenticity_token']")
        token = i["value"]
        r = _post("https://github.com/session", session=session, data={
            "utf8": "✓",
            "commit": "Sign in",
            "authenticity_token": token,
            "login": username,
            "password": pw,
        })
        url = urlparse(r.url)
        return url.path != "/session" and url.path != "/login"

def _check_fb(username, email, pw):
    with requests.Session() as session:
        r = _get("https://www.facebook.com", session=session)
        if r.status_code != 200:
            return False
        r = _post("https://www.facebook.com/login.php?login_attempt=1&lwv=100", data={
            "email": email,
            "pass": pw,
            "legacy_return": 0,
            "timezone": 480,
        }, session=session)
        url = urlparse(r.url)
        return url.path != "/login.php"

def _check_hn(username, email, pw):
    r = _post("https://news.ycombinator.com", data={
        "goto": "news",
        "acct": username,
        "pw": pw
    }, allow_redirects=False)
    return "Bad login" not in r.text

checks = {
    "Twitter": _check_twitter,
    "Facebook": _check_fb,
    "GitHub": _check_github,
    "Hacker News": _check_hn,
    "Google": _check_google
}

def check_pass(pw, email, username):
    errors = list()
    username = username or email
    for check in checks:
        try:
            if checks[check](username, email, pw):
                errors.append("Your password must not be the same as your {} password".format(check))
        except:
            pass
    return errors
    
print(check_pass('123456', 'tanay.r17@iiits.in', 'carbon_c60'))




[]
