In [None]:
import numpy as np
import pandas as pd
import random

In [None]:
def makeDF(tuples, header):
    '''Build the marks DataFrame from raw rows.

       Assumes tuples as a (possibly empty) sequence of row tuples and header as a tuple following the
       convention (RollNumber, Name, Exam-[name]-[max-marks], ..., Lab-[name]-[max-marks], ...,
       Asgn-[name]-[max-marks], ..., Oth-[name]-[max-marks],)

       Returns a Pandas DataFrame whose rows are labelled 1..len(tuples), with every NULL replaced
       by zero and an extra 'fraud' column holding a copy of the last column named in header.'''

    # rows are labelled from 1, columns take the header names
    labels = list(range(1, len(tuples) + 1))
    frame = pd.DataFrame(tuples, labels, list(header))

    # a missing mark counts as zero
    frame = frame.fillna(0)

    # copy of the last given evaluation; ExamStats later reads it for its last-digit check
    frame['fraud'] = frame[frame.columns[-1]]

    return frame


def scaleMarks(df):
    '''Assumes df as a Pandas DataFrame whose evaluation columns are named [type]-[name]-[max-marks].

       Returns the same DataFrame (modified in place) with every evaluation column rescaled to a
       percentage of the max-marks given in its column heading. Columns whose name has fewer than
       two '-' separators are left untouched.'''

    for label in list(df.columns):
        parts = label.split('-')
        if len(parts) <= 2:
            continue

        # parse the heading once per column and scale the whole column in one vectorised step
        max_marks = int(parts[2])
        df[label] = df[label] * 100 / max_marks

    return df


def createAvg(marks):
    '''Assumes marks as a Pandas DataFrame whose evaluation columns start with exam/lab/asgn/oth
       (case-insensitive).

       Returns the same DataFrame (modified in place) with added columns 'overall' (weighted
       average: 0.5 exams, 0.3 labs, 0.1 assignments, 0.1 others) and 'avgExam', 'avgLab',
       'avgAsgn', 'avgOth' (per-category averages truncated to integers).

       A category with no columns keeps an average of 0 and contributes nothing to 'overall',
       instead of dividing by zero.'''

    # (column-name prefix, average column, weight in the overall score)
    # Weights based on the strictness and students' interest in overall exam process
    categories = (
        ('exam', 'avgExam', 0.5),
        ('lab', 'avgLab', 0.3),
        ('asgn', 'avgAsgn', 0.1),
        ('oth', 'avgOth', 0.1),
    )

    # snapshot the evaluation columns before the summary columns are added
    source_columns = list(marks.columns)

    # initialise the summary columns as zero
    marks['overall'] = 0
    for _prefix, avg_column, _weight in categories:
        marks[avg_column] = 0

    overall = 0
    for prefix, avg_column, weight in categories:
        members = [name for name in source_columns
                   if name.lower().startswith(prefix)]
        if not members:
            # nothing of this type was conducted: leave the average at zero
            continue

        # plain running sum, so a missing (NaN) mark propagates exactly as before
        total = marks[avg_column]
        for name in members:
            total = total + marks[name]

        overall = overall + weight * total / len(members)
        marks[avg_column] = (total / len(members)).astype('int64')

    marks['overall'] = overall

    return marks


def createChMarks(marks):
    '''Assumes marks as a Pandas DataFrame that already has 'avgExam', 'avgLab' and 'avgOth'.

       Returns the same DataFrame with an added column 'ChMarks': the plain mean of those three
       averages, used further for the overall cheating status.'''

    # Assignment marks are left out on purpose: they are done by students AT HOME
    supervised_total = marks['avgExam'] + marks['avgLab'] + marks['avgOth']
    marks['ChMarks'] = supervised_total / 3

    return marks


def variance(df):
    '''Assumes df as a Pandas DataFrame whose evaluation columns are named [type]-[name]-[max-marks].

       Returns the same DataFrame with an added column 'var' holding, for each student, the sample
       variance (ddof=1) of all their evaluation scores. Works for any row index; a student with
       fewer than two scores gets NaN.'''

    # evaluation columns are recognised by the two '-' separators in their name
    score_columns = [name for name in df.columns if len(name.split('-')) > 2]

    # one row-wise pass; no dependence on how the rows are labelled
    df['var'] = df[score_columns].var(axis=1)

    return df


def CI(marks, column):
    '''Assumes marks as a Pandas DataFrame and column as the name of one of its numeric columns.

       Returns the approximate 95% confidence interval of the column mean as a tuple (low, high),
       computed as mean +- 2 * std_error with std_error = std_deviation / sqrt(observations).
       An empty column yields (nan, nan).'''

    column = str(column)
    values = marks[column]

    # the sample size comes from the column itself, not from an unrelated 'avgExam' column
    observations = len(values)
    if observations == 0:
        return (float('nan'), float('nan'))

    std_error = values.std() / observations ** 0.5
    mean = values.mean()

    return (mean - 2 * std_error, mean + 2 * std_error)


def width(tup):
    '''Assumes tup as an indexable pair (low, high).

       Returns the difference between the 2nd and the 1st value of tup.'''

    low = tup[0]
    high = tup[1]
    return high - low


def CourseStats(marks):
    '''Assumes marks as a Pandas DataFrame with 'overall', 'avgAsgn', 'ChMarks' and 'RollNumber'.

       Returns a tuple with values as : (course_difficulty, cheat_risk, cheat_flagged,
                                         avg_marks, [quartile1, quartile2, quartile3],)

       course_difficulty (str) : HIGH/MODERATE/EASY based on the 3rd quartile of the weighted
                                 average (below 40 / below 75 / otherwise)
       cheat_risk (str) : HIGH/MODERATE/LOW based on the spread of Assignment marks relative to
                          the supervised marks (at least 0.7 / at least 0.4 / otherwise)
       cheat_flagged (list) : The 5 RollNumbers whose assignment average exceeds their supervised
                              marks the most.
       avg_marks (str) : A range "low-high" of marks where most of the students lie in between.
       quartile1, quartile2, quartile3 (float) : The statistical quartile scores of 'overall',
                                                 rounded to 2 places and returned as one list.

       Side effect: adds (or overwrites) the column 'cheatflagged' on marks.'''

    summary = marks['overall'].describe()

    # Course difficulty from the 3rd quartile; every value now falls in exactly one band
    marker = summary['75%']
    if marker < 40:
        course_difficulty = 'HIGH'
    elif marker < 75:
        course_difficulty = 'MODERATE'
    else:
        course_difficulty = 'EASY'

    # A narrow assignment interval next to a wide supervised-marks interval suggests copying
    cheatProb = 1 - width(CI(marks, 'avgAsgn')) / width(CI(marks, 'ChMarks'))
    if cheatProb >= 0.7:
        cheat_risk = 'HIGH'
    elif cheatProb >= 0.4:
        cheat_risk = 'MODERATE'
    else:
        cheat_risk = 'LOW'

    # Flag the top 5 students whose assignment scores and supervised scores tell different
    # stories; the slice starts at 0 so the most suspicious student is included
    marks['cheatflagged'] = marks['avgAsgn'] - marks['ChMarks']
    ranked = marks.sort_values('cheatflagged', ascending=False)
    cheat_flagged = ranked['RollNumber'].iloc[:5]

    # Range of marks for most students
    low, high = CI(marks, 'overall')
    avg_marks = str(round(low, 2)) + '-' + str(round(high, 2))

    # Quartile scores for weighted marks
    quartiles = [round(summary[key], 2) for key in ('25%', '50%', '75%')]

    return (
        course_difficulty,
        cheat_risk,
        list(cheat_flagged),
        avg_marks,
        quartiles,
    )


def ExamStats(marks):
    '''Assumes marks as a Pandas DataFrame with 'RollNumber', 'fraud' and at least one evaluation
       column named [type]-[name]-[max-marks].

       Returns a tuple with values as : (exam_difficulty, cheat_risk, cheat_flagged,
                                         avg_marks, [quartile1, quartile2, quartile3],)

       exam_difficulty (str) : HIGH/MODERATE/EASY based on the median of the last evaluation
                               (below 40 / below 75 / otherwise)
       cheat_risk (str) : LOW/MODERATE/HIGH based on the unevenness of last-digit frequencies
                          (variance below 15 / below 80 / otherwise)
       cheat_flagged (list) : 5 randomly chosen RollNumbers among those sharing the most common
                              last digit, to be re-evaluated; empty when there are 5 or fewer.
       avg_marks (str) : A range "low-high" of marks where most of the students lie in between.
       quartile1, quartile2, quartile3 (float) : The quartile scores of the last evaluation,
                                                 rounded to 2 places and returned as one list.

       Side effect: the 'fraud' column of marks is overwritten with its last digit.
       Raises ValueError when marks has no evaluation column.'''

    # The last evaluation is the last column whose name carries two '-' separators
    scored = [name for name in marks.columns if len(name.split('-')) > 2]
    if not scored:
        raise ValueError('marks has no evaluation column named type-name-maxmarks')
    location = scored[-1]

    summary = marks[location].describe()

    # Difficulty from the median; every value now falls in exactly one band
    marker = summary['50%']
    if marker < 40:
        exam_difficulty = 'HIGH'
    elif marker < 75:
        exam_difficulty = 'MODERATE'
    else:
        exam_difficulty = 'EASY'

    # Frequency table of last digits, most frequent first; absent digits are appended with zero
    digits = marks['fraud'].apply(lambda x: int(x % 10))
    freq_df = digits.value_counts()
    for digit in range(10):
        if digit not in freq_df.index:
            freq_df.loc[digit] = 0

    # Variance of the digit counts; a single if/elif chain keeps 'LOW' reachable
    cheat_var = freq_df.var()
    if cheat_var < 15:
        cheat_risk = 'LOW'
    elif cheat_var < 80:
        cheat_risk = 'MODERATE'
    else:
        cheat_risk = 'HIGH'

    # Sample 5 roll numbers among the students who actually share the most common digit
    max_repeat = freq_df.index[0]
    marks['fraud'] = digits
    suspicious = list(marks.loc[digits == max_repeat, 'RollNumber'])
    if len(suspicious) > 5:
        cheat_flagged = random.sample(suspicious, 5)
    else:
        cheat_flagged = []

    # Range of marks for most students
    low, high = CI(marks, location)
    avg_marks = str(round(low, 2)) + '-' + str(round(high, 2))

    # Quartile scores for exam marks
    quartiles = [round(summary[key], 2) for key in ('25%', '50%', '75%')]

    return (
        exam_difficulty,
        cheat_risk,
        cheat_flagged,
        avg_marks,
        quartiles,
    )


def PersistentLabels(df):
    '''Assumes df as a Pandas DataFrame with 'var' and 'RollNumber' columns.

       Returns a tuple with values as (consistent, moderately_varying, highly_varying,)

       consistent (list) : RollNumbers with variance below 30.
       moderately_varying (list) : RollNumbers with variance from 30 up to, not including, 150.
       highly_varying (list) : RollNumbers with variance of 150 or more.

       The bands are half-open so that a variance of exactly 30 or 150 is not dropped. Students
       whose variance is NaN appear in no list.'''

    spread = df['var']
    rolls = df['RollNumber']

    consistent = list(rolls[spread < 30])
    moderately_varying = list(rolls[(spread >= 30) & (spread < 150)])
    highly_varying = list(rolls[spread >= 150])

    return (consistent, moderately_varying, highly_varying)


def PerformanceLabels(df):
    '''Assumes df as a Pandas DataFrame with 'overall' and 'RollNumber' columns.

       Returns a tuple with values as (exceptional, promising, average, needy,)

       exceptional (list) : RollNumbers with an overall score of 85 or more.
       promising (list) : RollNumbers from 50 up to, not including, 85.
       average (list) : RollNumbers from 30 up to, not including, 50.
       needy (list) : RollNumbers below 30, in an immediate need of attention.

       The bands are half-open so that a score of exactly 85, 50 or 30 is not dropped. Students
       whose score is NaN appear in no list.'''

    score = df['overall']
    rolls = df['RollNumber']

    exceptional = list(rolls[score >= 85])
    promising = list(rolls[(score >= 50) & (score < 85)])
    average = list(rolls[(score >= 30) & (score < 50)])
    needy = list(rolls[score < 30])

    return (exceptional, promising, average, needy)


def mainFunc(df):
    '''Assumes df as a Pandas DataFrame with 'overall', 'var' and 'RollNumber' columns.

       Returns a list with the RollNumbers of the 5 students scoring highest on
       1 / overall + var, i.e. the top needy students as per the algo.

       Side effect: stores that score on df in the column 'temp'.'''

    # a low overall score and a high variance both push the score up
    inverse_overall = 1 / df['overall']
    df['temp'] = inverse_overall + df['var']

    ranked = df.sort_values('temp', ascending=False)
    return list(ranked['RollNumber'].iloc[:5])

def getRank(df, exam):
    '''Assumes df as a Pandas DataFrame with a 'RollNumber' column, and exam as the name of one
       of its score columns.

       Returns a two-column DataFrame (exam, 'RollNumber') sorted by RollNumber, where the exam
       column holds each student's rank (1 = highest score). Rows keep the index labels of df.'''

    # roll numbers ordered from the best score to the worst
    by_score = df.sort_values(exam, ascending=False)['RollNumber']

    # positions 1..n line up with that ordering and become the ranks
    positions = list(range(1, len(df['RollNumber']) + 1))

    ranked = pd.DataFrame({exam: positions, 'RollNumber': by_score})

    return ranked.sort_values('RollNumber')

def BoxPlot(df):
    '''Assumes df as a Pandas DataFrame with 'avgExam', 'avgLab', 'avgAsgn' and 'avgOth'.

       Returns the four averages of every student for visualization, as a tuple holding one
       (avgExam, avgLab, avgAsgn, avgOth) tuple per row.'''

    averages = df[['avgExam', 'avgLab', 'avgAsgn', 'avgOth']]
    records = averages.to_records(index=False)

    return tuple(tuple(record) for record in records)

def getRankMatrix(df):
    '''Assumes df as a Pandas DataFrame with 'RollNumber', 'overall' and the four avg* columns.

       Returns a tuple of tuples, one per student, laid out as
       (RollNumber, ClassRank, ExamRank, LabRank, AsgnRank, OthRank).'''

    # output column -> score column it is ranked on
    rank_sources = (
        ('ClassRank', 'overall'),
        ('ExamRank', 'avgExam'),
        ('LabRank', 'avgLab'),
        ('AsgnRank', 'avgAsgn'),
        ('OthRank', 'avgOth'),
    )

    # the rank Series are matched to the roll numbers through their shared index labels
    columns = {'RollNumber': df['RollNumber']}
    for rank_name, score_column in rank_sources:
        columns[rank_name] = getRank(df, score_column)[score_column]

    dfRank = pd.DataFrame(columns)

    return tuple(tuple(record) for record in dfRank.to_records(index=False))

def ExamDetails(df):
    '''Assumes df as a Pandas DataFrame whose evaluation columns are named [type]-[name]-[max-marks].

       Returns a list of lists with the individual exam analysis, one [examName, avgCI, maxMarks]
       entry per evaluation column: the name part of the heading, the confidence interval as a
       "low-high" string and the highest mark present in the column, rounded to 2 places.'''

    details = []

    for column in df.columns:
        parts = column.split('-')
        if len(parts) <= 2:
            # not an evaluation column
            continue

        low, high = CI(df, column)
        avgCI = str(round(low, 2)) + '-' + str(round(high, 2))
        highest = round(max(df[column]), 2)
        details.append([parts[1], avgCI, highest])

    return details

def findBestExam(i, frame=None):
    '''Assumes i as an int (row position) and frame as a Pandas DataFrame with a 'best' column.
       When frame is omitted the module-level DataFrame df is used, as before.

       Returns a string with the name part of the evaluation column whose mark equals the
       record's 'best' value; if several columns tie, the last one wins. Returns None when no
       column matches.'''

    if frame is None:
        frame = df

    # evaluation columns are the ones named [type]-[name]-[max-marks]
    evals = [name for name in frame.columns if len(name.split('-')) > 2]

    record = frame.iloc[i]
    target = record['best']

    # look every mark up by its column label, never by position
    found = None
    for name in evals:
        if record[name] == target:
            found = name.split('-')[1]

    return found

def findWorstExam(i, frame=None):
    '''Assumes i as an int (row position) and frame as a Pandas DataFrame with a 'worst' column.
       When frame is omitted the module-level DataFrame df is used, as before.

       Returns a string with the name part of the evaluation column whose mark equals the
       record's 'worst' value; if several columns tie, the last one wins. Returns None when no
       column matches.'''

    if frame is None:
        frame = df

    # evaluation columns are the ones named [type]-[name]-[max-marks]
    evals = [name for name in frame.columns if len(name.split('-')) > 2]

    record = frame.iloc[i]
    target = record['worst']

    # look every mark up by its column label, never by position
    found = None
    for name in evals:
        if record[name] == target:
            found = name.split('-')[1]

    return found

def studentMarks(df):
    '''Assumes df as a Pandas DataFrame with 'RollNumber' and at least one evaluation column
       named [type]-[name]-[max-marks].

       Returns a tuple of tuples, one per student, laid out as
       (RollNumber, best, worst, bestExam, worstExam): the highest and lowest mark rounded to
       2 places and the name part of the evaluation where each was obtained (on a tie the last
       such column is reported).

       Works on the DataFrame it is given rather than on a module-level one.
       Side effect: adds the columns 'best', 'worst', 'bestExam' and 'worstExam' to df.
       Raises ValueError when df has no evaluation column.'''

    # find all the evaluation columns
    evals = [name for name in df.columns if len(name.split('-')) > 2]
    if not evals:
        raise ValueError('df has no evaluation column named type-name-maxmarks')

    scores = df[evals]

    # idxmax/idxmin report the first hit, so scan the columns right-to-left to get the last one
    reversed_scores = scores[evals[::-1]]
    best_column = reversed_scores.idxmax(axis=1)
    worst_column = reversed_scores.idxmin(axis=1)

    df['best'] = scores.max(axis=1)
    df['worst'] = scores.min(axis=1)
    df['bestExam'] = best_column.apply(lambda name: name.split('-')[1])
    df['worstExam'] = worst_column.apply(lambda name: name.split('-')[1])

    # rounding off
    df['best'] = df['best'].apply(lambda x: round(x, 2))
    df['worst'] = df['worst'].apply(lambda x: round(x, 2))

    summary = df[['RollNumber', 'best', 'worst', 'bestExam', 'worstExam']]

    return tuple(tuple(record) for record in summary.to_records(index=False))


In [None]:
# Synthetic marks sheet: 267 students, 14 columns laid out as the header convention expects.
headers = [
    'RollNumber', 'Name',
    'exam-mid-35', 'exam-end-50',
    'lab-basic01-20', 'lab-basic02-20', 'lab-basic03-20',
    'asgn-basic01-15', 'asgn-basic02-15', 'asgn-basic03-15', 'asgn-basic04-15',
    'oth-quiz01-30', 'oth-quiz02-30', 'oth-quiz03-30',
]
max_marks = [1, 1, 35, 50, 20, 20, 20, 15, 15, 15, 15, 30, 30, 30]
roll = list(range(1, 268))

# standard-normal noise, one column per header entry
tuples = np.random.randn(267, 14)
df = pd.DataFrame(tuples)

# fold every column into [0, max_marks) and truncate to whole marks
for i in range(14):
    df[i] = ((df[i] * 100) % max_marks[i]).astype('int64')

df.columns = headers
df['RollNumber'] = roll

# copy of the last evaluation's marks, as makeDF would add it
df['fraud'] = df['oth-quiz03-30']

In [None]:
# Load the marks sheet from 'test4.csv' (path relative to the working directory) into df1
# and show it as the cell output.
df1 = pd.read_csv('test4.csv')
df1

In [None]:
# Preview the first rows of the loaded sheet.
df1.head()

In [None]:
# Rescale every evaluation column of df1 by the max-marks in its heading, then preview it.
df1 = scaleMarks(df1)
df1.head()

In [None]:
# Add the derived columns to df1: category averages and 'overall', then 'ChMarks', then 'var'.
df1 = createAvg(df1)
# bare expression in the middle of the cell: it has no effect on df1
df1
df1 = createChMarks(df1)
df1 = variance(df1)

In [None]:
# Show df1 with the derived columns.
df1

In [None]:
# Course-level statistics.
# NOTE(review): this passes `df` (the synthetic frame), not `df1`. CourseStats reads 'overall',
# 'avgAsgn' and 'ChMarks', which the visible cells only add to `df1` - confirm which frame is meant.
CourseStats(df)

In [None]:
# Preview the first rows of df.
df.head()

In [None]:
# Statistics for the last evaluation; this call also overwrites df['fraud'] with its last digit.
ExamStats(df)

In [None]:
# Group roll numbers by how much their marks vary.
# NOTE(review): needs the 'var' column, which the visible cells only add to `df1` - confirm.
PersistentLabels(df)

In [None]:
# Group roll numbers by overall performance.
# NOTE(review): needs the 'overall' column, which the visible cells only add to `df1` - confirm.
PerformanceLabels(df)

In [None]:
# Top needy students.
# NOTE(review): needs 'overall' and 'var', which the visible cells only add to `df1` - confirm.
mainFunc(df)

In [None]:
# Preview df again after the calls above.
df.head()

In [None]:
# Per-student ranks for the overall score and the four category averages.
# NOTE(review): needs 'overall' and the avg* columns, which the visible cells only add to
# `df1` - confirm.
getRankMatrix(df)

In [None]:
# Best and worst evaluation of every student; this call adds four columns to df.
studentMarks(df)

In [None]:
# Per-evaluation name, confidence interval and highest mark.
ExamDetails(df)

In [None]:
import requests
import imaplib
import string
from bs4 import BeautifulSoup
from urllib.parse import urlparse

def _request(method, url, session=None, **kwargs):
    '''Send an HTTP request with a fixed User-Agent.

       method and url are passed to requests unchanged. When session is given the request goes
       through it, otherwise through the module-level requests.request. Extra keyword arguments
       are forwarded.

       Returns the response object produced by requests.'''

    # work on a copy so a headers dict supplied by the caller is never modified
    headers = dict(kwargs.get("headers") or {})

    # the library defaults are applied on top, so they replace caller values of the same name
    headers.update(requests.utils.default_headers())
    headers["User-Agent"] = "AppleWebKit/537.36 (KHTML, like Gecko) "
    kwargs["headers"] = headers

    if session:
        return session.request(method, url, **kwargs)
    return requests.request(method, url, **kwargs)

def _get(url, session=None, **kwargs):
    '''Send a GET request through _request, forwarding session and any extra keyword arguments.'''
    return _request('get', url, session=session, **kwargs)

def _post(url, session=None, **kwargs):
    '''Send a POST request through _request, forwarding session and any extra keyword arguments.'''
    return _request('post', url, session=session, **kwargs)

def _check_google(username, email, pw):
    '''Try email/pw against Google.

       Posts the credentials to the Google sign-in form, then attempts an IMAP login on
       imap.gmail.com. Only the IMAP attempt decides the result; username is not used.

       Returns True when the IMAP login succeeds and False when it raises.

       NOTE(review): this sends the supplied password to a third-party service.'''

    with requests.Session() as session:
        r = _get("https://accounts.google.com/ServiceLogin", session=session)
        soup = BeautifulSoup(r.text, "html.parser")

        # carry over the hidden form fields, then add the credentials
        data = {}
        for field in soup.find_all("input", type="hidden"):
            data[field.get('name', '')] = field.get('value', '')
        data['checkConnection'] = 'youtube'
        data['Email'] = email
        data['Passwd'] = pw

        # the response of this POST is not inspected
        _post("https://accounts.google.com/signin/challenge/sl/password",
              data=data, session=session)

        # the context manager logs out and closes the connection on every path
        with imaplib.IMAP4_SSL('imap.gmail.com') as mailbox:
            try:
                mailbox.login(email, pw)
                matched = True
            except Exception:
                # any login failure counts as "not the same password"
                matched = False

        return matched

def _check_twitter(username, email, pw):
    '''Try username/pw against the mobile Twitter login form; email is not used.

       Returns False when no "_mb_tk" cookie can be obtained or the login page does not answer
       with status 200. Otherwise returns True unless the final URL path after the POST is
       "/login/error".

       NOTE(review): this sends the supplied password to a third-party service.'''
    with requests.Session() as session:
        r = _get("https://mobile.twitter.com/login", session=session)
        # the "_mb_tk" cookie is sent back below as the form's authenticity token
        tk = session.cookies.get("_mb_tk")
        if not tk or r.status_code != 200:
            # second attempt: request the no-JS router first, then the login page again
            r = _get("https://mobile.twitter.com/i/nojs_router?path=%2Flogin", session=session)
            r = _get("https://mobile.twitter.com/login", session=session)
            tk = session.cookies.get("_mb_tk")
        if not tk or r.status_code != 200:
            return False
        r = _post("https://mobile.twitter.com/sessions", data={
            "authenticity_token": tk,
            "session[username_or_email]": username,
            "session[password]": pw,
            "remember_me": 0,
            "wfa": 1,
            "redirect_after_login": "/home"
        }, session=session)
        # the outcome is read from where the POST ended up
        url = urlparse(r.url)
        return url.path != "/login/error"

def _check_github(username, email, pw):
    '''Try username/pw against the GitHub login form; email is not used.

       Returns True when the final URL path after the POST is neither "/session" nor "/login".
       Raises TypeError when the login page has no authenticity_token input (select_one then
       returns None).

       NOTE(review): this sends the supplied password to a third-party service.'''
    with requests.Session() as session:
        r = _get("https://github.com/login", session=session)
        soup = BeautifulSoup(r.text, "html.parser")
        # the token from the login page is posted back with the credentials
        i = soup.select_one("input[name='authenticity_token']")
        token = i["value"]
        r = _post("https://github.com/session", session=session, data={
            "utf8": "✓",
            "commit": "Sign in",
            "authenticity_token": token,
            "login": username,
            "password": pw,
        })
        # the outcome is read from where the POST ended up
        url = urlparse(r.url)
        return url.path != "/session" and url.path != "/login"

def _check_fb(username, email, pw):
    '''Try email/pw against the Facebook login form; username is not used.

       Returns False when the front page does not answer with status 200. Otherwise returns True
       unless the final URL path after the POST is "/login.php".

       NOTE(review): this sends the supplied password to a third-party service.'''
    with requests.Session() as session:
        # the front page is requested first, on the same session as the POST
        r = _get("https://www.facebook.com", session=session)
        if r.status_code != 200:
            return False
        r = _post("https://www.facebook.com/login.php?login_attempt=1&lwv=100", data={
            "email": email,
            "pass": pw,
            "legacy_return": 0,
            "timezone": 480,
        }, session=session)
        # the outcome is read from where the POST ended up
        url = urlparse(r.url)
        return url.path != "/login.php"

def _check_hn(username, email, pw):
    '''Try username/pw against the Hacker News login form; email is not used.

       Returns True when the response body does not contain "Bad login".

       NOTE(review): this sends the supplied password to a third-party service.'''
    # no session is used and redirects are not followed: only the direct response is inspected
    r = _post("https://news.ycombinator.com", data={
        "goto": "news",
        "acct": username,
        "pw": pw
    }, allow_redirects=False)
    return "Bad login" not in r.text

# Service name -> check function. Every function takes (username, email, pw); check_pass walks
# this mapping in order and uses the key in its error message.
checks = {
    "Twitter": _check_twitter,
    "Facebook": _check_fb,
    "GitHub": _check_github,
    "Hacker News": _check_hn,
    "Google": _check_google
}

def check_pass(pw, email, username):
    '''Run every registered service check against the given credentials.

       pw is the candidate password, email the account e-mail, and username the account name;
       an empty username falls back to email.

       Returns a list of error messages, one per service whose check reported that the password
       is accepted there. A check that raises is skipped.

       NOTE(review): every check sends the supplied password to a third-party service.'''

    username = username or email
    errors = []

    for service, check in checks.items():
        try:
            reused = check(username, email, pw)
        except Exception:
            # best effort: a failing check is skipped, but KeyboardInterrupt and SystemExit
            # are no longer swallowed
            continue
        if reused:
            errors.append("Your password must not be the same as your {} password".format(service))

    return errors
    
# NOTE(review): running this statement calls every function in `checks`, i.e. it makes live
# login attempts against third-party services with the e-mail address and password hard-coded
# below. Consider removing it or guarding it before committing.
print(check_pass('123456', 'tanay.r17@iiits.in', 'carbon_c60'))
