In [None]:
# To be run early on results day.
# Will take up to an hour to generate transcripts for all students.

In [None]:
import datetime
import logging
import os.path
from concurrent.futures import ThreadPoolExecutor

import numpy as np
import pandas as pd
import requests
import yaml
from bs4 import BeautifulSoup
from weasyprint import HTML, CSS

In [None]:
# Path to the YAML file holding the BlueCastle login; it is read below and must
# provide the 'username' and 'password' keys.
credentials = '/Users/spb/.uon_credentials.yaml'

In [None]:
# Root folder for the generated PDFs; each transcript is written to
# <outpath>/<tutor>/<year>/<student name>.pdf.
# The commented-out path is a OneDrive folder for sharing with tutors; the
# active path is a local directory.
#outpath = '/Users/spb/The University of Nottingham/Physics_Tutors - Documents/Transcripts/'
outpath = '/Users/spb/tmp/local_transcripts/'

In [None]:
# loginurl receives the UserName/Password form post that authenticates a session;
# studenturl is then posted a 'selectedStudent' ID to fetch that student's page.
loginurl = 'https://bluecastle-uk-results.nottingham.ac.uk/login'
studenturl = 'https://bluecastle-uk-results.nottingham.ac.uk/Administration/StudentView/StudentView'

In [None]:
# Stylesheet handed to WeasyPrint when each transcript PDF is written.
# The .covidIncomplete rule greys out table rows that transcript_from_response
# tags for modules listed in covid_modules.
style = CSS(string='''
table  {border-collapse: collapse;
        page-break-inside: avoid;
        page-break-before: avoid;}
td, th {padding-left: 10px;}
h1, h2, h3 {margin-top: 4px;
            margin-bottom: 8px;}
p {margin-top: 10px;
   margin-bottom: 4px;}
@page {margin: 1.5cm 1.5cm 1.5cm 1.5cm;}
.covidIncomplete {color: grey;}
''')

In [None]:
# Build the list of module codes ("<Subject> <Catalog Number>") from the
# 'COVID_incomplete modules' spreadsheet; transcript_from_response greys these
# modules out and blanks their grade.
# Dedicated names are used (not `df`/`fn`) so that re-running this cell cannot
# clobber the module-level student table `df` that create_transcript reads.
covid_fn = '/Users/spb/OneDrive - The University of Nottingham/CovidExams/COVID_incomplete modules.xlsx'
covid_df = pd.read_excel(covid_fn)
# astype(str) keeps numeric cells (e.g. a catalog number stored as a number)
# from silently turning into NaN under the .str accessor.
covid_modules = list(
    covid_df['Subject'].astype(str).str.strip()
    + ' '
    + covid_df['Catalog Number'].astype(str).str.strip()
)

In [None]:
# Read the BlueCastle login from the credentials file; the context manager
# guarantees the file handle is closed once the YAML has been parsed.
with open(credentials) as credentials_file:
    cred = yaml.safe_load(credentials_file)
username = cred['username']
password = cred['password']

In [None]:
def transcript_from_response(response, tutor, year, stop_on_status="May 2020"):
    """Render a BlueCastle student-view response as a PDF transcript.

    The page is parsed, the container holding the 'My Marks' heading is kept,
    and the 'transcript-print-view' paragraph, the disclaimer and the original
    heading are removed. Name, date and year headings are prepended, rows for
    modules listed in the module-level ``covid_modules`` are tagged with the
    ``covidIncomplete`` class and have their grade replaced by a dash, and the
    result is written to ``<outpath>/<tutor>/<year folder>/<name>.pdf``.

    Args:
        response: Object whose ``.content`` holds the page HTML.
        tutor: Name of the tutor sub-folder below ``outpath``.
        year: Year on course. Shown in the "Year ..." heading and used for the
            folder name: ``"MSc"`` is used as-is, anything else becomes
            ``"year<year>"``. Non-string values are converted with ``str``.
        stop_on_status: If not None, the second ``<p>`` of the marks section is
            inspected; when it contains this text no PDF is produced, otherwise
            that paragraph is removed from the transcript.

    Returns:
        False if generation was skipped because of ``stop_on_status``, True
        once the PDF has been written.

    Raises:
        ValueError: If the page has no 'My Marks' heading.
        Exception: Other errors (AttributeError, IndexError, ...) propagate if
            the page layout differs from what the scraping below expects.
    """
    soup = BeautifulSoup(response.content, features="html5lib")
    heading = soup.find('h2', text='My Marks')
    if heading is None:
        raise ValueError("response does not contain a 'My Marks' section")
    marks = heading.parent

    # The first paragraph carries the student's details; items 10 and 6 of its
    # contents are joined to form the name (presumably forename then surname,
    # confirm against the live page).
    info = marks.p.contents
    name = info[10].strip() + ' ' + info[6].strip()

    # Drop page furniture that should not appear on the transcript.
    marks.find('p', attrs={'class': 'transcript-print-view'}).decompose()
    marks.find('div', attrs={'class': 'disclaimer'}).decompose()
    heading.decompose()

    if stop_on_status is not None:
        status = marks.find_all('p')[1]
        if stop_on_status in status.text:
            return False
        status.decompose()

    tag = soup.new_tag("h1")
    tag.string = name
    marks.insert(0, tag)
    tag = soup.new_tag("h2")
    # Build the day number directly: the "%-d" strftime flag is not available
    # on every platform.
    today = datetime.date.today()
    date = f"{today.day} {today:%B %Y}"
    tag.string = f"Provisional marks transcript – {date}"
    marks.insert(1, tag)
    tag = soup.new_tag("h3")
    tag.string = f"Year {year}"
    marks.insert(2, tag)

    table = marks.find('table', attrs={'class': 'gridTable transcript'})
    for td in table.find_all('td', attrs={'class': 'grid-moduleCode'}):
        if td.get_text().strip() not in covid_modules:
            continue
        row = td.parent
        # Tolerate rows that carry no class attribute yet.
        row['class'] = row.get('class', []) + ['covidIncomplete']
        # The grade cell is a sibling of the module-code cell, so it has to be
        # looked up in the row (searching inside `td` itself finds nothing).
        # Index 4 is kept from the original code; confirm against the table.
        grade = row.find_all('td')[4]
        # Assigning .string also works when the cell is empty.
        grade.string = ' – '

    filename = f"{name.replace(' ', '_')}.pdf"
    # Exact comparison: a substring test would also match '', 'M', 'S', ...
    # and would raise TypeError for an integer year.
    year = str(year)
    if year != 'MSc':
        year = f"year{year}"
    tutor_year_path = os.path.join(outpath, tutor, year)
    os.makedirs(tutor_year_path, exist_ok=True)
    filename = os.path.join(tutor_year_path, filename)
    HTML(string=str(marks)).write_pdf(filename, stylesheets=[style])
    return True

In [None]:
# Individual test

#loginpayload = {'UserName': username,
#                'Password': password}
#
#with requests.Session() as s:
#    s.post(loginurl, data=loginpayload)
#    sid = '14324429'
#    studentpayload = {'selectedStudent': sid}
#    r = s.post(studenturl, data=studentpayload)

In [None]:
def get_students_old():
    """Load the older tutor list spreadsheet as a table indexed by student ID.

    Column names are stripped of surrounding whitespace, 'Student Id' is
    converted to strings, and 'Year on Course' is cleaned of stray '*'
    characters and converted to integers.

    Returns:
        DataFrame indexed by 'Student Id' (str), with fully duplicated rows
        removed, sorted by 'Year on Course' in descending order.
    """
    fn = '/Users/spb/The University of Nottingham/Physics_Tutors - Documents/General/Tutor_List_070220.xlsx'
    df = pd.read_excel(fn)
    df.columns = df.columns.str.strip()
    sids = df['Student Id']
    # switch to old IDs:
    #sids = np.where((sids > 10000000) & (sids < 20000000), sids - 10000000, sids)
    df['Student Id'] = sids.astype('str')
    # Fix strange year = "2*" entry. The column is assigned back explicitly
    # (an in-place update of a selected column is not reliable), values are
    # converted to str first so numeric cells survive, and regex=False makes
    # '*' a literal rather than a regular-expression quantifier.
    df['Year on Course'] = (
        df['Year on Course']
        .astype('str')
        .str.replace('*', '', regex=False)
        .astype('int')
    )
    # Remove duplicates while the ID is still a column: drop_duplicates ignores
    # the index, so doing this after set_index could drop distinct students
    # whose remaining columns happen to match.
    df = df.drop_duplicates()
    df = df.set_index('Student Id')
    df = df.sort_values('Year on Course', ascending=False)
    return df

In [None]:
def get_students():
    """Load the tutee contact spreadsheet as a table indexed by student ID.

    The first six rows of the sheet are skipped, column names are stripped of
    surrounding whitespace, 'Student ID' is converted to strings, and a
    'Year on Course' string column is derived from 'Year' with any '*'
    characters removed.

    Returns:
        DataFrame indexed by 'Student ID' (str), with fully duplicated rows
        removed, sorted by 'Year on Course' (string order) descending.
    """
    fn = '/Users/spb/The University of Nottingham/Physics_Tutors - Documents/General/tutee_remote_contact.xlsx'
    df = pd.read_excel(fn, skiprows=6)
    df.columns = df.columns.str.strip()
    sids = df['Student ID']
    # switch to old IDs:
    #sids = np.where((sids > 10000000) & (sids < 20000000), sids - 10000000, sids)
    df['Student ID'] = sids.astype('str')
    # Fix strange year = "2*" entry; regex=False makes '*' a literal rather
    # than a regular-expression quantifier, whatever the pandas default is.
    df['Year on Course'] = df['Year'].astype('str').str.replace('*', '', regex=False)
    # Remove duplicates while the ID is still a column: drop_duplicates ignores
    # the index, so doing this after set_index could drop distinct students
    # whose remaining columns happen to match.
    df = df.drop_duplicates()
    df = df.set_index('Student ID')
    df = df.sort_values('Year on Course', ascending=False)
    return df

In [None]:
# Load the tutee list; create_transcript looks each student up in this
# module-level frame by student ID.
df = get_students()

In [None]:
# Repeated IDs make the per-student lookups ambiguous, so flag them up front.
if df.index.has_duplicates:
    print('Warning: there are duplicate Student IDs.')

In [None]:
# For testing full system on small sample:
# restricts the run to a random handful of first-year students.
TEST_SAMPLE_SIZE = 10  # set to None to generate transcripts for every student
if TEST_SAMPLE_SIZE is not None:
    first_years = df[df['Year on Course'] == '1']
    # Cap the sample size so a short list does not make sample() raise.
    df = first_years.sample(min(TEST_SAMPLE_SIZE, len(first_years)))

In [None]:
# Form fields posted to loginurl to authenticate each worker's session.
loginpayload = {'UserName': username,
                'Password': password}

In [None]:
def create_transcript(sid, response, session):
    """Create the PDF transcript for one student and report the outcome.

    The student's tutor, year and name are looked up in the module-level
    ``df`` and the fetched page is passed to ``transcript_from_response``
    with status checking disabled.

    Args:
        sid: Student ID; must be a label in ``df.index``.
        response: The student's BlueCastle page as returned by the session.
        session: Unused; kept so existing callers keep working.

    Returns:
        Tuple ``(student, success)`` where ``student`` is
        ``"<sid> <first names> <surname>"`` and ``success`` is False if
        transcript generation raised or reported that it skipped the student.
    """
    tutor = df.loc[sid, 'Tutor']
    year = df.loc[sid, 'Year on Course']
    student = f"{sid} {df.loc[sid, 'First name(s)']} {df.loc[sid, 'Surname']}"
    try:
        outcome = transcript_from_response(response, tutor, year, None)
    except Exception as exc:
        # Best effort: one bad page must not stop the batch, but the reason is
        # logged so genuine bugs are not mistaken for missing students.
        # `except Exception` lets KeyboardInterrupt/SystemExit through.
        logging.getLogger(__name__).warning(
            "Transcript generation failed for %s: %r", student, exc)
        return (student, False)
    # An explicit False means the transcript was skipped; None (no return
    # value) and True both count as success.
    return (student, outcome is not False)

In [None]:
def fetch_and_create_transcripts(student_ids, timeout=60):
    """Log in once, then fetch and render a transcript for each student ID.

    Args:
        student_ids: Iterable of student IDs present in the module-level ``df``.
        timeout: Seconds to wait for each HTTP request before giving up, so a
            stalled connection cannot hang the worker indefinitely.

    Returns:
        List of ``(student, success)`` tuples, one per ID and in input order.
        If a student's page cannot be fetched, the entry is
        ``(str(sid), False)``.
    """
    results = []
    with requests.Session() as s:
        s.post(loginurl, data=loginpayload, timeout=timeout)
        for sid in student_ids:
            studentpayload = {'selectedStudent': sid}
            try:
                response = s.post(studenturl, data=studentpayload, timeout=timeout)
            except requests.RequestException:
                # Record the failure and carry on, so one network error does
                # not discard the results for the rest of the batch.
                results.append((str(sid), False))
                continue
            results.append(create_transcript(sid, response, session=s))
    return results

In [None]:
# Split the student IDs into n batches and process each batch on its own
# worker thread; failures are listed and successes are counted.
n = 4
count = 0
subidx = np.array_split(df.index, n)
with ThreadPoolExecutor(n) as executor:
    results = executor.map(fetch_and_create_transcripts, subidx)
    # Flatten the per-batch lists lazily, keeping the original order.
    outcomes = (outcome for batch in results for outcome in batch)
    for student, success in outcomes:
        if success:
            count += 1
            continue
        status = "not found on BlueCastle"
        print(f"{student}: {status}")
print(f"Successfully created {count} transcripts")