In [None]:
# To be run early on results day.
# Will take up to an hour to generate transcripts for all students.

In [None]:
import os.path
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from weasyprint import HTML, CSS
import yaml
import datetime
from concurrent.futures import ThreadPoolExecutor
import logging
import sys

In [None]:
# Send log records at INFO level and above to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Named logger used by the transcript-generation functions below.
logger = logging.getLogger('transcripts')

In [None]:
# Path to the YAML file holding the login details; it is read below for its
# 'username' and 'password' keys.
credentials = '/Users/spb/.uon_credentials.yaml'

In [None]:
# Root output folder for the generated PDFs. This is a OneDrive folder for sharing
# with tutors; transcripts are written to <outpath>/<tutor>/<year>/.
outpath = '/Users/spb/The University of Nottingham/Physics_Tutors - Documents/Transcripts/'
#outpath = '/Users/spb/tmp/local_transcripts/'

In [None]:
# Login endpoint: the username/password payload is POSTed here to start a session.
loginurl = 'https://bluecastle-uk-results.nottingham.ac.uk/login'
# Per-student results view: a student is selected by POSTing 'selectedStudent'.
studenturl = 'https://bluecastle-uk-results.nottingham.ac.uk/Administration/StudentView/StudentView'

In [None]:
# Stylesheet passed to WeasyPrint for every transcript PDF: collapsed table borders,
# page breaks avoided inside tables and after h2 headings, 1.5 cm page margins, and
# rows tagged 'covidIncomplete' shown in grey.
style = CSS(string='''
table  {border-collapse: collapse;
        page-break-inside: avoid;
        page-break-before: avoid;}
td, th {padding-left: 10px;}
h1, h3 {margin-top: 4px;
        margin-bottom: 8px;}
h2 {margin-top: 8px;
    margin-bottom: 8px;
    page-break-after: avoid;}            
p {margin-top: 12px;
   margin-bottom: 4px;}
@page {margin: 1.5cm 1.5cm 1.5cm 1.5cm;}
.covidIncomplete {color: grey;}
''')

In [None]:
# Best-effort load of the modules flagged as Covid-incomplete. Each entry is
# '<Subject> <Catalog Number>', which is compared against the module-code cells of
# the transcript table. If the spreadsheet cannot be read, covid_modules is None
# and no rows are greyed out.
fn = '/Users/spb/OneDrive - The University of Nottingham/CovidExams/COVID_incomplete modules.xlsx'
try:
    df = pd.read_excel(fn)
    # astype(str) first: a catalog-number column that Excel stored as numbers has
    # no .str accessor, which previously disabled the whole feature silently.
    covid_modules = (df['Subject'].astype(str).str.strip() + ' '
                     + df['Catalog Number'].astype(str).str.strip())
    covid_modules = list(covid_modules)
except Exception as exc:
    # Still non-fatal, but report why (missing file, missing column, ...).
    print(f'List of Covid modules not found: {exc!r}')
    covid_modules = None

In [None]:
# Read the login details from the credentials YAML file. The context manager makes
# sure the file handle is closed once the contents have been parsed.
with open(credentials) as credentials_file:
    cred = yaml.safe_load(credentials_file)
username = cred['username']
password = cred['password']

In [None]:
def transcript_from_response(response, tutor, year, stop_on_status=None):
    """Turn one student's results page into a PDF transcript.

    The 'My Marks' section of the page is stripped of its print link, disclaimer
    and heading, given new headings (student name, dated title, year), optionally
    followed by a trimmed 'My Award' section, and written to
    ``<outpath>/<tutor>/<year folder>/<Name_With_Underscores>.pdf``.

    Parameters
    ----------
    response : response object whose ``.content`` is the HTML of the student view.
    tutor : str
        Tutor name, used as the first sub-folder of ``outpath``.
    year : str or int
        Year on course. Any value other than 'MSc' is filed under ``year<year>``.
    stop_on_status : str, optional
        If given, the second ``<p>`` of the marks section is treated as a status
        line: when it contains this text no PDF is written, otherwise the status
        line is removed from the transcript.

    Returns
    -------
    ``False`` if the transcript was skipped because of ``stop_on_status``;
    otherwise ``None`` once the PDF has been written.

    Raises
    ------
    AttributeError, IndexError
        If the page does not have the expected 'My Marks' structure.
    """
    soup = BeautifulSoup(response.content, features="html5lib")
    marks = soup.find('h2', string='My Marks').parent
    # The name is assembled from two fixed child nodes of the first paragraph
    # (presumably forename then surname -- verify against the page layout).
    info = marks.p.contents
    name = info[10].strip() + ' ' + info[6].strip()

    # Remove page furniture that should not appear in the PDF.
    marks.find('p', attrs={'class': 'transcript-print-view'}).decompose()
    marks.find('div', attrs={'class': 'disclaimer'}).decompose()
    marks.find('h2', string='My Marks').decompose()
    if stop_on_status is not None:
        status = marks.find_all('p')[1]
        if stop_on_status in status.text:
            return False
        status.decompose()

    # Build the day without '%-d', which only some platforms' strftime supports.
    today = datetime.date.today()
    date = f"{today.day} {today.strftime('%B %Y')}"
    headings = (
        ("h1", name),
        ("h2", f"Provisional marks transcript – {date}"),
        ("h3", f"Year {year}"),
    )
    for position, (tag_name, text) in enumerate(headings):
        tag = soup.new_tag(tag_name)
        tag.string = text
        marks.insert(position, tag)

    # Both decorations are best-effort: a transcript is still produced if they
    # fail, but the reason is logged instead of being silently discarded.
    try:
        _grey_out_covid_modules(marks)
    except Exception as exc:
        logger.warning(f"{name}: could not mark Covid-incomplete modules: {exc!r}")
    try:
        award_html = _provisional_award_html(soup)
    except Exception as exc:
        logger.warning(f"{name}: could not add award section: {exc!r}")
        award_html = ''
    output = str(marks) + award_html

    # A path separator inside a name would otherwise create a stray sub-folder.
    filename = f"{name.replace(' ', '_').replace(os.sep, '_')}.pdf"
    # Equality, not substring membership: "year not in 'MSc'" also matched
    # 'M', 'S', '' etc. and raised TypeError for integer years.
    if str(year) != 'MSc':
        year = f"year{year}"
    tutor_year_path = os.path.join(outpath, tutor, year)
    os.makedirs(tutor_year_path, exist_ok=True)
    filename = os.path.join(tutor_year_path, filename)
    HTML(string=output).write_pdf(filename, stylesheets=[style])


def _grey_out_covid_modules(marks):
    """Tag rows of modules listed in ``covid_modules`` and replace their fifth cell with a dash."""
    if covid_modules is None:
        return
    table = marks.find('table', attrs={'class': 'gridTable transcript'})
    if table is None:
        return
    for td in table.find_all('td', attrs={'class': 'grid-moduleCode'}):
        # get_text() copes with cells that hold no single string, so one odd
        # row no longer stops the remaining rows from being processed.
        if td.get_text(strip=True) not in covid_modules:
            continue
        row = td.parent
        row.attrs.setdefault('class', []).append('covidIncomplete')
        cells = row.find_all('td')
        if len(cells) <= 4:
            continue
        grade = cells[4]  # fifth cell is treated as the grade column
        if grade.string is not None:
            grade.string.replace_with(' – ')
        else:
            grade.string = ' – '


def _provisional_award_html(soup):
    """Return the trimmed 'My Award' section as HTML, or '' if no qualification is shown."""
    heading = soup.find('h2', string='My Award')
    if heading is None:
        return ''
    award = heading.parent
    if award.find('strong', string='Qualification Obtained:') is None:
        return ''
    heading.decompose()
    for tag in award.find_all('p'):
        tag.decompose()
    for tag in award.find_all('div'):
        tag.decompose()
    tag = soup.new_tag("h2")
    tag.string = "Provisional award"
    award.insert(0, tag)
    return str(award)

In [None]:
# Individual test

#loginpayload = {'UserName': username,
#                'Password': password}
#
#with requests.Session() as s:
#    s.post(loginurl, data=loginpayload)
#    sid = 'STUDENT_ID'
#    studentpayload = {'selectedStudent': sid}
#    r = s.post(studenturl, data=studentpayload)
#
#transcript_from_response(r, 'test', '0')

In [None]:
def get_students_old():
    """Load the older tutor list spreadsheet (Tutor_List_070220.xlsx).

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Student Id' (as strings), with 'Year on Course' converted to
        integers, fully duplicated rows dropped, and sorted by year, highest first.
    """
    fn = '/Users/spb/The University of Nottingham/Physics_Tutors - Documents/General/Tutor_List_070220.xlsx'
    df = pd.read_excel(fn)
    df.columns = df.columns.str.strip()
    sids = df['Student Id']
    # switch to old IDs:
    #sids = np.where((sids > 10000000) & (sids < 20000000), sids - 10000000, sids)
    df['Student Id'] = sids.astype('str')
    # Fix strange year = "2*" entry. Assign the cleaned column back rather than
    # calling .update() on a temporary Series (a no-op under pandas copy-on-write),
    # and treat '*' as a literal character rather than a regex.
    years = df['Year on Course'].astype('str').str.replace('*', '', regex=False)
    df['Year on Course'] = pd.to_numeric(years).astype('int')
    df = df.set_index('Student Id')
    df = df.drop_duplicates()
    df = df.sort_values('Year on Course', ascending=False)
    return df

In [None]:
def get_students():
    """Load the current tutee list (tutee_remote_contact.xlsx, skipping 6 header rows).

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Student ID' (as strings), with an added string column
        'Year on Course' (the 'Year' column with any '*' removed), fully duplicated
        rows dropped, and sorted by 'Year on Course' in descending string order.
    """
    fn = '/Users/spb/The University of Nottingham/Physics_Tutors - Documents/General/tutee_remote_contact.xlsx'
    df = pd.read_excel(fn, skiprows=6)
    df.columns = df.columns.str.strip()
    sids = df['Student ID']
    # switch to old IDs:
    #sids = np.where((sids > 10000000) & (sids < 20000000), sids - 10000000, sids)
    df['Student ID'] = sids.astype('str')
    # Fix strange year = "2*" entry. '*' is a regex metacharacter, so ask for a
    # literal replacement instead of relying on the pandas-version-dependent default.
    df['Year on Course'] = df['Year'].astype('str').str.replace('*', '', regex=False)
    df = df.set_index('Student ID')
    df = df.drop_duplicates()
    df = df.sort_values('Year on Course', ascending=False)
    return df

In [None]:
# Load the tutee list: one row per student, indexed by student ID.
df = get_students()

In [None]:
# Student IDs are used as lookup keys later on, so flag any that occur more than once.
if not df.index.is_unique:
    print('Warning: there are duplicate Student IDs.')

In [None]:
# Keep an untouched copy of the full student table so that `df` can be reset from it.
dfall = df.copy()

In [None]:
# For testing the full system on a small sample: start again from the full table,
# then uncomment one of the filters below to restrict `df`.
df = dfall.copy()
#df = df[df['Year on Course'] == '1']
#df = df.sample(40)

In [None]:
# Form fields POSTed to the login URL.
loginpayload = dict(UserName=username, Password=password)

In [None]:
def create_transcript(sid, response, session):
    """Write the transcript PDF for one student and report whether it worked.

    Parameters
    ----------
    sid : index label of the student in the module-level ``df``.
    response : the HTTP response holding that student's results page.
    session : the session the response came from (currently unused; kept for
        interface compatibility).

    Returns
    -------
    tuple
        ``(sid, student, success)`` where ``student`` is "<sid> <first names> <surname>"
        and ``success`` is ``False`` if the response was an error or the page could
        not be turned into a transcript.
    """
    tutor = df.loc[sid, 'Tutor']
    year = df.loc[sid, 'Year on Course']
    student = f"{sid} {df.loc[sid, 'First name(s)']} {df.loc[sid, 'Surname']}"
    try:
        response.raise_for_status()
        transcript_from_response(response, tutor, year, None)
        #logger.info(f"Created transcript for {student}")
    except Exception as exc:
        # Catch Exception rather than everything (a bare except also traps
        # KeyboardInterrupt/SystemExit) and record why the transcript failed.
        logger.warning(f"Failed for {student}: {exc!r}")
        return (sid, student, False)
    return (sid, student, True)

In [None]:
def fetch_and_create_transcripts(student_ids):
    """Log in once, then fetch and render the transcript of each given student.

    Parameters
    ----------
    student_ids : iterable of student IDs; each is POSTed as 'selectedStudent'.

    Returns
    -------
    list
        One ``(sid, student, success)`` tuple, as returned by ``create_transcript``,
        for every student whose page was fetched successfully. Students whose
        request failed are logged and omitted. The list is empty if login fails.
    """
    results = []
    with requests.Session() as s:
        try:
            response = s.post(loginurl, data=loginpayload, timeout=31)
        except requests.Timeout:
            logger.error('Failed to log in - timed out')
            return results
        except requests.RequestException as exc:
            # e.g. connection or SSL errors; previously these escaped the worker.
            logger.error(f'Failed to log in - {exc!r}')
            return results
        if not response.ok:
            logger.error('Failed to log in - error response')
            return results

        for sid in student_ids:
            studentpayload = {'selectedStudent': sid}
            try:
                # (connect timeout, read timeout) in seconds
                response = s.post(studenturl, data=studentpayload, timeout=(3.1, 31))
                response.raise_for_status()
            except requests.Timeout:
                logger.warning(f'{sid}: Request timed out')
            except requests.HTTPError:
                logger.warning(f'{sid}: Unsuccessful request')
            except requests.RequestException as exc:
                # Any other transport failure affects this student only, not the
                # rest of the batch.
                logger.warning(f'{sid}: Request failed ({exc!r})')
            else:
                results.append(create_transcript(sid, response, session=s))
    return results

In [None]:
# Split the student IDs into n batches and process each batch in its own worker
# thread, then tally the successes and report every failure.
n = 4
subidx = np.array_split(df.index, n)
count = 0
with ThreadPoolExecutor(n) as executor:
    results = executor.map(fetch_and_create_transcripts, subidx)
    for res in results:
        for sid, student, success in res:
            if not success:
                status = "not found on BlueCastle"
                logger.info(f"{student}: {status}")
                continue
            count += 1
logger.info(f"Successfully created {count} transcripts")