In [1]:
import pandas as pd
import numpy as np
import glob

In [2]:
#Builds the merge key for a single row: the word QAN followed by the qualification number
def QAN_merge_col (df,col):
    """Return the text "QAN " followed by the string form of the row's value in `col`.

    df  -- one row, indexable by column name (e.g. the row handed over by
           DataFrame.apply with axis=1)
    col -- name of the column that holds the qualification number

    Example result: "QAN 60142923"
    """
    qan_text = str(df[col])
    return " ".join(("QAN", qan_text))

In [3]:
#Builds the key that combines a qualification's merge key with one of its pass grades
def QAN_grade_col (df):
    """Return the row's 'QAN Merge' value and 'Pass Grades' value joined by a single space.

    df -- one row, indexable by column name, that has both a 'QAN Merge'
          and a 'Pass Grades' entry

    Both values are converted with str() before joining.
    Example result: "QAN 60142923 4"
    """
    key_parts = [str(df['QAN Merge']), str(df['Pass Grades'])]
    return " ".join(key_parts)

In [13]:
#Builds the key that combines a student's QAN with the grade they achieved, to enable merging
def QAN_student_grade_col (df):
    """Return "QAN", the row's 'QAN' value and its 'Result' value joined by single spaces.

    df -- one row, indexable by column name, that has both a 'QAN' and a
          'Result' entry

    Both values are converted with str() before joining.
    Example result: "QAN 60142923 4"
    """
    key_parts = ["QAN", str(df['QAN']), str(df['Result'])]
    return " ".join(key_parts)

In [4]:
#Reads the 2019 sheet of the discount codes workbook (spreadsheet rows 0 and 1 are skipped).
#Rows without a qualification number are dropped, which is how the footer rows are removed,
#and only the QAN and discount code columns are kept.
discount_codes = (
    pd.read_excel('Discount Codes.xlsx',sheet_name='2019',skiprows=[0,1])
      .dropna(subset=['Qualification Number'])
      [['Qualification Number','2019 KS4 Discount Code']]
)

#Gets the performance points from the 'Underlying Data' sheet (spreadsheet rows 0, 1 and 2 are skipped).
#Rows without a qualification number are dropped here too, to remove the footer rows.
performance_points = (
    pd.read_excel('Performance Points.xlsx',sheet_name='Underlying Data',skiprows=[0,1,2])
      .dropna(subset=['Qualification Number'])
)

In [5]:
#Read all the EBacc qualifications and merge them into a single dataframe

#All the EBacc qualification types (i.e. the tabs read from the EBacc spreadsheet)
EBacc_slots = ['English','English Literature','Maths','Science','Humanities','Languages']

#Collects one cleaned dataframe per EBacc slot
ebacc_frames = []

#Loops through each relevant tab in the EBacc qual spreadsheet
for qual in EBacc_slots:

    #Read the tab and discard every row with nothing in the 'Unnamed: 2' column
    #(this removes any header, footer and blank rows)
    sheet = pd.read_excel('EBacc Quals.xlsx',sheet_name=qual).dropna(subset=['Unnamed: 2'])

    #The first remaining row holds the real column names
    header_row = sheet.iloc[0]

    #Remove that row from the data, then use its values as the column names
    qual_data = sheet.drop([sheet.index[0]])
    qual_data.columns = header_row.tolist()

    #Record which EBacc slot this dataframe of qualifications belongs to
    qual_data['EBacc Slot'] = qual

    ebacc_frames.append(qual_data)

#Put all the EBacc data into 1 dataframe, remove any qualification with no entry in the
#2019 column (it does not count in 2019), and keep only the QAN number and the EBacc slot
EBacc_quals = (
    pd.concat(ebacc_frames,sort=False)
      .dropna(subset=[2019])
      [['QN','EBacc Slot']]
)

In [6]:
#Adds a 'QAN Merge' column to each frame: the word QAN followed by the qualification number.
#The frames are merged on this text key rather than on the raw number because, per the
#original author's note, merging in pandas does not work well with numbers.
discount_codes['QAN Merge'] = discount_codes.apply(
    QAN_merge_col, axis=1, args=('Qualification Number',)
)
performance_points['QAN Merge'] = performance_points.apply(
    QAN_merge_col, axis=1, args=('Qualification Number',)
)
EBacc_quals['QAN Merge'] = EBacc_quals.apply(
    QAN_merge_col, axis=1, args=('QN',)
)

In [7]:
#Merge all the qualification data together into 1 dataframe.
#The right join keeps every performance_points row whether or not it has an EBacc slot;
#the left join then adds the discount code where one exists.
qualifcations = (
    pd.merge(EBacc_quals, performance_points, on='QAN Merge', how='right')
      .merge(discount_codes, on='QAN Merge', how='left')
)

In [8]:
#Create the 'QAN Grade' column, which joins the QAN merge key and the pass grade,
#so that the points of each qualification can be looked up per grade
qualifcations['QAN Grade'] = qualifcations.apply(QAN_grade_col, axis=1)

In [15]:
#Gets the list of filenames with the subject results.
#glob returns matches in arbitrary order, so the list is sorted to make the row order
#of the combined results the same on every run and every machine.
results_filename = sorted(glob.glob('* Results.xlsx'))

#Fail with a clear message instead of pandas' generic "No objects to concatenate"
if not results_filename:
    raise ValueError("No files matching '* Results.xlsx' were found in the working directory")

#Reads the 'data' sheet of each school's file and merges them together into 1 dataframe
results = pd.concat(
    [pd.read_excel(filename,sheet_name='data') for filename in results_filename],
    sort=False,
)

#Create a column that combines QAN with the student grade to enable merging
results['QAN Grade'] = results.apply(QAN_student_grade_col,axis=1)

In [17]:
#Merge the data to get the equivalent DfE points and the discount codes.
#The left join keeps every result row even when no qualification/grade match is found.
#NOTE: this overwrites `results`, so running this cell twice merges the qualification columns in twice.
results = pd.merge(results, qualifcations, how='left', on='QAN Grade')

Unnamed: 0,School,UPN,Surname,Forename,QAN,Result Date,Result,QAN Grade,QN,EBacc Slot,QAN Merge,Qualification Number_x,Qualification Title,Qualification Type,Pass Grades,Grade Text,Pass Points,GCSE Size Equivalence 1,Qualification Number_y,2019 KS4 Discount Code
0,School 1,1,Surname 1,Surname 1,60142923,2019-07-01,4,QAN 60142923 4,60142923.0,English,QAN 60142923,60142923,AQA Level 1/Level 2 GCSE (9-1) in English Lang...,GCSE (9-1) Full Course,4,Grade 4,4.0,1.0,60142923,FK2B
1,School 1,1,Surname 1,Surname 1,60144476,2019-07-01,3,QAN 60144476 3,60144476.0,English Literature,QAN 60144476,60144476,AQA Level 1/Level 2 GCSE (9-1) in English Lite...,GCSE (9-1) Full Course,3,Grade 3,3.0,1.0,60144476,FC4
2,School 1,1,Surname 1,Surname 1,60147003,2019-07-01,3,QAN 60147003 3,60147003.0,Maths,QAN 60147003,60147003,Pearson Edexcel Level 1/Level 2 GCSE (9-1) in ...,GCSE (9-1) Full Course,3,Grade 3,3.0,1.0,60147003,RB1
3,School 1,1,Surname 1,Surname 1,60186124,2019-07-01,43,QAN 60186124 43,60186124.0,Science,QAN 60186124,60186124,Pearson Edexcel Level 1/Level 2 GCSE (9-1) in ...,GCSE (9-1) Full Course (Double Award),43,Grade 4-3 (Double Award),3.5,1.0,60186124,RA1E
4,School 1,1,Surname 1,Surname 1,60184917,2019-07-01,3,QAN 60184917 3,,,QAN 60184917,60184917,Pearson Edexcel Level 1/Level 2 GCSE (9-1) in ...,GCSE (9-1) Full Course,3,Grade 3,3.0,1.0,60184917,LC11
5,School 2,B1,Surname B1,Surname B1,60142923,2019-07-01,5,QAN 60142923 5,60142923.0,English,QAN 60142923,60142923,AQA Level 1/Level 2 GCSE (9-1) in English Lang...,GCSE (9-1) Full Course,5,Grade 5,5.0,1.0,60142923,FK2B
6,School 2,B1,Surname B1,Surname B1,60144476,2019-07-01,6,QAN 60144476 6,60144476.0,English Literature,QAN 60144476,60144476,AQA Level 1/Level 2 GCSE (9-1) in English Lite...,GCSE (9-1) Full Course,6,Grade 6,6.0,1.0,60144476,FC4
7,School 2,B1,Surname B1,Surname B1,60147003,2019-07-01,7,QAN 60147003 7,60147003.0,Maths,QAN 60147003,60147003,Pearson Edexcel Level 1/Level 2 GCSE (9-1) in ...,GCSE (9-1) Full Course,7,Grade 7,7.0,1.0,60147003,RB1
8,School 2,B1,Surname B1,Surname B1,60186124,2019-07-01,77,QAN 60186124 77,60186124.0,Science,QAN 60186124,60186124,Pearson Edexcel Level 1/Level 2 GCSE (9-1) in ...,GCSE (9-1) Full Course (Double Award),77,Grade 7-7 (Double Award),7.0,1.0,60186124,RA1E
9,School 2,B1,Surname B1,Surname B1,60184012,2019-07-01,6,QAN 60184012 6,,,QAN 60184012,60184012,AQA Level 1/Level 2 GCSE (9-1) in Religious St...,GCSE (9-1) Full Course,6,Grade 6,6.0,1.0,60184012,DD1
