In [19]:
from functions import *

# Assessments

The assessments dataframe contains information about the unique assessments in each code module and presentation.

In [2]:
# preview the first five rows (the DataFrame.head default) to see the columns and sample values
assessments.head()

Unnamed: 0,code_module,code_presentation,id_assessment,assessment_type,date,weight
0,AAA,2013J,1752,TMA,19.0,10.0
1,AAA,2013J,1753,TMA,54.0,20.0
2,AAA,2013J,1754,TMA,117.0,20.0
3,AAA,2013J,1755,TMA,166.0,20.0
4,AAA,2013J,1756,TMA,215.0,30.0


---

## Assessments Contents

* **code_module**: The code module represents the code name of the course the assessment was held for.
* **code_presentation**: The code presentation identifies the presentation of the module (e.g. `2013J`) that the assessment was held for.
* **id_assessment**: The assessment ID is the unique identifier for each assessment.
* **assessment_type**: The assessment type represents the kind of assessment it was.
    - There are three assessment types:
        * TMA: Tutor Marked Assessment
        * CMA: Computer Marked Assessment
        * Exam: The Final Exam
* **date**: The date is the number of days from the start of the course on which the assessment took place.
* **weight**: The weight is the weighted value of the assessment. Exams should have a weight of 100, while the rest of the assessments should add up to 100 in total.

---

## Assessments Information

**Size**

In [3]:
# get_size is defined outside this notebook (presumably via the `functions` star import);
# per the rendered output it reports the number of columns and rows
get_size(assessments)

Unnamed: 0,Count
Columns,6
Rows,206


In [4]:
# Render the table size as markdown. Each row is one assessment (TMA, CMA or Exam),
# not only an exam, so the sentence says "assessments".
md(f'''
Assessments has {len(assessments.columns)} columns and {len(assessments)} rows representing unique assessments.
''')


Assessments has 6 columns and 206 rows representing unique exams.


**Data Types**

In [5]:
# list each column alongside its dtype, per the rendered output
# (get_dtypes is defined outside this notebook, presumably in `functions`)
get_dtypes(assessments)

index,Type
code_module,object
code_presentation,object
id_assessment,int64
assessment_type,object
date,float64
weight,float64


* `id_assessment` is a categorical value and so should be converted to `string`
* `object` types should be converted to strings
* Both of the `float64` typed variables are whole numbers and should be converted to `int64`

In [16]:
# converting the data types
# assessment IDs are identifiers rather than quantities, so cast them to str first
assessments['id_assessment'] = assessments['id_assessment'].astype(str)
# convert_dtypes switches columns to pandas' NA-aware dtypes (e.g. object -> string);
# convert_integer=False disables conversion to the integer extension dtypes, so the
# float columns are not turned into integers here.
# NOTE(review): the notes for this section say the float columns should become int64 —
# confirm whether convert_integer=False is intentional.
assessments = assessments.convert_dtypes(convert_integer=False)

**Null Values**

In [7]:
# count the null values in each column; null_vals is defined outside this notebook
# (presumably in `functions`) and, per the rendered output, tabulates one count per column
null_vals(assessments)

index,Null Values
code_module,0
code_presentation,0
id_assessment,0
assessment_type,0
date,11
weight,0


In [8]:
# Render the explanation of the missing dates as markdown; the null count is computed
# live so the text stays in sync with the data.
md(f'''
* We have {assessments['date'].isnull().sum()} null data points for assessment date. 
* The documentation of this dataset states that if the exam date is missing then it is at the end of the last presentation week. 
* We can find this information in the courses dataframe, and add them in to get rid of the NaNs.
''')


* We have 11 null data points for assessment date. 
* The documentation of this dataset states that if the exam date is missing then it is as the end of the last presentation week. 
* We can find this information in the courses dataframe, and add them in to get rid of the NaNs.


In [9]:
# adding the dates for the null test dates
# A missing date means the assessment falls at the end of the presentation, so the
# length of the matching module presentation (from `courses`) is used instead.
for index, row in assessments[assessments['date'].isna()].iterrows():
    same_presentation = (
        (courses['code_module'] == row['code_module'])
        & (courses['code_presentation'] == row['code_presentation'])
    )
    # .loc returns a Series even when one row matches; .item() extracts the scalar that
    # .at expects and raises ValueError unless exactly one course row matched.
    assessments.at[index, 'date'] = courses.loc[same_presentation, 'module_presentation_length'].item()

# reprinting to ensure it worked
dataframe(assessments.isnull().sum(), columns=['Null Values'])

Unnamed: 0,Null Values
code_module,0
code_presentation,0
id_assessment,0
assessment_type,0
date,0
weight,0


**Unique Counts**

In [10]:
# number of distinct values in each column, per the rendered output
# (count_unique is defined outside this notebook, presumably in `functions`)
count_unique(assessments)

index,Count
code_module,7
code_presentation,4
id_assessment,206
assessment_type,3
date,78
weight,24


In [11]:
# Count the distinct assessment IDs once, then report the figure as markdown.
unique_id_count = assessments['id_assessment'].nunique()
md(f'''
There are {unique_id_count} unique assessment ID's
''')


There are 206 unique assessment ID's


**Unique Categorical Values**

In [12]:
# list the distinct values of the categorical columns; the rendered output covers
# code_module, code_presentation and assessment_type
unique_vals(assessments)

index,Values
code_module,"['AAA', 'BBB', 'CCC', 'DDD', 'EEE', 'FFF', 'GGG']"
code_presentation,"['2013J', '2014J', '2013B', '2014B']"
assessment_type,"['TMA', 'Exam', 'CMA']"


Everything here matches what we would expect from the data's description.

**Duplicate Values:**

In [13]:
# check the dataframe for duplicates
# NOTE(review): get_dupes is defined outside this notebook, so what it treats as a
# duplicate (full rows vs. a key subset) should be confirmed there
get_dupes(assessments)

There are no Duplicate Values

**Numerical Values**

In [17]:
# summary statistics rounded to two decimals; describe() defaults to the numeric
# columns, which here are date and weight
assessments.describe().round(2)

Unnamed: 0,date,weight
count,206.0,206.0
mean,150.97,20.87
std,78.16,30.38
min,12.0,0.0
25%,81.25,0.0
50%,159.0,12.5
75%,227.0,24.25
max,269.0,100.0
