# Lecture 33

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/UTA-DataScience/DATA1401.2020.Fall/blob/master/Lectures/Lecture.33/Lecture.33.ipynb)

From previous lecture:

In [None]:
# Import libraries we will use
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
from collections import OrderedDict
import math

In [None]:
# Create some virtual classes
class base:
    """Common root of the framework: an object that carries a name."""

    # Class-level default; name() returns it until __init__ has stored a name.
    __name = ""

    def __init__(self, name):
        """Remember *name* as this object's name."""
        self.__name = name

    def name(self):
        """Return the name stored for this object."""
        return self.__name

class data(base):
    """Base class for the named objects that hold data."""

    def __init__(self, name):
        """Pass *name* on to the base class."""
        super().__init__(name)
        
class alg(base):
    """Base class for the named objects that implement an algorithm."""

    def __init__(self, name):
        """Pass *name* on to the base class."""
        super().__init__(name)


## Data Classes

In [None]:
class grade(data):
    """A single named grade holding either a number or a letter.

    The short name the grade is filed under (``gradebook_name``) is kept
    separately from the object name, which is that short name followed by
    " Grade Data Object".
    """

    # Letter grades accepted by set().
    __letter_grades = ["F-", "F", "F+", "D-", "D", "D+", "C-", "C", "C+",
                       "B-", "B", "B+", "A-", "A", "A+"]

    def __init__(self, name, numerical=True, value=None):
        """Create the grade called *name*.

        When a truthy *value* is given, its type decides the kind of grade
        (int/float -> numerical, str -> letter), overriding *numerical*, and
        the value is passed through set().  Otherwise *numerical* is used as
        given and *value* is stored unchecked.
        """
        self.__value = value
        self.__numerical = numerical
        self.__gradebook_name = ""

        if value:
            if isinstance(value, (int, float)):
                self.__numerical = True
            elif isinstance(value, str):
                self.__numerical = False
            self.set(value)

        self.__gradebook_name = name
        data.__init__(self, name + " Grade Data Object")

    def set(self, value):
        """Store *value* if it matches the kind of this grade.

        A number is stored on a numerical grade; a string is stored on a
        letter grade only when it is one of the known letter grades (any
        other string is ignored without an error).  Every other combination
        prints an error and raises Exception.
        """
        if isinstance(value, (int, float)) and self.__numerical:
            self.__value = value
            return

        if isinstance(value, str) and not self.__numerical:
            if value in self.__letter_grades:
                self.__value = value
            return

        print( self.name()+" Error: Bad Grade.")
        raise Exception

    def value(self):
        """Return the stored value."""
        return self.__value

    def numerical(self):
        """Return True for a numerical grade, False for a letter grade."""
        return self.__numerical

    def gradebook_name(self):
        """Return the short name this grade is filed under."""
        return self.__gradebook_name

    def __str__(self):
        """Format the grade as "<gradebook name>: <value>"."""
        return ": ".join((self.__gradebook_name, str(self.__value)))


In [None]:
class student(data):
    """A student: an id number plus grade objects keyed by their grade-book name.

    ``a_student[key]`` returns the grade stored under *key*.
    """

    def __init__(self, first_name, last_name, id_number):
        """Create a student named "<first_name> <last_name> Student Data"."""
        self.__grades = {}
        self.__id_number = id_number
        data.__init__(self, first_name + " " + last_name + " Student Data")

    def add_grade(self, a_grade, overwrite=False):
        """File *a_grade* under its grade-book name.

        An existing grade with the same name is only replaced when
        *overwrite* is true; otherwise an error is printed and Exception
        is raised.
        """
        key = a_grade.gradebook_name()
        if key in self.__grades and not overwrite:
            print (self.name()+" Error Adding Grade "+a_grade.name()+". Grade already exists.")
            raise Exception
        self.__grades[key] = a_grade

    def id_number(self):
        """Return the id number given at construction."""
        return self.__id_number

    def grade_names(self):
        """Return a view of the names of all stored grades."""
        return self.__grades.keys()

    def grades(self):
        """Return the dictionary of grades itself (not a copy)."""
        return self.__grades

    def __getitem__(self, key):
        """Return the grade stored under *key*; KeyError if there is none."""
        return self.__grades[key]

    def print_grades(self):
        """Print every stored grade, one per line."""
        for a_grade in self.__grades.values():
            print (a_grade)

In [None]:
class grade_book(data):
    """A course grade book: students keyed by id number plus a data store.

    Indexing the grade book (``book[key]`` and ``book[key] = value``) reads
    and writes the data store, which holds arbitrary data associated with
    the class, not the students.
    """

    def __init__(self, name):
        """Create an empty grade book named "<name> Course Grade Book"."""
        data.__init__(self, name + " Course Grade Book")
        self.__students = dict()
        self.__data = dict()

    def __getitem__(self, key):
        """Return the data-store entry for *key*; KeyError if missing."""
        return self.__data[key]

    def __setitem__(self, key, value):
        """Store *value* in the data store under *key*."""
        self.__data[key] = value

    def add_student(self, a_student):
        """Add *a_student*, keyed by its id number (replacing any previous one)."""
        self.__students[a_student.id_number()] = a_student

    def get_students(self):
        """Return the dictionary of students (id number -> student)."""
        return self.__students

    def assign_grade(self, key, a_grade):
        """Add *a_grade* to the student identified by *key*.

        *key* is tried as an id number first and, failing that, compared
        with every student's name.  If no student matches, an error is
        printed and nothing is changed.
        """
        the_student = None
        try:
            the_student = self.__students[key]
        except (KeyError, TypeError):
            # Unknown id (KeyError) or a key that cannot be hashed
            # (TypeError): fall back to matching on the student's name.
            for candidate in self.__students.values():
                if key == candidate.name():
                    the_student = candidate
                    break

        if the_student is not None:
            the_student.add_grade(a_grade)
        else:
            print (self.name()+" Error: Did not find student.")

    # Accessors
    def data(self):
        """Return the data store itself (not a copy)."""
        return self.__data

    def students(self):
        """Return the dictionary of students (same object as get_students())."""
        return self.__students

    def get_data(self, key=None):
        """Return a new dict with the data-store entries whose key contains *key*.

        With no (or a falsy) *key*, every entry is returned.
        """
        if not key:
            return dict(self.__data)
        return {k: v for k, v in self.__data.items() if key in k}

    # Print functions
    def print_data(self):
        """Print every data-store entry as "key : value"."""
        for k, v in self.__data.items():
            print (k,":",v)

    def print_grades(self, grade_name):
        """Print, per student, the name followed by the requested grade(s).

        *grade_name* is either one grade name or an iterable of names.
        """
        if isinstance(grade_name, str):
            grade_names = [grade_name]
        else:
            grade_names = grade_name

        for a_student in self.__students.values():
            print (a_student.name(),end="")
            for a_grade_name in grade_names:
                print (a_student[a_grade_name],end="")
            print()

    def print_students(self):
        """Print every student (id, name, all grades) followed by a separator line."""
        for k, a_student in self.__students.items():
            print (k, a_student.name())
            a_student.print_grades()
            print ("_______________________________________")

    def apply_calculator(self, a_calculator, **kwargs):
        """Run *a_calculator* on this grade book, forwarding any keyword arguments."""
        a_calculator.apply(self, **kwargs)




## Building a Gradebook

In [None]:
# Read Data into a Pandas DataFrame
df = pd.read_csv("../Lecture.32/Data-1401-Grades-Fixed.csv")

In [None]:
# Create mask to keep only lines with numbers: every other row, starting
# with the first.  The mask is sized from the DataFrame itself so it stays
# valid if the number of rows in the file changes.
mask=[row_i%2==0 for row_i in range(len(df))]

# Apply mask and remove NaNs
df_0=df[mask].fillna(0)

# Fix Exam 1 entries: keep the integer in front of the first "-"
df_0["Exam 1 Fixed"] = [int(entry.split("-")[0]) for entry in df_0["Exam 1"].tolist()]

def build_grade_book(df_0):
    """Build the "Data 1401" grade book with one student per row of *df_0*.

    Student number ``i`` (named "Student i", id ``i``) receives one grade per
    column, named after the column.  A cell that converts to ``float`` is
    stored as a number; any other cell is stored as its string form.

    Returns the populated grade book.
    """
    a_grade_book=grade_book("Data 1401")

    # Convert every column to a list once, instead of once per cell.
    columns={k: df_0[k].tolist() for k in df_0.keys()}

    for student_i in range(df_0.shape[0]):
        a_student_0=student("Student",str(student_i),student_i)

        # Add data
        for k,column in columns.items():
            raw=column[student_i]
            # Only the numeric conversion is guarded, so errors raised while
            # creating or adding the grade are no longer masked.
            try:
                value=float(raw)
            except (TypeError,ValueError):
                value=str(raw)
            a_student_0.add_grade(grade(k,value=value))

        a_grade_book.add_student(a_student_0)

    return a_grade_book
        


In [None]:
a_grade_book=build_grade_book(df_0)

## Algorithm Classes
### Migrating Algorithms

We have redesigned the calculator part of our Grade Book framework. We now have to migrate the existing calculator algorithms to work with this redesign. The main thing to notice is that previously the `apply_xxx` methods

* found the data they needed in the grade book
* looped over that data
* applied the algorithm
* placed the results back into grade book

Here are the methods of the old implementation:


In [None]:
    def apply_summary(self,a_grader):
        """Run *a_grader* on each student and add the grade it returns to that student."""
        for a_student in self.__students.values():
            summary_grade=a_grader.apply(a_student)
            a_student.add_grade(summary_grade)
    
    def apply_grader(self,a_grader,grade_name):
        """Run *a_grader* on each student's *grade_name* grade and add the result to that student."""
        for a_student in self.__students.values():
            new_grade=a_grader.apply(a_student[grade_name])
            a_student.add_grade(new_grade)
            
    def apply_stats(self,a_stat_comp,grade_name):
        """Collect every student's *grade_name* value and return what *a_stat_comp* computes from them."""
        values=[a_student[grade_name].value()
                for a_student in self.__students.values()]
        return a_stat_comp.apply(values)

which were replaced with:

In [None]:
   def apply_calculator(self,a_calculator,**kwargs):
        # Hand this whole object to the calculator; keyword arguments are
        # passed through unchanged.
        a_calculator.apply(self,**kwargs)

Therefore, whatever was in the different `apply_xxx` functions of the old `grade_book` implementation has to be moved into the `apply` function of the calculators. 

For example, compare the `apply` methods of the old and new implementations of this `grader` algorithm: 

In [None]:
    # Old implementation
    def apply(self,a_grade):
        """Return the curved letter grade for one numerical grade.

        The value is taken as a fraction of the maximum grade, shifted by the
        mean and rescaled so that the mean maps to 0.8 and one standard
        deviation is worth 0.1.  The first row of the grade table whose bound
        is met supplies the letter; the last row is used when none is met.

        Prints an error and raises Exception when *a_grade* is not a grade
        object or is not numerical.
        """
        if not isinstance(a_grade,grade):
            print (self.name()+ " Error: Did not get an proper grade as input.")
            raise Exception
        if not a_grade.numerical():
            print (self.name()+ " Error: Did not get a numerical grade as input.")
            raise Exception

        # Rescale the grade
        top=self.__max_grade
        distance_from_mean=a_grade.value()/top-(self.__mean/top)
        curved=0.1*distance_from_mean/(self.__std/top)+0.8

        letter=self.__grades_definition[-1][1]
        for row in self.__grades_definition:
            if curved>=row[0]:
                letter=row[1]
                break

        return grade(self.__grade_name,value=letter)

In [None]:
    # migrated implementation
    def apply(self,a_grade_book,grade_name=None,**kwargs):
        """Give every student in *a_grade_book* a "<grade_name> Letter" grade.

        *grade_name* falls back to the name stored on the calculator.  The
        letter comes from the first row of the grade table whose bound is met
        by ``value / max_grade`` (the last row when none is met).  Prints an
        error and raises Exception on a non-numerical grade.
        """
        if not grade_name:
            grade_name=self.__grade_name

        for a_student in a_grade_book.get_students().values():
            a_grade=a_student[grade_name]

            if not a_grade.numerical():
                print (self.name()+ " Error: Did not get a numerical grade as input.")
                raise Exception

            percent=a_grade.value()/self.__max_grade

            letter=self.__grades_definition[-1][1]
            for row in self.__grades_definition:
                if percent>=row[0]:
                    letter=row[1]
                    break

            a_student.add_grade(grade(grade_name+" Letter",value=letter))
            

Let's test with our class data:

In [None]:
class calculator(alg):
    """Base class for algorithms that are applied to a whole grade book."""

    def __init__(self, name):
        """Create a calculator called *name*."""
        alg.__init__(self, name)

    def apply(self, a_grade_book, **kwargs):
        """Run the algorithm on *a_grade_book*; subclasses must override.

        Keyword arguments are accepted because the grade book forwards its
        own keyword arguments to this method.

        Raises NotImplementedError when not overridden.
        """
        raise NotImplementedError

class uncurved_letter_grade_percent(calculator):
    """Assign +/- letter grades from the raw fraction ``value / max_grade``."""

    # (lower bound on the fraction, letter), highest bound first; the first
    # row whose bound is met supplies the letter.
    __grades_definition = [(.97, "A+"),
                           (.93, "A"),
                           (.9, "A-"),
                           (.87, "B+"),
                           (.83, "B"),
                           (.8, "B-"),
                           (.77, "C+"),
                           (.73, "C"),
                           (.7, "C-"),
                           (.67, "D+"),
                           (.63, "D"),
                           (.6, "D-"),
                           (.57, "F+"),
                           (.53, "F"),
                           (0., "F-")]
    __max_grade = 100.
    __grade_name = str()

    def __init__(self, grade_name, max_grade=100.):
        """Create a calculator for the grade *grade_name* scored out of *max_grade*."""
        self.__max_grade = max_grade
        self.__grade_name = grade_name
        calculator.__init__(self,
                            "Uncurved Percent Based Grade Calculator "+self.__grade_name+" Max="+str(self.__max_grade))

    def apply(self, a_grade_book, grade_name=None, overwrite=False, **kwargs):
        """Give every student in *a_grade_book* a "<grade_name> Letter" grade.

        *grade_name* falls back to the name given at construction.  With
        ``overwrite=True`` an existing letter grade is replaced; by default
        adding it a second time raises, as before.

        Prints an error and raises Exception on a non-numerical grade.
        """
        if not grade_name:
            grade_name = self.__grade_name

        for a_student in a_grade_book.get_students().values():
            a_grade = a_student[grade_name]

            if not a_grade.numerical():
                print (self.name()+ " Error: Did not get a numerical grade as input.")
                raise Exception

            percent = a_grade.value()/self.__max_grade

            # Falls through to the last row when no bound is met.
            letter = self.__grades_definition[-1][1]
            for bound, candidate in self.__grades_definition:
                if percent >= bound:
                    letter = candidate
                    break

            a_student.add_grade(grade(grade_name+" Letter", value=letter),
                                overwrite=overwrite)
            

In [None]:
a_grade_book.apply_calculator(uncurved_letter_grade_percent("Lab 2",max_grade=100))
# for k,a_student in a_grade_book.get_students().items():
#     print (a_student.id_number(),a_student["Lab 2"],a_student["Lab 2 Letter"])

In [None]:
a_grade_book.print_students()

### Examples of other types of calculators

In [None]:
class mean_std_calculator(calculator):
    """Publish mean, standard deviation, max and min of one grade in the grade book."""

    def __init__(self, grade_name, cut_off=None):
        """Remember the default grade name and the optional default cut-off."""
        self.__grade_name = grade_name
        self.__cut_off = cut_off
        calculator.__init__(self, "Mean and Standard Deviation Calculator")

    def apply(self, a_grade_book, grade_name=None, cut_off=None, **kwargs):
        """Compute the statistics of *grade_name* over all students.

        *grade_name* and *cut_off* fall back to the values given at
        construction.  With a truthy cut-off only values strictly above it
        are used.  Results are stored in the grade book under
        "<grade_name> Mean", " STD", " Max" and " Min".
        """
        grade_name = grade_name or self.__grade_name
        cut_off = cut_off or self.__cut_off

        all_values = [a_student[grade_name].value()
                      for a_student in a_grade_book.get_students().values()]
        if cut_off:
            grades = [val for val in all_values if val > cut_off]
        else:
            grades = all_values

        a_grade_book[grade_name+" Mean"] = np.mean(grades)
        a_grade_book[grade_name+" STD"] = math.sqrt(np.var(grades))
        a_grade_book[grade_name+" Max"] = max(grades)
        a_grade_book[grade_name+" Min"] = min(grades)

In [None]:
class grade_summer(calculator):
    """Add up each student's grades named "<prefix>1", "<prefix>2", ...

    The total is stored on the student as the grade "<prefix>sum".
    """

    def __init__(self, prefix, n=None):
        """Remember the label *prefix* and the optional bound *n* on the labels."""
        self.__prefix = prefix
        self.__n = n
        calculator.__init__(self, "Sum Grades")

    def _labels_for(self, a_student):
        """Return the grade labels to sum, using *a_student* to discover them if needed."""
        if self.__n:
            # NOTE(review): range(1, n) yields n-1 labels (n=5 -> 1..4).
            # Kept as in the original; confirm whether n was meant to be
            # inclusive.
            return [self.__prefix+str(x) for x in range(1, self.__n)]

        # No bound given: take consecutive labels until one is missing.
        labels = list()
        for i in range(1, 1000):
            label = self.__prefix+str(i)
            try:
                a_student[label]
            except KeyError:
                break
            labels.append(label)
        return labels

    def apply(self, a_gradebook, **kwargs):
        """Store the "<prefix>sum" grade on every student of *a_gradebook*.

        The labels are determined once, from the first student, and reused
        for all others.  Keyword arguments are forwarded to the student's
        ``add_grade`` (e.g. ``overwrite=True``).
        """
        labels = None

        # Iterate the grade book that was passed in, not the notebook-level
        # global of a similar name.
        for a_student in a_gradebook.get_students().values():
            if labels is None:
                labels = self._labels_for(a_student)

            grade_sum = 0.
            for label in labels:
                grade_sum += a_student[label].value()

            a_student.add_grade(grade(self.__prefix+"sum", value=grade_sum), **kwargs)

### Migration Exercise

Migrate the following algorithm from old to new calculator implementation. Use the example above to guide you. You will have to:

* Change the arguments of the apply function
* Insert loop over students (copy it from the analogous place above)
* Instead of returning the result, update the data within the grade book.

In [None]:
class grade_calculator(alg):
    """Old-style calculator base: turns one grade into another grade."""

    def __init__(self, name, stats=None):
        """Create a calculator called *name*.

        *stats* is stored but not used by this class.  It is optional so
        that subclasses which pass only a name can be instantiated.
        """
        self.__stats = stats
        alg.__init__(self, name)

    def apply(self, a_grade):
        """Return a grade computed from *a_grade*; subclasses must override."""
        raise NotImplementedError
        # Returns a grade
            
class curved_letter_grade(grade_calculator):
    """Old-style calculator: curve one numerical grade into a +/- letter grade."""

    # (lower bound on the curved fraction, letter), highest bound first; the
    # first row whose bound is met supplies the letter.
    __grades_definition = [(.97, "A+"),
                           (.93, "A"),
                           (.9, "A-"),
                           (.87, "B+"),
                           (.83, "B"),
                           (.8, "B-"),
                           (.77, "C+"),
                           (.73, "C"),
                           (.7, "C-"),
                           (.67, "D+"),
                           (.63, "D"),
                           (.6, "D-"),
                           (.57, "F+"),
                           (.53, "F"),
                           (0., "F-")]
    __max_grade = 100.
    __grade_name = str()

    def __init__(self, grade_name, mean, std, max_grade=100.):
        """Create a calculator producing the grade *grade_name*.

        *mean* and *std* describe the distribution being curved; *max_grade*
        is the highest possible score.
        """
        self.__max_grade = max_grade
        self.__mean = mean
        self.__std = std
        self.__grade_name = grade_name
        # The base class takes a ``stats`` argument after the name; the curve
        # parameters are passed for it.
        grade_calculator.__init__(self,
                                  "Curved Percent Based Grade Calculator "+self.__grade_name+
                                  " Mean="+str(self.__mean)+
                                  " STD="+str(self.__std)+
                                  " Max="+str(self.__max_grade),
                                  {"mean": mean, "std": std})

    def apply(self, a_grade):
        """Return the curved letter grade for *a_grade*.

        The value is taken as a fraction of the maximum grade, shifted by the
        mean and rescaled so that the mean maps to 0.8 and one standard
        deviation is worth 0.1.

        Prints an error and raises Exception when *a_grade* is not a grade
        object or is not numerical.
        """
        if not isinstance(a_grade, grade):
            print (self.name()+ " Error: Did not get an proper grade as input.")
            raise Exception
        if not a_grade.numerical():
            print (self.name()+ " Error: Did not get a numerical grade as input.")
            raise Exception

        # Rescale the grade
        percent = a_grade.value()/self.__max_grade
        shift_to_zero = percent-(self.__mean/self.__max_grade)
        scale_std = 0.1*shift_to_zero/(self.__std/self.__max_grade)
        scaled_percent = scale_std+0.8

        # Falls through to the last row when no bound is met.
        letter = self.__grades_definition[-1][1]
        for bound, candidate in self.__grades_definition:
            if scaled_percent >= bound:
                letter = candidate
                break

        return grade(self.__grade_name, value=letter)
            

In [None]:
# Solution here
            
class curved_letter_grade(calculator):
    """Curve a numerical grade into a +/- letter grade for every student."""

    # (lower bound on the curved fraction, letter), highest bound first; the
    # first row whose bound is met supplies the letter.
    __grades_definition = [(.97, "A+"),
                           (.93, "A"),
                           (.9, "A-"),
                           (.87, "B+"),
                           (.83, "B"),
                           (.8, "B-"),
                           (.77, "C+"),
                           (.73, "C"),
                           (.7, "C-"),
                           (.67, "D+"),
                           (.63, "D"),
                           (.6, "D-"),
                           (.57, "F+"),
                           (.53, "F"),
                           (0., "F-")]
    __max_grade = 100.
    __grade_name = str()

    def __init__(self, grade_name, mean, std, max_grade=100.):
        """Create a calculator for the grade *grade_name*.

        *mean* and *std* describe the distribution being curved; *max_grade*
        is the highest possible score.
        """
        self.__max_grade = max_grade
        self.__mean = mean
        self.__std = std
        self.__grade_name = grade_name
        calculator.__init__(self,
                            "Curved Percent Based Grade Calculator "+self.__grade_name+
                            " Mean="+str(self.__mean)+
                            " STD="+str(self.__std)+
                            " Max="+str(self.__max_grade))

    def apply(self, a_grade_book, grade_name=None,
              overwrite=False, **kwargs):
        """Give every student in *a_grade_book* a "<grade_name> Letter" grade.

        *grade_name* falls back to the name given at construction; the result
        is filed under the same name that was read.  The value is taken as a
        fraction of the maximum grade, shifted by the mean and rescaled so
        that the mean maps to 0.8 and one standard deviation is worth 0.1.
        With ``overwrite=True`` an existing letter grade is replaced.

        Prints an error and raises Exception when a student's grade is not a
        grade object or is not numerical.
        """
        if not grade_name:
            grade_name = self.__grade_name

        for a_student in a_grade_book.get_students().values():
            a_grade = a_student[grade_name]

            if not isinstance(a_grade, grade):
                print (self.name()+ " Error: Did not get an proper grade as input.")
                raise Exception
            if not a_grade.numerical():
                print (self.name()+ " Error: Did not get a numerical grade as input.")
                raise Exception

            # Rescale the grade
            percent = a_grade.value()/self.__max_grade
            shift_to_zero = percent-(self.__mean/self.__max_grade)
            scale_std = 0.1*shift_to_zero/(self.__std/self.__max_grade)
            scaled_percent = scale_std+0.8

            # Falls through to the last row when no bound is met.
            letter = self.__grades_definition[-1][1]
            for bound, candidate in self.__grades_definition:
                if scaled_percent >= bound:
                    letter = candidate
                    break

            a_student.add_grade(grade(grade_name+" Letter", value=letter),
                                overwrite=overwrite)



In [None]:
# Step 1: compute the "Lab 3" statistics and store them in the grade book.
a_grade_book.apply_calculator(mean_std_calculator(grade_name="Lab 3"))

# Step 2: curve "Lab 3" with those statistics.  overwrite=True is an argument
# of apply_calculator, which forwards it to the calculator's apply method.
a_grade_book.apply_calculator(
    curved_letter_grade("Lab 3",
                        a_grade_book["Lab 3 Mean"],
                        a_grade_book["Lab 3 STD"]),
    overwrite=True)

In [None]:
a_grade_book.print_students()

## Running a sequence of algorithms



In [None]:
# Each entry is a zero-argument factory, not a ready-made calculator: the
# later steps read values (e.g. "Lab sum Max") that are only present in
# a_grade_book after the earlier steps have run, so each calculator must be
# built right before it is applied.
algs=[
    # Sum the lab grades
    lambda: grade_summer("Lab ",n=5),

    # Calculate the stats -> determine cut off
    lambda: mean_std_calculator("Lab sum",0.),

    # Calculate the stats with cut off
    lambda: mean_std_calculator("Lab sum",a_grade_book["Lab sum Max"]/2.),

    # Curve using new stats
    lambda: curved_letter_grade("Lab sum",
                                a_grade_book["Lab sum Mean"],
                                a_grade_book["Lab sum STD"]),
]

In [None]:
# Build and apply each calculator in order; the cell shows one result per step.
[a_grade_book.apply_calculator(make_alg(),overwrite=True) for make_alg in algs]

### More Exercises

1. Write an algorithm that removes grades that are less than a value (default 50% of max possible grade), and distributes grades as follows:
    * 16% A
    * 34% B
    * 34% C
    * 16% D

1. Write new algorithms:
    1. Write an algorithm that uses the grade boundaries to assign grades, including +/-. 
    1. Write an algorithm that sums up a provided list of grades, dropping the lowest $n$.
    1. Write an algorithm that histograms letter grades, validating that it follows the bell curve distribution.

    For each:
        * Determine what type of algorithm.
        * Copy paste analogous algorithm.
        * Change name.
        * Remove code.
        * Add new code.
            * Sketch out how to implement the appropriate logic.
            * Perhaps work out specific pieces in another cell.
            * Implement.
        * Test.
             

    

In [None]:
class forced_curve(calculator):
    """Find the grade boundaries of a forced 16% A / 34% B / 34% C / 16% D curve."""

    def __init__(self, grade_name, cut_off=None):
        """Remember the default grade name and the optional default cut-off."""
        self.__grade_name = grade_name
        self.__cut_off = cut_off
        calculator.__init__(self, "Forced Curve Boundry Statistical Calculator")

    def apply(self, a_grade_book, grade_name=None, cut_off=None, **kwargs):
        """Store the letter boundaries of *grade_name* in the grade book.

        *grade_name* and *cut_off* fall back to the values given at
        construction.  With a truthy cut-off only values strictly above it
        take part.  The remaining values are sorted from highest to lowest;
        the boundary of a letter is the value of the last student inside its
        band.  The result, a dict with keys "A", "B", "C" and "D", is stored
        under "<grade_name> Boundries"; it is empty when no values remain.
        """
        grade_name = grade_name or self.__grade_name
        cut_off = cut_off or self.__cut_off

        grades = list()
        for a_student in a_grade_book.get_students().values():
            a_grade_val = a_student[grade_name].value()
            if not cut_off or a_grade_val > cut_off:
                grades.append(a_grade_val)

        grades.sort(reverse=True)
        n_students = len(grades)

        boundries = dict()
        if n_students:
            n_16 = int(0.16 * float(n_students))
            n_34 = int(0.34 * float(n_students))

            def boundary(n_in_or_above):
                # Clamp into the list: for small classes a band can be empty,
                # and a negative index would silently wrap around to the
                # lowest grade.
                return grades[min(n_students-1, max(0, n_in_or_above-1))]

            boundries["A"] = boundary(n_16)
            boundries["B"] = boundary(n_16+n_34)
            boundries["C"] = boundary(n_16+n_34+n_34)
            boundries["D"] = boundary(n_16+n_34+n_34+n_16)

        a_grade_book[grade_name+" Boundries"] = boundries


In [None]:
a_grade_book.apply_calculator(forced_curve(grade_name="Exam 1"))



In [None]:
a_grade_book.print_data()