In [24]:
import pandas as pd
import numpy as np
from datetime import datetime
import xlrd

def remove(data):
    # Add course - whatever you need to remove
    removal = ["HIS 315K", "CH 104M", "RHE 306"]
    for i in removal:
        data = data[data["Course Code"] != i]
    return data
    
# Naming convention to be updated
def standardize(data):
    #Naming Convention
    convention = {"CS 302": "Computer Science",
        "RHE 306": "English Language Arts (ENGL 1301)",
        "RHE 309K": "English Language Arts",
        "GEO 302E": "Geoscience",
        "HIS 315K": "US History (HIST 1301)",
        "HIS 315L": "US History",
        "M 305G": "Precalculus (MATH 2312)",
        "MATH 1550": "MATH 1550",
        "MATH 2300": "MATH 1550",
        "PHY 302K": "Physics (PHYS 1301)",
        "PHY 302L": "Physics (PHYS 1302)",
        "PHY 1403": "PHY 1403",
        "SDS 302": "Statistics",
        "AET 304": "Arts & Entertainment Technologies (AET)",
        "CH 301": "Chemistry",
        "CH 104M": "Chemistry Lab",
        "M 301": "College Algebra"}
    convention = pd.Series(convention)
    
    for i in convention.index:
        data.loc[data["Course Code"] == i, "Course Title"] = convention[i]
    return data

#Load Data
def get_master(master, district = ''):
    data = pd.read_csv(master)
    data = remove(data)
    data = data[data["Eligibility Status Final Simple"] != 'HS Only']
    data = standardize(data)
    if district == '':
        return data
    return data[data["District Name"] == district]

def read_data(master, district):
    data = get_master(master, district)
    UT = data[data["Partner"] == "UT Austin OnRamps"]
    TTU = data[data["Partner"] == "TTU OnRamps"]
    return UT, TTU

def read_data_without_differentiate(master, district):
    data = get_master(master, district)
    return data

# Get Row totals
def get_row_total(data):
    data['Total'] = 0
    for index, row in data.iterrows():
        data.loc[index, 'Total'] = sum(data.loc[index])
    return data

# Get Column totals
def get_column_total(data, exclude = ""):
    data.loc['Total'] = 0
    for i in data.columns:
        data.loc['Total', i] = data[i].sum()
    return data

#Figure 1.1
def get_course(data, label = []):
    if label == '':
        course = data["Course Code"]
    else:
#         print(label)
        course = data[label]
    return course.drop_duplicates()

def Figure_1_2(data):
    ans = pd.DataFrame()
    ans["Instructor"]= get_course(data, ['Course Title', 'Instructor EID'])["Course Title"].value_counts().astype("int32")
    ans["Section"] = get_course(data, ['Course Title', 'College Section Name'])["Course Title"].value_counts().astype("int32")
    
    total = get_column_total(ans.copy(deep = True))
    total["Course Name"] = total.index
    total = total[["Course Name", "Instructor", "Section"]]
    total.columns = ["Course Name", "Number of Instructors", "Number of Course Sections Offered"]

    ans["Course Name"] = ans.index
    ans = ans[["Course Name", "Instructor", "Section"]]
    ans.columns = ["Course Name", "Number of Instructors", "Number of Course Sections Offered"]
    ans = ans.sort_values("Course Name")
    ans.loc['Total'] = 0
    for i in ans.columns:
        ans.loc['Total', i] = total.loc['Total', i]
    return ans

def Figure_1_2_QC(data, district_name):
    template = [['', '', '', '', '', ''],
        ['', f'Figure 1.2 shows the number of OnRamps high school instructors and sections by course in {district_name}.\n\nFigure 1.2: Instructor enrollment at district level in the 2018-2019 academic year', '', '', '', ''],
        ['', '', '', '', '', ''],
        ['', '', 'Course Name', 'Number of Instructors', 'Number of Course Sections Offered', '']
        ]
    for i in range(len(data)):
        lrow = ['', '']
        for j in range(len(data.columns)):
            lrow.append(data.iloc[i,j])
        lrow.append('')
        template.append(lrow)
        print (template)
    return template

def verify(inputFile, QCData, index):
    loc = (inputFile)
    wb = xlrd.open_workbook(loc) 
    sheet = wb.sheet_by_index(index)
    QCCompleted = 1
    for i in range(sheet.nrows):
        if sheet.row_values(i) != QCData[i]:
            print(inputFile + " sheet " + str(index) + " " + str(i) + " Error!")
            print("EOY Output:")
            print(sheet.row_values(i))
            print("QC Output:")
            print(QCData[i])
            QCCompleted = 0
    return True

In [25]:
def report_generator_UT_TTU(master, district):
    
    OnRamps = standardize(read_data_without_differentiate(master, district))
    UT, TTU = read_data(master, district)

    funclist = [(Figure_1_2,UT, Figure_1_2_QC),               ]
    for i in funclist:
        func, data,QC= i
#         print ("func:",func)
        return verify(district + ".xlsx", QC(func(data), district), 2)

In [26]:
master = "MFEOY1819.csv"
district = pd.read_csv("district not finished.csv", header = None)
report_generator_UT_TTU(master, "Mineral Wells ISD")

# for i in range(len(district)):
#     distrct_name = district.loc[i, 0]
#     try:
#         if report_generator_UT_TTU(master, distrct_name) == True:
#             print('passed if'+master+distrct_name)
#             if i < len(district) - 1:
#                 district.loc[i + 1 :].to_csv("district not finished.csv", index=False)
#     except FileNotFoundError as error:
#         print(distrct_name)
#         print(error)

[['', '', '', '', '', ''], ['', 'Figure 1.2 shows the number of OnRamps high school instructors and sections by course in Mineral Wells ISD.\n\nFigure 1.2: Instructor enrollment at district level in the 2018-2019 academic year', '', '', '', ''], ['', '', '', '', '', ''], ['', '', 'Course Name', 'Number of Instructors', 'Number of Course Sections Offered', ''], ['', '', 'Total', 0, 0, '']]
Mineral Wells ISD.xlsx sheet 2 4 Error!
EOY Output:
['', '', 'Physics (PHYS 1301)', 1.0, 2.0, '']
QC Output:
['', '', 'Total', 0, 0, '']


IndexError: list index out of range