In [54]:
import pandas as pd
import numpy as np
from datetime import datetime
import xlrd

hum = ['HIS 315K', 'HIS 315L','RHE 306', 'RHE 309K', 'AET 304', 'NSC 309', 'CS 302']
courseList = {"CS 302": "Computer Science",
        "RHE 306": "ENGL 1301",
        "RHE 309K": "ENGL 1302",
        "GEO 302E": "Geoscience",
        "HIS 315K": "HIST 1301",
        "HIS 315L": "HIST 1302",
        "M 305G": "Precalculus",
        "PHY 302K": "PHYS 1301",
        "PHY 302L": "PHYS 1302",
        "PHY 1403": "PHY 1403",
        "SDS 302": "Statistics",
        "AET 304": "AET",
        "CH 301": "Chemistry",
        "CH 104M": "Chemistry Lab",
        "M 301": "College Algebra",
        "NSC 309": "NSC 309"}
courseList = pd.Series(courseList)

def remove(data):
    removal = []
    for i in removal:
        data = data[data["Course Code"] != i]
    return data
    
# Naming convention to be updated
def standardize(data):
    convention = courseList
    for i in convention.index:
        data.loc[data["Course Code"] == i, "Course Title"] = convention[i]
    return data

#Load Data
def get_master(master, district = ''):
    data = pd.read_csv(master)
    data = remove(data)
    data = data[data["Eligibility Status Final Simple"] != 'HS Only']
    data = standardize(data)
    if district == '':
        return data
    return data[data["District Name"] == district]

def read_data(master, district):
    data = get_master(master, district)
    UT = data[data["Partner"] == "UT Austin OnRamps"]
    TTU = data[data["Partner"] == "TTU OnRamps"]
    return UT, TTU

def read_data_without_differentiate(master, district):
    data = get_master(master, district)
    return data

# Get Row totals
def get_row_total(data2, clean = True):
    if clean:
        data = pd.DataFrame(columns = data2.columns)
        data['Total'] = 0
        for index, row in data2.iterrows():
            rowSum = sum(data2.loc[index])
            if rowSum > 0:
                data.loc[index] = data2.loc[index]
                data.loc[index, 'Total'] = rowSum
    else:
        data = data2   
        data['Total'] = 0
        for index, row in data.iterrows():
            data.loc[index, 'Total'] = sum(data.loc[index])
    return data

# Get Column totals
def get_column_total(data2, clean = True):
    if clean:
        data = pd.DataFrame(index = data2.index)
        for i in data2.columns:
            colSum = data2[i].sum()
            if colSum > 0:
                data[i] = data2[i]
                data.loc['Total', i] = colSum
    else:
        data = data2
        data.loc['Total'] = 0
        for i in data.columns:
            data.loc['Total', i] = data[i].sum()
    return data
    
def get_Campus(data):
    return data['Campus Name'].drop_duplicates()

def get_unique_values(date, label):
    return data[label].drop_duplicates()

def get_percent(data, total = 0, column = '', column2 = None):
    order = []
    for i in data.columns:
        order.append(i)
        if total != 0:
            order.append(i + '_%')
            data[i + '_%'] = data[i]/total
        elif column != '':
            order.append(i + '_%')
            data[i + '_%'] = data[i]/data['Total'] 
        elif column2 is not None:
            order.append(i + '_%')
            data[i + '_%'] = data[i]/column2
    return data[order]

def fill_row(row, num, fill = ''):
    while len(row) < num:
        row.append(fill)
    return row

def Figure_3_2(data, district):
    data = data[data["District Name"] == district]
    data = data[data["Eligibility Status Final Simple"] == "Eligible"]
    campus = get_Campus(data)
    grade = ['Yes', 'No', 'Withdraw']
    ans = pd.DataFrame(index = pd.Index(campus), columns = grade)
    for i in grade:
        ans[i] = data[data["College Credit Earned Final"] == i]["Campus Name"].value_counts()
    ans = ans.fillna(0)
    ans.columns = ['Earned College Credit', 'Did Not Earn College Credit', 'Withdrew prior to Final Grade Determination']
    ans = ans.sort_index()
    ans = get_row_total(ans, True)
    ans = get_column_total(ans, True)
    # Don't remove 'Total'
    cols = ['Campus Name'] + ans.columns.tolist()
    ans = get_percent(ans, column2 = ans['Total'])
    return (ans, cols)

def Figure_3_2_QC(data, district_name):
    cols = data[1]
    data = data[0]
    
    template = [['', '', '', '', '', '', '', '', '', '', '', ''],
        ['', 'Figure 3.2 shows the number and percentage of eligible students on each campus who earned college credit, did not earn college credit, or withdrew from the distance college course prior to final grade determination. Students who made a D- or above in the distance college course earned college credit.', '', '', '', '', '', '', '', '', '', ''],
        ['', '', '', '', '', '', '', '', '', '', '', ''],
        ['', 'Figure 3.2: Eligible students who earned credit at campus level in the 2018-2019 academic year', '', '', '', '', '', '', '', '', '', ''],
        ['', '', '', '', '', '', '', '', '', '', '', ''],
        cols
        ]
    # -1 for disregarding campus name
    template.append(['N', '%'] * (len(cols) - 1))
    for i in data.index:
        lrow = ['', '', i]
        lrow = lrow + data.loc[i].tolist()
        lrow = lrow + ['', '']
        template.append(lrow)
    return template

def filter_str(row):
    if row is None:
        return row
    newRow = []
    for j in row:
        if isinstance(j, float):
            newRow.append(round(j, 10))
        elif j != '' and j != None:
            newRow.append(j)
    return newRow

def verify(inputFile, QCData, index):
    loc = (inputFile)
    wb = xlrd.open_workbook(loc) 
    sheet = wb.sheet_by_index(index)
    QCCompleted = 1
    for i in range(sheet.nrows):
        sheetRow = filter_str(sheet.row_values(i))
        QCRow = filter_str(QCData[i])
        if sheetRow != QCRow:
            print(inputFile + " (sheet " + str(index + 1) + ") line " + str(i) + " Error!")
            print("EOY Output:")
            print(sheetRow)
            print("QC Output:")
            print(QCRow)
            QCCompleted = 0
    return True

In [55]:
def report_generator_UT_TTU(master, district):
    
    OnRamps = standardize(read_data_without_differentiate(master, district))
    UT, TTU = read_data(master, district)

    funclist = [(UT, Figure_3_2, Figure_3_2_QC)]
    for i in funclist:
        data, func, QC= i
        return verify(district + ".xlsx", QC(func(data, district), district), 10)

master = "18-19 FAKE Master File (Final).csv"
district = pd.read_csv("DistrictName2.csv", header = None)

for i in range(len(district)):
    distrct_name = district.loc[i, 0]
    try:
        if report_generator_UT_TTU(master, distrct_name) == True:
            if i < len(district) - 1:
                district.loc[i + 1 :].to_csv("district not finished.csv", index=False)
    except FileNotFoundError as error:
        print(distrct_name)
        print(error)

Austin ISD.xlsx (sheet 11) line 7 Error!
EOY Output:
['Akins HS', 629.0, 0.9155749636, 37.0, 0.0538573508, 7.0, 0.0101892285, 687.0, 1.0]
QC Output:
['Akins HS', 629.0, 0.9346210996, 37.0, 0.0549777117, 7.0, 0.0104011887, 673.0, 1.0]
Austin ISD.xlsx (sheet 11) line 8 Error!
EOY Output:
['Total', 629.0, 0.9155749636, 37.0, 0.0538573508, 7.0, 0.0101892285, 687.0, 1.0]
QC Output:
['Anderson HS', 325.0, 0.9615384615, 13.0, 0.0384615385, 0.0, 0.0, 338.0, 1.0]
Flour Bluff ISD.xlsx (sheet 11) line 7 Error!
EOY Output:
['Flour Bluff HS', 100.0, 0.9803921569, 2.0, 0.0196078431, 102.0, 1.0]
QC Output:
['Flour Bluff HS', 79.0, 0.975308642, 2.0, 0.024691358, 81.0, 1.0]
Flour Bluff ISD.xlsx (sheet 11) line 8 Error!
EOY Output:
['Total', 100.0, 0.9803921569, 2.0, 0.0196078431, 102.0, 1.0]
QC Output:
['Total', 79.0, 0.975308642, 2.0, 0.024691358, 81.0, 1.0]
Angleton ISD.xlsx (sheet 11) line 7 Error!
EOY Output:
['Angleton HS', 2.0, 0.25, 6.0, 0.75, 8.0, 1.0]
QC Output:
['Angleton HS', 2.0, 0.28571428

In [28]:
loc = ("Austin ISD.xlsx") 
wb = xlrd.open_workbook(loc) 
sheet = wb.sheet_by_index(10) 
for i in range(sheet.nrows):
    print(sheet.row_values(i))

['', '', '', '', '', '', '', '', '', '', '', '']
['', 'Figure 3.2 shows the number and percentage of eligible students on each campus who earned college credit, did not earn college credit, or withdrew from the distance college course prior to final grade determination. Students who made a D- or above in the distance college course earned college credit.', '', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '', '']
['', 'Figure 3.2: Eligible students who earned credit at campus level in the 2018-2019 academic year', '', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '', '']
['', '', 'Campus Name', 'Earned College Credit', None, 'Did Not Earn College Credit', None, 'Withdrew prior to Final Grade Determination', None, 'Total', None, '']
['', '', None, 'N', '%', 'N', '%', 'N', '%', 'N', '%', '']
['', '', 'Akins HS', 763.0, 0.91377245508982, 44.0, 0.0526946107784431, 9.0, 0.0107784431137725, 835.0, 1.0, '']
['', '', 'Anderson HS', 