In [111]:
import pandas as pd
import numpy as np
from datetime import datetime
import xlrd

# Course codes collected under the name `hum`; nothing else in the visible
# code reads this list.
hum = [
    'HIS 315K',
    'HIS 315L',
    'RHE 306',
    'RHE 309K',
    'AET 304',
    'NSC 309',
    'CS 302',
]

# Course code -> standardized course title, stored as a Series indexed by
# course code. standardize() reads this table.
courseList = pd.Series({
    "CS 302": "Computer Science",
    "RHE 306": "ENGL 1301",
    "RHE 309K": "ENGL 1302",
    "GEO 302E": "Geoscience",
    "HIS 315K": "HIST 1301",
    "HIS 315L": "HIST 1302",
    "M 305G": "Precalculus",
    "PHY 302K": "PHYS 1301",
    "PHY 302L": "PHYS 1302",
    "PHY 1403": "PHY 1403",
    "SDS 302": "Statistics",
    "AET 304": "AET",
    "CH 301": "Chemistry",
    "CH 104M": "Chemistry Lab",
    "M 301": "College Algebra",
    "NSC 309": "NSC 309",
})

def remove(data):
    """Drop every row whose "Course Code" appears in the exclusion list.

    The exclusion list is currently empty, so the input frame is returned
    as-is (the very same object, not a copy).
    """
    excluded_codes = []
    remaining = data
    for code in excluded_codes:
        keep_mask = remaining["Course Code"] != code
        remaining = remaining[keep_mask]
    return remaining
    
# Naming convention to be updated
def standardize(data):
    """Overwrite "Course Title" with the standardized title for known course codes.

    For every (course code, title) pair in the module-level ``courseList``
    Series, rows whose "Course Code" equals the code get that title. Rows with
    other course codes are left untouched. The frame is modified in place and
    also returned.
    """
    for code, title in courseList.items():
        data.loc[data["Course Code"] == code, "Course Title"] = title
    return data

#Load Data
def get_master(master, district = ''):
    """Load the master CSV, drop 'HS Only' rows and standardize course titles.

    Parameters
    ----------
    master : path or buffer accepted by ``pd.read_csv``
        Location of the master file.
    district : str, optional
        When non-empty, only rows whose "District Name" equals this value are
        returned. The default ``''`` returns every remaining row.

    Returns
    -------
    pandas.DataFrame
        The filtered frame with standardized "Course Title" values.
    """
    data = pd.read_csv(master)
    data = remove(data)
    # Take an explicit copy of the filtered rows: standardize() assigns through
    # .loc, and doing that on a boolean-filtered slice makes pandas emit
    # SettingWithCopyWarning.
    data = data[data["Eligibility Status Final Simple"] != 'HS Only'].copy()
    data = standardize(data)
    if district == '':
        return data
    # Same reasoning as above: hand callers an independent frame they can
    # safely assign into.
    return data[data["District Name"] == district].copy()

def read_data(master, district):
    """Load the master data for a district and split it by partner.

    Returns a ``(UT, TTU)`` pair: the rows whose "Partner" is
    "UT Austin OnRamps" and the rows whose "Partner" is "TTU OnRamps".
    """
    data = get_master(master, district)
    partner = data["Partner"]
    return data[partner == "UT Austin OnRamps"], data[partner == "TTU OnRamps"]

def read_data_without_differentiate(master, district):
    """Load the master data for a district without splitting it by partner."""
    return get_master(master, district)

# Get Row totals
def get_row_total(data2, clean = True):
    """Add a 'Total' column holding the sum of each row.

    With ``clean`` true, a new frame is built that contains only the rows of
    ``data2`` whose sum is greater than zero. With ``clean`` false, the 'Total'
    column is added to ``data2`` itself and that same object is returned.
    """
    if not clean:
        data = data2
        # 'Total' starts at 0, so including it in the row sum changes nothing.
        data['Total'] = 0
        for label in data.index:
            data.loc[label, 'Total'] = sum(data.loc[label])
        return data

    data = pd.DataFrame(columns = data2.columns)
    data['Total'] = 0
    for label in data2.index:
        row_sum = sum(data2.loc[label])
        if row_sum > 0:
            # Copy the row across first, then fill in its total.
            data.loc[label] = data2.loc[label]
            data.loc[label, 'Total'] = row_sum
    return data

# Get Column totals
def get_column_total(data2, clean = True):
    """Add a 'Total' row holding the sum of each column.

    With ``clean`` true, a new frame is built that contains only the columns of
    ``data2`` whose sum is greater than zero. With ``clean`` false, the 'Total'
    row is added to ``data2`` itself and that same object is returned.
    """
    if not clean:
        data = data2
        # The 'Total' row starts at 0, so including it in the sum changes nothing.
        data.loc['Total'] = 0
        for name in data.columns:
            data.loc['Total', name] = data[name].sum()
        return data

    data = pd.DataFrame(index = data2.index)
    for name in data2.columns:
        col_sum = data2[name].sum()
        if col_sum > 0:
            # Copy the column across first, then fill in its total.
            data[name] = data2[name]
            data.loc['Total', name] = col_sum
    return data
    
def get_Campus(data):
    """Return the distinct values of the 'Campus Name' column, first occurrence kept."""
    campus_names = data['Campus Name']
    return campus_names.drop_duplicates()

def get_unique_values(date, label):
    """Return the distinct values of column ``label``, first occurrence kept.

    Parameters
    ----------
    date : pandas.DataFrame
        The frame to read from. The parameter name is kept as ``date`` for
        backward compatibility with existing callers.
    label : hashable
        Name of the column whose distinct values are wanted.

    Returns
    -------
    pandas.Series
        ``date[label]`` with duplicate values dropped.
    """
    # Read from the argument itself; the body previously referenced an
    # undefined/global name `data` instead of the `date` parameter.
    return date[label].drop_duplicates()

def get_percent(data, total = 0, column = '', column2 = None):
    """Insert an '<name>_%' column right after every existing column.

    The divisor is picked in this priority order:
      1. ``total`` when it is not 0,
      2. the frame's own 'Total' column when ``column`` is not '',
      3. ``column2`` when it is not None.
    If none applies, no percentage columns are added. The new columns are
    written into ``data`` itself; the returned frame is a selection of
    ``data`` with each original column followed by its percentage column.
    """
    ordered = []
    # data.columns is captured once here, so columns added inside the loop are
    # not themselves iterated.
    for name in data.columns:
        ordered.append(name)
        if total != 0:
            denominator = total
        elif column != '':
            denominator = data['Total']
        elif column2 is not None:
            denominator = column2
        else:
            continue
        pct_name = name + '_%'
        ordered.append(pct_name)
        data[pct_name] = data[name] / denominator
    return data[ordered]

def fill_row(row, num, fill = ''):
    """Pad ``row`` in place with ``fill`` until it holds at least ``num`` items.

    Rows that are already long enough are left unchanged. The same list object
    is returned.
    """
    # A non-positive count multiplies out to an empty list, so nothing is added.
    row.extend([fill] * (num - len(row)))
    return row

def Figure_2_2(data, district):
    """Build the per-campus eligibility table for one district.

    Counts, per campus, the rows whose "Eligibility Status Final" is
    'Eligible: Grade-based' or 'Eligible: TSI Override', adds row and column
    totals (dropping all-zero rows/columns), then adds a percentage column for
    each column, computed against the row total.

    Returns a ``(table, cols)`` tuple where ``cols`` lists the count columns
    that survived, excluding 'Total'.
    """
    district_rows = data[data["District Name"] == district]
    campuses = get_Campus(district_rows)
    statuses = ['Eligible: Grade-based', 'Eligible: TSI Override']

    counts = pd.DataFrame(index = pd.Index(campuses), columns = statuses)
    for status in statuses:
        matching = district_rows[district_rows["Eligibility Status Final"] == status]
        # Campuses with no matching rows stay NaN here and become 0 below.
        counts[status] = matching["Campus Name"].value_counts()
    counts = counts.fillna(0)
    counts.columns = ['Met Grade Criteria', 'Met TSI Criteria']
    counts = counts.sort_index()

    counts = get_row_total(counts, True)
    counts = get_column_total(counts, True)
    # Everything except the trailing 'Total' column.
    count_cols = counts.columns.tolist()[:-1]
    table = get_percent(counts, column2 = counts['Total'])
    return (table, count_cols)

def Figure_2_2_QC(data, district_name):
    """Lay out the Figure 2.2 table as a list of rows for comparison with a sheet.

    ``data`` is a ``(table, cols)`` pair as returned by ``Figure_2_2``. The
    result starts with fixed caption/header rows, followed by the ``cols`` list
    itself, an 'N'/'%' header row, and one row per index label of ``table``.
    ``district_name`` is accepted but not used.
    """
    table, cols = data[0], data[1]
    width = 11

    def text_row(text):
        # The text sits in the second cell; every other cell is an empty string.
        return ['', text] + [''] * (width - 2)

    template = [
        [''] * width,
        text_row('Figure 2.2 shows the number and percentage of students who were eligible for the opportunity to earn college credit within each eligibility category.\n'),
        [''] * width,
        text_row('Figure 2.2: Detailed student eligibility status at campus level in the 2018-2019 academic year'),
        [''] * width,
        ['', '', 'Campus Name ', 'Eligible', None, None, None, 'Total', None, '', ''],
        cols,
        # One N/% pair per count column, plus one pair for the total.
        ['N', '%'] * (len(cols) + 1),
    ]
    template.extend(
        ['', '', label] + table.loc[label].tolist() + ['', '']
        for label in table.index
    )
    return template

def filter_str(row):
    """Normalize a row of cell values for comparison.

    ``None`` is passed straight through. Otherwise a new list is returned in
    which floats are rounded to 10 decimal places and every non-float cell
    equal to '' or None is dropped.
    """
    if row is None:
        return row
    return [
        round(cell, 10) if isinstance(cell, float) else cell
        for cell in row
        # Floats are always kept; other cells only when they are not blank.
        if isinstance(cell, float) or (cell != '' and cell != None)
    ]

def verify(inputFile, QCData, index):
    """Compare one sheet of a workbook, row by row, against expected QC rows.

    Each sheet row and the QC row at the same position are passed through
    ``filter_str`` before being compared. Every mismatch is printed together
    with both rows.

    Parameters
    ----------
    inputFile : str
        Path of the workbook to open with ``xlrd``.
    QCData : list of list
        Expected rows, in sheet order.
    index : int
        Zero-based index of the sheet to check.

    Returns
    -------
    bool
        True when every sheet row matched its QC row, False otherwise.
    """
    wb = xlrd.open_workbook(inputFile)
    sheet = wb.sheet_by_index(index)
    QCCompleted = True
    for i in range(sheet.nrows):
        sheetRow = filter_str(sheet.row_values(i))
        # A sheet with more rows than the QC data is reported as a mismatch
        # on the extra rows (QC side shown as None) instead of raising
        # IndexError.
        QCRow = filter_str(QCData[i]) if i < len(QCData) else None
        if sheetRow != QCRow:
            print(inputFile + " (sheet " + str(index + 1) + ") line " + str(i) + " Error!")
            print("EOY Output:")
            print(sheetRow)
            print("QC Output:")
            print(QCRow)
            QCCompleted = False
    # Report the actual outcome; previously the mismatch flag was tracked but
    # True was returned unconditionally.
    return QCCompleted

In [112]:
def report_generator_UT_TTU(master, district):
    """Run the QC checks for one district against its '<district>.xlsx' workbook.

    Parameters
    ----------
    master : str
        Path of the master CSV file.
    district : str
        District name; also used as the workbook file name (plus '.xlsx').

    Returns
    -------
    bool
        True when every check in the list passed, as reported by ``verify``.
    """
    # The master file is loaded once here; the extra, unused load of the same
    # file that used to precede this call has been dropped.
    UT, TTU = read_data(master, district)

    # (source data, figure builder, QC layout builder) for each check.
    funclist = [(UT, Figure_2_2, Figure_2_2_QC)]
    results = []
    for data, func, QC in funclist:
        # Collect every result rather than returning on the first entry, so
        # all checks in the list actually run. Every entry is compared against
        # sheet index 8.
        results.append(verify(district + ".xlsx", QC(func(data, district), district), 8))
    return all(results)

master = "18-19 FAKE Master File (Final).csv"
# District names are read from column 0; the file has no header row.
district = pd.read_csv("DistrictName2.csv", header = None)

for i in range(len(district)):
    distrct_name = district.loc[i, 0]
    try:
        # After a district's report comes back True, rewrite the progress file
        # with the districts that come after it. Nothing is written after the
        # final district.
        if report_generator_UT_TTU(master, distrct_name) == True and i < len(district) - 1:
            district.loc[i + 1 :].to_csv("district not finished.csv", index=False)
    except FileNotFoundError as error:
        # Report the district and the error, then move on to the next one.
        print(distrct_name)
        print(error)

In [110]:
# Print every row of one sheet (index 8) of one workbook, for manual inspection.
loc = "Austin ISD.xlsx"
wb = xlrd.open_workbook(loc)
sheet = wb.sheet_by_index(8)
for i in range(sheet.nrows):
    # row_values(i) gives the cell values of row i as a list.
    print(sheet.row_values(i))

['', '', '', '', '', '', '', '', '', '', '']
['', 'Figure 2.2 shows the number and percentage of students who were eligible for the opportunity to earn college credit within each eligibility category.\n', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '']
['', 'Figure 2.2: Detailed student eligibility status at campus level in the 2018-2019 academic year', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '']
['', '', 'Campus Name ', 'Eligible', None, None, None, 'Total', None, '', '']
['', '', None, 'Met Grade Criteria', None, 'Met TSI Criteria', None, None, None, '', '']
['', '', '', 'N', '%', 'N', '%', 'N', '%', '', '']
['', '', 'Akins HS', 664.0, 0.986627043090639, 9.0, 0.0133729569093611, 673.0, 1.0, '', '']
['', '', 'Anderson HS', 338.0, 1.0, 0.0, 0.0, 338.0, 1.0, '', '']
['', '', 'Ann Richards School', 65.0, 1.0, 0.0, 0.0, 65.0, 1.0, '', '']
['', '', 'Austin HS', 632.0, 0.998420221169036, 1.0, 0.00157977883096367, 633.0, 1