In [30]:
import pandas as pd
import numpy as np
from datetime import datetime

stem = ['Chemistry Lab',
        "Chemistry",
        'College Algebra',
        'Geoscience',
        'PHYS 1301',
        'PHYS 1302',
        'Precalculus',
        'Statistics',
        'TTU Physics I',
        'TTU Physics II',
        'TTU Precalculus',
        'TTU Geoscience',
        'TTU Statistics'
       ]

def remove(data):
    removal = []
    for i in removal:
        data = data[data["Course Code"] != i]
    return data
    
# Naming convention to be updated
def standardize(data):
    convention = {"CS 302": "Computer Science",
        "RHE 306": "English Language Arts (ENGL 1301)",
        "RHE 309K": "English Language Arts (ENGL 1302)",
        "GEO 302E": "Geoscience",
        "HIS 315K": "US History (HIST 1301)",
        "HIS 315L": "US History (HIST 1302)",
        "M 305G": "Precalculus (MATH 2312)",
        "MATH 1550": "MATH 1550",
        "MATH 2300": "MATH 1550",
        "PHY 302K": "Physics (PHYS 1301)",
        "PHY 302L": "Physics (PHYS 1302)",
        "PHY 1403": "PHY 1403",
        "SDS 302": "Statistics",
        "AET 304": "Arts & Entertainment Technologies (AET)",
        "CH 301": "Chemistry",
        "CH 104M": "Chemistry Lab",
        "M 301": "College Algebra"}
    convention = pd.Series(convention)
    Course_Name_Complete = {"Thriving in our Digital World": "Thriving in our Digital World",
        "Intro to Rhetoric": "Introduction to Rhetoric: Reading, Writing, and Research",
        "Earth, Wind, and Fire": "Earth, Wind, and Fire: An Introduction to Geoscience",
        "United States, 1492-1865": "The United States: 1492-1865",
        "United States Since 1865": "The United States Since 1865",
        "Discovery Precalculus": "Discovery Precalculus: A Creative and Connected Approach",
        "Mechanics, Heat, and Sound": "Mechanics, Heat, and Sound",
        "EM, Optics, & Nuclear Physics": "Electromagnetism, Optics, and Nuclear Physics",
        "Rhe of Amer Iden": "Reading and Writing the Rhetoric of American Identity",
        "OnRamps Statistics": "OnRamps Statistics",
        "Arts & Entertainment":"Pixels, Samples, Lumens, Illusion: Foundations of Arts & Entertainment Technologies",
        "College Algebra":"College Algebra",
        "Intro to Chem Practices (Lab)":"Introduction to Chemistry Practices I",
        "Principles of Chem (Lecture)":"Principles of Chemistry I"}
    Course_Name_Complete = pd.Series(Course_Name_Complete)
    data["Course Title Complete"] = data["Course Title"]
    for i in Course_Name_Complete.index:
        data.loc[data["Course Title"] == i, "Course Title Complete"] = Course_Name_Complete[i]
    for i in convention.index:
        data.loc[data["Course Code"] == i, "Course Title"] = convention[i]
    return data

def fix_dates(data):
    for index, row in data.iterrows():
        if row["Eligibility Status Final Simple"] != "Withdraw":
            continue
        else:
            month = datetime.strptime(row["Withdraw Date"], '%m/%d/%y').strftime('%m')
            date = int(datetime.strptime(row["Withdraw Date"], '%m/%d/%y').strftime('%d'))
            if (month == '01' and date > 11) or month == '02':
                data.loc[index, "Eligibility Status Final Simple"] = 'Ineligible'
            elif (row["Term"] == 'Fal18-Spr19_UT_A' or row['Term'] == 'Fal18-Spr19_TTU') and month == '12' and date > 21:
                data.loc[index, "Eligibility Status Final Simple"] = 'Ineligible'
            else:
                continue
    return data

#Load Data
def get_master(master, district = ''):
    data = pd.read_csv(master)
    data = remove(data)
    data = data[data["Eligibility Status Final Simple"] != 'HS Only']
    data = standardize(data)
    if district == '':
        return data
    return data[data["District Name"] == district]

def read_data(master, district):
    data = get_master(master, district)
    UT = data[data["Partner"] == "UT Austin OnRamps"]
    TTU = data[data["Partner"] == "TTU OnRamps"]
    return UT, TTU

def read_data_without_differentiate(master, district):
    data = get_master(master, district)
    return data

# Get Row totals
def get_row_total(data):
    data['Total'] = 0
    for index, row in data.iterrows():
        data.loc[index, 'Total'] = sum(data.loc[index])
    return data

# Get Column totals
def get_column_total(data):
    data.loc['Total'] = 0
    for i in data.columns:
        data.loc['Total', i] = data[i].sum()
    return data

#Figure 1.1
def get_course(data, label = []):
    if label == '':
        course = data["Course Code"]
    else:
        course = data[label]
    return course.drop_duplicates()

def Figure_1_4(data):
    return get_course(data, ["Course Title Complete"])

def get_unique_values(date, label):
    return data[label].drop_duplicates()

def get_Campus(data, course):
    data = data[data['Course Title'] == course]
    return data['Campus Name'].value_counts()

def get_Census(data, district, endOfTerm = False):
    if endOfTerm == True:
        data = data[data["Withdraw Status"] == "No"]
    data = data[data["District Name"] == district]
    ans = pd.DataFrame()
    for i in data["Campus Name"].drop_duplicates().sort_values():
        ans[i] = data[data["Campus Name"] == i]["Course Title"].value_counts()
        print(data[data["Campus Name"] == i][data["Course Title"] == "Chemistry"]["EID"])
    ans = ans.sort_index()
    return ans

In [31]:
def report_generator_UT_TTU(master, master1718, district, dataset):
    
    OnRamps = standardize(fix_dates(read_data_without_differentiate(master, district)))
    UT, TTU = read_data(master, district)
    OnRamps1718 = standardize(read_data_without_differentiate(master1718, district))
    master = standardize(fix_dates(get_master(master)))
    pastdata = []
    for i in dataset:
        pastdata.append([i[0], read_data_without_differentiate(i[1], district)])

    funclist = [(Figure_1_4, UT),]
    print(get_Census(UT, district))
    for i in funclist:
        func, data= i
        print(func(data))

master = "18-19 FAKE Master File (Final).csv"
district = ['Austin ISD']
master1718 = '17-18 Master File (Student Tab)_Final (2018-05-29).csv'
dataset = [['14-15', 'Master File (14-15) (FINAL).csv'],
          ['15-16', 'Master File (15-16) (FINAL).csv'],
          ['16-17', '16-17 Master File (FINAL).csv'],
          ['17-18', '17-18 Master File (Student Tab)_Final (2018-05-29).csv'],
          ['18-19', "18-19 Mid-Year Master File (Spring 2019).csv"]]

for i in district:
    report_generator_UT_TTU(master, master1718, i, dataset)

992      eid3208
2571     eid3184
7183     eid4259
9229     eid3178
15058    eid3190
22313    eid4243
22556    eid4267
23865    eid4235
31515    eid3202
40256    eid4251
40896    eid3196
Name: EID, dtype: object
Series([], Name: EID, dtype: object)
Series([], Name: EID, dtype: object)
Series([], Name: EID, dtype: object)
Series([], Name: EID, dtype: object)
2057     eid2434
5647     eid3243
12384    eid3235
20311    eid2422
26173    eid2440
29400    eid2428
32731    eid3219
41924    eid3227
Name: EID, dtype: object
Series([], Name: EID, dtype: object)
Series([], Name: EID, dtype: object)
Series([], Name: EID, dtype: object)
10891    eid3011
15444    eid3003
21620    eid2254
22627    eid2995
27727    eid2260
Name: EID, dtype: object
                                         Akins HS  Anderson HS  \
Arts & Entertainment Technologies (AET)        28          NaN   
Chemistry                                      11          NaN   
Chemistry Lab                                  11          N



In [2]:
import xlrd
loc = ("Austin ISD.xlsx") 
wb = xlrd.open_workbook(loc) 
sheet = wb.sheet_by_index(4) 
for i in range(sheet.nrows):
    print(sheet.row_values(i))

['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', 'Figure 1.4 shows the number of students by campus who were enrolled in each OnRamps distance college course at census* and at the end of the academic term. Students could enroll in multiple course.\n\nThis figure is divided according to course subject. Figure 1.4a includes math and science courses and Figure 1.4b includes humanities, arts, and technology courses. Figures are only displayed if the course(s) are offered at Austin ISD.\n*Census is the date on which OnRamps confirms students for reporting purposes only.\n\nFigure 1.4a: Student enrollment in math and science courses at census & end of year at campus level in the 2018-2019 academic year', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
['', '', 'Course Code', 'Enrollment Dates', 'Akins HS', 'Anderson HS', 'Ann Richards School', 'Austin HS', 'Bowie HS', 'Crockett HS', 'Eastside Memori