In [23]:
## Importing Packages to run Pandas

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import math
import re
import glob
import configparser

from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import (RandomTreesEmbedding, RandomForestClassifier,
                              GradientBoostingClassifier)
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Imputer
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR


In [24]:
## Defining Import Related Function

## Import the config file that says which Google Sheets spreadsheet/worksheet to write to
def ImportTeacherConfig():
    """Read the upload target from ``teacher.ini`` in the current working directory.

    Returns:
        tuple: ``(worksheet, spreadsheet)`` -- the values of the ``worksheet``
        and ``spreadsheet`` options of the ``[teacherconfig]`` section.

    Raises:
        configparser.NoSectionError: if ``teacher.ini`` has no
            ``[teacherconfig]`` section.  ``ConfigParser.read`` silently skips
            files it cannot open, so a missing file also surfaces here.
        KeyError: if ``worksheet`` or ``spreadsheet`` is absent from the section.
    """
    Config = configparser.ConfigParser()
    Config.read("teacher.ini")

    def ConfigSectionMap(section):
        """Return every option of ``section`` as a dict; unreadable values become None."""
        settings = {}
        for option in Config.options(section):
            try:
                settings[option] = Config.get(section, option)
            except configparser.Error:
                # Only parser problems (e.g. bad interpolation) are tolerated;
                # anything else is a programming error and should propagate.
                print("exception on %s!" % option)
                settings[option] = None
        return settings

    # Parse the section once and pull both values from the same mapping.
    teacher = ConfigSectionMap("teacherconfig")
    return (teacher['worksheet'], teacher['spreadsheet'])
#######
###### Import Student Roster ######
def ImportRoster():
    """Load the student roster from an Excel workbook in the ``Roster`` folder.

    The alphabetically first ``*.xls*`` file in the folder is read.  Only the
    id, name, section and accommodation columns are kept, and rows with a
    missing value in any of those columns are dropped.

    Returns:
        pandas.DataFrame: indexed by ``Local Student Id`` (sorted ascending)
        with columns ``Student Last Name``, ``Student First Name``,
        ``Section`` and ``Accommodation Level``.

    Raises:
        FileNotFoundError: if the folder contains no ``*.xls*`` file.
        KeyError: if the workbook lacks one of the expected columns.
    """
    path = r'.\Roster'
    # glob order is unspecified; sort so the same workbook is picked every run.
    workbooks = sorted(glob.glob(path + "/*.xls*"))
    if not workbooks:
        raise FileNotFoundError("No roster workbook (*.xls*) found in %s" % path)

    wanted = ['Local Student Id', 'Student Last Name', 'Student First Name',
              'Section', 'Accommodation Level']
    roster = pd.read_excel(workbooks[0])

    # Drop incomplete rows while the id is still a column, so a missing id
    # removes the row too; chaining avoids in-place edits on a derived frame.
    roster = roster[wanted].dropna()
    return roster.set_index('Local Student Id').sort_index()
####

####### This is where most importing is taking place #######

def GetDatesAssessed():
    """Return the assessment date of every workbook in the TestInfo folder.

    Each matched path is expected to be ``<folder><separator><YYYYMMDD>...``;
    the eight characters after the separator are reformatted as
    ``YYYY-MM-DD``.

    Returns:
        list[str]: one date string per matched file, in glob order.
    """
    path = r'.\TestInfo'
    # The wildcard picks up both .xls and .xlsx workbooks.
    matches = glob.glob(path + "/*.xls*")

    # Skip the folder prefix plus one separator character, then take 8 chars.
    start = len(path) + 1
    stamps = [name[start:start + 8] for name in matches]

    return ["-".join((stamp[0:4], stamp[4:6], stamp[6:8])) for stamp in stamps]

## Read Filenames for Test Info Pages and Response Matrices
# Use this later when working with multiple tests at once

def GetTestInfoPaths():
    """Return the paths of all ``*.xls*`` workbooks in the TestInfo folder."""
    pattern = "/".join((r'.\TestInfo', "*.xls*"))
    return glob.glob(pattern)

def GetResponsesPaths():
    """Return the paths of all ``*.xls*`` workbooks in the Responses folder."""
    pattern = "/".join((r'.\Responses', "*.xls*"))
    return glob.glob(pattern)


In [25]:
## Define function to clean the test info page so that secondary standards are included in the analysis

def cleanTestInfo2(TestInfoFileName):
    """Load a test-info workbook and stack primary and secondary standards.

    Reads the ``Test Information`` sheet (skipping its first 8 rows) and builds
    one table in which every question appears once for its primary standard
    and once more for its secondary standard.  The secondary rows follow the
    primary rows and reuse the question's type, points and correct answer.

    Args:
        TestInfoFileName: path of the workbook, passed to ``pandas.read_excel``.

    Returns:
        pandas.DataFrame: columns ``(Primary) Standard``,
        ``MC, OER (Question Group)``, ``Possible Points`` and
        ``Correct Answer``, renumbered ``0..N-1``.  Each half is cut off after
        the last row whose standard is filled in; a half with no standard at
        all is kept whole (its standard column is then all missing).
    """
    primary_col = '(Primary) Standard'
    secondary_col = '(Secondary) OPTIONAL: Additional Standard'
    points_col = 'Possible Points\nmust be a whole number (>= 1) if OER'
    shared_cols = ['MC, OER (Question Group)', points_col, 'Correct Answer']

    sheet = pd.read_excel(TestInfoFileName, sheet_name='Test Information', skiprows=8)
    # Number the questions from 1 so label slices read as question numbers.
    sheet.index = range(1, len(sheet.index) + 1)

    primary = sheet[[primary_col] + shared_cols]
    # last_valid_index() is None when the column is empty; 1:None keeps every row.
    primary = primary.loc[1:primary[primary_col].last_valid_index(), :]

    secondary = sheet[[secondary_col] + shared_cols]
    secondary = secondary.loc[1:secondary[secondary_col].last_valid_index(), :]
    secondary = secondary.rename(columns={secondary_col: primary_col})

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    combined = pd.concat([primary, secondary], axis=0, ignore_index=True)
    return combined.rename(columns={points_col: 'Possible Points'})

#########
## Create NEW Standards List based off clean test info sheet

def constructStandards(TestInfo):
    """Build the list of distinct standards referenced by a test-info table.

    Args:
        TestInfo: DataFrame with a ``(Primary) Standard`` column.

    Returns:
        pandas.DataFrame: a single ``Standard Code`` column holding each
        distinct value once, sorted ascending, with a fresh ``0..N-1`` index.
    """
    distinct_codes = TestInfo["(Primary) Standard"].unique()
    return (
        pd.DataFrame(data=distinct_codes, columns=['Standard Code'])
        .sort_values(by=['Standard Code'])
        .reset_index(drop=True)
    )

#####

def cleanResponses2(ResponsesFileName, TestInfo, first_question_col=9):
    """Load a response workbook and align its columns with a test-info table.

    The question columns are included twice, side by side, so that they line
    up positionally with a table that lists every question once for its
    primary standard and once for its secondary standard.

    Args:
        ResponsesFileName: path of the workbook, passed to ``pandas.read_excel``.
        TestInfo: DataFrame whose index becomes the column labels of the
            result; it must have exactly two rows per question column.
        first_question_col: positional index of the first question column;
            everything before it is treated as student metadata.

    Returns:
        pandas.DataFrame: one row per student, indexed by ``Local Student Id``
        (sorted ascending), with columns labelled by ``TestInfo.index``.

    Raises:
        ValueError: if the doubled question count differs from ``len(TestInfo)``.
    """
    print(TestInfo.index)  # progress trace, one line per test processed
    raw = pd.read_excel(ResponsesFileName)

    question_cols = raw.columns[first_question_col:]
    answers = raw.loc[:, question_cols]

    # Two independent copies of the answers.  No "B" prefix is needed on the
    # second copy because the column labels are replaced just below.
    doubled = pd.concat([answers, answers], axis=1)
    if doubled.shape[1] != len(TestInfo.index):
        raise ValueError(
            "%s has %d question columns (%d when doubled) but the test info "
            "table has %d rows" % (ResponsesFileName, len(question_cols),
                                   doubled.shape[1], len(TestInfo.index)))

    doubled.columns = TestInfo.index
    doubled.index = raw.loc[:, 'Local Student Id']
    return doubled.sort_index()
    
    
    #StudentResponses2.columns = 'B' + StudentResponses.columns
    
## Define function to create binary matrix (points earned)
# Only MC questions are scored here (full possible points or 0); other question types are left as entered

def createBinary(Responses, TestInfos):
    """Convert multiple-choice answers into points earned.

    For every question whose ``MC, OER (Question Group)`` is ``'MC'``, a
    student's answer is replaced by the question's ``Possible Points`` when it
    equals ``Correct Answer`` and by 0 otherwise.  Columns of all other
    questions are returned unchanged.

    Args:
        Responses: DataFrame of raw answers, one row per student, with one
            column per label in ``TestInfos.index``.
        TestInfos: DataFrame with columns ``MC, OER (Question Group)``,
            ``Correct Answer`` and ``Possible Points``.

    Returns:
        pandas.DataFrame: a scored copy of ``Responses``; the input is not
        modified.
    """
    BinaryMatrix = Responses.copy()
    is_mc = TestInfos['MC, OER (Question Group)'] == 'MC'

    for question in TestInfos.loc[is_mc].index:
        answer_key = TestInfos.loc[question, 'Correct Answer']
        points = TestInfos.loc[question, 'Possible Points']
        # Score the whole column at once and replace it, instead of writing
        # numbers cell by cell into a column that holds answer text.
        earned = Responses[question].eq(answer_key)
        BinaryMatrix[question] = np.where(earned, points, 0)

    return BinaryMatrix


## Define function to calculate points per standard for a given test

def calcPPS(StandardsList, TestInfos):
    """Total the points available for each standard on one test.

    Args:
        StandardsList: DataFrame with a ``Standard Code`` column.
        TestInfos: DataFrame with ``(Primary) Standard`` and
            ``Possible Points`` columns, one row per question.

    Returns:
        pandas.DataFrame: indexed by standard code, with one ``Points`` column
        holding the sum of ``Possible Points`` over the questions aligned to
        that standard (0 when the test has no such question).
    """
    codes = StandardsList['Standard Code']
    totals = pd.DataFrame(index=codes, columns=['Points'])
    aligned_to = TestInfos['(Primary) Standard']

    for code in codes:
        running = 0
        # Walk the questions in table order, accumulating the matching ones.
        for question, question_standard in aligned_to.items():
            if question_standard == code:
                running = running + TestInfos.loc[question, 'Possible Points']
        totals.loc[code, 'Points'] = running

    return totals

##
## Define function to calculate standards matrix per student

def createStandardsMatrix(BinaryMatrix, TestInfos, PPS):
    """Sum each student's earned points per standard for one test.

    Args:
        BinaryMatrix: DataFrame of points earned, one row per student and one
            column per label in ``TestInfos.index``.
        TestInfos: DataFrame with a ``(Primary) Standard`` column mapping each
            question to its standard.
        PPS: DataFrame indexed by standard code with a ``Points`` column
            (points available per standard).

    Returns:
        pandas.DataFrame: rows are the students of ``BinaryMatrix``, columns
        are the standards of ``PPS``; each cell is the student's points earned
        on the questions aligned to that standard, or 0 when the standard has
        no points available on this test.
    """
    StandardsbyStudent = pd.DataFrame(index=BinaryMatrix.index.values,
                                      columns=PPS.index.values, data=None)
    question_standard = TestInfos['(Primary) Standard']

    for standard in StandardsbyStudent.columns:
        aligned = list(question_standard[question_standard == standard].index)
        # Compare against the number 0: Points is numeric, so the previous
        # comparison with the string '0' could never be true.
        if PPS.loc[standard, 'Points'] == 0 or not aligned:
            StandardsbyStudent[standard] = 0
        else:
            # Row-wise sum over the aligned questions; assigned positionally
            # because the result rows are in BinaryMatrix order.
            StandardsbyStudent[standard] = BinaryMatrix.loc[:, aligned].sum(axis=1).values

    return StandardsbyStudent

###

## Calculate overall mastery for each student for each standard

def CalculateTestMastery(StandardsMatrix, PPS):
    """Convert points earned per standard into a mastery fraction.

    Args:
        StandardsMatrix: DataFrame of points earned, one row per student and
            one column per standard.
        PPS: DataFrame indexed by standard code with a ``Points`` column
            (points available per standard).

    Returns:
        pandas.DataFrame: a copy of ``StandardsMatrix`` in which every standard
        listed in ``PPS`` holds earned / available points, or NaN when no
        points were available for that standard.  The input is not modified.
    """
    MasteryMatrix = StandardsMatrix.copy()
    for standard in PPS.index:
        available = PPS.loc[standard, 'Points']
        if available == 0:
            # np.nan, not np.NaN: the capitalised alias was removed in NumPy 2.0.
            MasteryMatrix[standard] = np.nan
        else:
            MasteryMatrix[standard] = StandardsMatrix[standard] / available
    return MasteryMatrix


In [26]:
## Import/Clean Info Pages, Import/Clean Responses, Create Binary Matrices

# Parse teacher.ini up front (result unused here) so an unreadable config
# fails before any workbook is processed.
ImportTeacherConfig()
Roster = ImportRoster()

DatesAssessed = GetDatesAssessed()
TestInfoNames = GetTestInfoPaths()
ResponsesNames = GetResponsesPaths()

if not (len(DatesAssessed) == len(TestInfoNames) == len(ResponsesNames)):
    raise ValueError(
        "Expected one response workbook per test-info workbook, found %d "
        "test-info and %d response files"
        % (len(TestInfoNames), len(ResponsesNames)))

# Tests are matched to response files by position, and glob order is
# unspecified, so sort both listings.  Dates stay attached to their test-info
# path while sorting.
# NOTE(review): this assumes response files sort in the same order as the
# test-info files (e.g. both start with the YYYYMMDD date) -- confirm.
_info_with_dates = sorted(zip(TestInfoNames, DatesAssessed))
TestInfoNames = [info_path for info_path, _ in _info_with_dates]
DatesAssessed = [test_date for _, test_date in _info_with_dates]
ResponsesNames = sorted(ResponsesNames)

IndexDataFrame = pd.DataFrame(index=DatesAssessed,
                              data={'TestInfo': TestInfoNames, 'Responses': ResponsesNames})

# Empty placeholders; not referenced by the cells visible here, kept in case
# other cells rely on the names existing.
TestInfos = pd.DataFrame()
Responses = pd.DataFrame()
Binaries = pd.DataFrame()
PPSs = pd.DataFrame()
StandardsMatrices = pd.DataFrame()

# Cleaned test-info table of every test, keyed by assessment date
TestInfo_collection = {}

for Dates in IndexDataFrame.index:
    filepath = IndexDataFrame.loc[Dates, 'TestInfo']
    TestInfo_collection[Dates] = cleanTestInfo2(filepath)

# Stack all tests once (instead of growing a frame inside a loop) to collect
# every standard that appears on any test.
if TestInfo_collection:
    SuperTestForStandards = pd.concat(list(TestInfo_collection.values()), axis=0)
else:
    SuperTestForStandards = pd.DataFrame()

StandardsList = constructStandards(SuperTestForStandards).dropna()

# Cleaned response matrix of every test, keyed by assessment date
Responses_collection = {}
for Dates in IndexDataFrame.index:
    filepath2 = IndexDataFrame.loc[Dates, 'Responses']
    Responses_collection[Dates] = cleanResponses2(filepath2, TestInfo_collection[Dates])



RangeIndex(start=0, stop=138, step=1)
RangeIndex(start=0, stop=20, step=1)


In [27]:
# Per-test results, each keyed by assessment date.

# Points earned per student and question
Binaries_collection = {
    test_date: createBinary(Responses_collection[test_date], TestInfo_collection[test_date])
    for test_date in IndexDataFrame.index
}

# Points available per standard
PPSs_collection = {
    test_date: calcPPS(StandardsList, TestInfo_collection[test_date])
    for test_date in IndexDataFrame.index
}

# Points earned per student and standard
StandardsMatrix_collection = {
    test_date: createStandardsMatrix(Binaries_collection[test_date],
                                     TestInfo_collection[test_date],
                                     PPSs_collection[test_date])
    for test_date in IndexDataFrame.index
}


In [38]:
# Mastery fraction per student and standard, for each test
TestMastery_collection = {
    test_date: CalculateTestMastery(StandardsMatrix_collection[test_date],
                                    PPSs_collection[test_date])
    for test_date in IndexDataFrame.index
}

# Class-wide mean per standard for each test; skipna=False leaves a standard
# as NaN whenever any student's value for it is NaN.
TestOverallMastery_collection = {
    test_date: TestMastery_collection[test_date].mean(axis=0, skipna=False)
    for test_date in IndexDataFrame.index
}


In [29]:
# Merging Matrices
# Placeholder frames
OverallBinaries = pd.DataFrame()
OverallStudentMastery = pd.DataFrame()

# Points available per standard, summed over every test
OverallPPS = pd.DataFrame()
for Dates in IndexDataFrame.index:
    OverallPPS = OverallPPS.add(PPSs_collection[Dates], fill_value=0)

# Points earned per student and standard, summed over every test
OverallStandardsMastery = pd.DataFrame()
for Dates in IndexDataFrame.index:
    OverallStandardsMastery = OverallStandardsMastery.add(
        StandardsMatrix_collection[Dates], fill_value=0)

# Final matrix: earned / available, matched on the standard code
points_available = OverallPPS.loc[:, 'Points']
OverallStudentMastery = OverallStandardsMastery.divide(points_available)

In [30]:
### Create the assessment x standard matrix in write-ready form

# One row per assessment date, one column per standard; blanks replace NaN so
# the uploaded cells are empty rather than showing "nan".
MasterByTest = pd.DataFrame(TestOverallMastery_collection).fillna(value='').T

# The standards list as a single row (one standard code per column)
StandardsList_processed = StandardsList.copy().fillna(value='').T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,39,40,41,42,43,44,45,46,47,48
Standard Code,UHAI.HS.Calc.01.Asymptotes,UHAI.HS.Calc.01.Continuity,UHAI.HS.Calc.01.IVT,UHAI.HS.Calc.01.LimGraph,UHAI.HS.Calc.01.LimInf,UHAI.HS.Calc.02.DerivApprox,UHAI.HS.Calc.02.DerivInterpret,UHAI.HS.Calc.02.DerivLimitDef,UHAI.HS.Calc.02.DerivMatching,UHAI.HS.Calc.02.Differentiability,...,UHAI.HS.Calc.06.RiemannSums,UHAI.HS.Calc.07.AvgValueAROC,UHAI.HS.Calc.07.IntByParts,UHAI.HS.Calc.07.IntLongDivision,UHAI.HS.Calc.07.IntSimplify,UHAI.HS.Calc.07.IntSub,UHAI.HS.Calc.08.Euler,UHAI.HS.Calc.08.Exponential,UHAI.HS.Calc.08.IVP,UHAI.HS.Calc.08.SlopeField


In [34]:
#Get Roster df in order
# Join roster details to overall mastery on the student id (both indexes);
# the outer join keeps students that appear on only one side.
Roster_processed = Roster.copy()
Data_processed = Roster_processed.merge(OverallStudentMastery, how='outer',
                                        left_index=True, right_index=True)

# Split into the first four columns and everything after them, because the
# two parts are uploaded to different start cells.
x = Data_processed.shape[1]
Data_left = Data_processed.iloc[:, :4]
Data_right = Data_processed.iloc[:, 4:x]

# Assessment dates as a one-column frame
DatesofTests = pd.DataFrame(data=IndexDataFrame.index, columns=['Dates'])


In [37]:
#Time to upload
from df2gspread import df2gspread as d2g
import gspread
from oauth2client.service_account import ServiceAccountCredentials

scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']

# Service-account credentials are read from creds.json in the working directory.
credentials2 = ServiceAccountCredentials.from_json_keyfile_name('creds.json', scope)

gc = gspread.authorize(credentials2)

# Worksheet and spreadsheet identifiers come from teacher.ini; parse it once.
wks, spreadsheet = (str(value) for value in ImportTeacherConfig())

# (frame, top-left cell) pairs, uploaded in this order.  The standards row is
# written twice: above the per-test block and above the per-student block.
uploads = [
    (Data_left, 'A37'),
    (Data_right, 'F37'),
    (StandardsList_processed, 'D16'),
    (MasterByTest, 'D18'),
    (StandardsList_processed, 'F36'),
    (DatesofTests, 'A18'),
]

for frame, start_cell in uploads:
    # Values only: no header row, no index column, and the sheet is neither
    # cleared nor resized.
    _last_upload = d2g.upload(frame, gfile=spreadsheet, wks_name=wks,
                              start_cell=start_cell, credentials=credentials2,
                              clean=False, df_size=False,
                              col_names=False, row_names=False)

# Show the result of the final upload as the cell output.
_last_upload

<Worksheet 'AP Calc BC - Coapman' id:oz8h307>