In [1]:
## Importing Packages to run Pandas

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import math
import re

from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import (RandomTreesEmbedding, RandomForestClassifier,
                              GradientBoostingClassifier)
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Imputer
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVR
import glob

In [2]:
## Change the working directory

os.chdir(r"C:\Users\Kyle Coapman\Dropbox\1819 WPHS Assessment\APCalculusBC")

## Import Standards for Course

path = r'.\Standards'
filename = glob.glob(path + "/*.xlsx")

StandardsList = pd.read_excel(filename[0])

SLColumns = StandardsList.columns.values
Codes = StandardsList.loc[:,'Standard Code']
Description = StandardsList.loc[:,'Description']
Unit = StandardsList.loc[:,'Unit']
Priority = StandardsList.loc[:,'Priority']
DF_temp = pd.DataFrame(index = StandardsList.index.values, columns = ['Last Date Assessed'])

StandardsList = pd.concat([Codes, Description, Unit, Priority, DF_temp], axis=1)

StandardsList.head()

NameError: name 'glob' is not defined

In [1]:
## Determine date of each assessment

path = r'.\TestInfo'
filename = glob.glob(path + "/*.xlsx")

DatesAssessed = []
for test in range(len(filename)):
    DatesAssessed.append(filename[test][len(path)+1:len(path)+9])
    

#Processing the dates into better format
DatesAssessed = [e[0:4] + "-" + e[4:6] + "-" + e[6:8] for e in DatesAssessed]

NameError: name 'glob' is not defined

In [4]:
## Import Student Roster

path = r'.\Roster'
filename = glob.glob(path + "/*.xlsx")

Roster = pd.read_excel(filename[0])

Roster.index = Roster.loc[:,'Student ID']
Roster = Roster.sort_index()
Roster = Roster.drop(['Student ID'],axis = 1)
Roster.head()

Unnamed: 0_level_0,"Last, First",Teacher,Section
Student ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
301001207,"Green, Imani",Coapman,2
301001210,"Hayes, Autumn",Coapman,2
301001323,"Acosta, Analia",Coapman,2
301001331,"Agbadou, Joshua",Coapman,2
301001336,"Bakare, Monica",Coapman,1


In [5]:
## Define function to clean test info page

def cleanTestInfo(TestInfoFileName):
    TestInfo = pd.read_excel(TestInfoFileName, skiprows=9)
    TestInfo.index = range(1, len(TestInfo.index)+1)
    Standards = TestInfo.loc[:,'(Primary) Standard']
    Type = TestInfo.loc[:,'MC, OER (Question Group)']
    Points = TestInfo.loc[:,'Possible Points']
    Correct = TestInfo.loc[:,'Correct Answer']

    TestInfo = pd.concat([Standards, Type, Points, Correct], axis = 1)
    Qseries = TestInfo.loc[:,'(Primary) Standard']
    LastQuestion = Qseries.last_valid_index()
    TestInfo = TestInfo.loc[1.0:LastQuestion,:]
    return TestInfo

In [6]:
## Define function to clean response matrices

def cleanResponses(ResponsesFileName,TestInfo):
    Responses = pd.read_excel(ResponsesFileName)
    Questions = Responses.columns.values[9:]
    StudentID = Responses.loc[:,['Local Student Id']]
    StudentResponses = Responses.loc[:,Questions]
    Responses = pd.concat([StudentID, StudentResponses], axis = 1)
    Responses.index = Responses.loc[:,'Local Student Id']
    Responses = Responses.drop(['Local Student Id'], axis = 1)
    Responses.columns = TestInfo.index
    Responses = Responses.sort_index()
    return Responses



In [7]:
## Define function to create binary matrix (points earned)

def createBinary(Responses, TestInfo):
    BinaryMatrix = Responses.copy()
    for question in TestInfo.index.values:
        if TestInfo.loc[question,'MC, OER (Question Group)'] == 'MC':
            for student in Responses.index.values:
                if TestInfo.loc[question,'Correct Answer'] == Responses.loc[student,question]:
                    BinaryMatrix.loc[student,question] = TestInfo.loc[question,'Possible Points']
                else:
                    BinaryMatrix.loc[student,question] = 0
        else:
            continue
    return BinaryMatrix

In [8]:
## Define function to calculate points per standard for a given test

def calcPPS(StandardsList,TestInfo):

    StandardIDs = StandardsList.loc[:,'Standard Code']
    PPS = pd.DataFrame(index = StandardIDs, columns = ['Points'])
    for standard in StandardIDs:
        PPS.loc[standard,'Points'] = 0
        for question in TestInfo.index.values:
            if TestInfo.loc[question,'(Primary) Standard'] == standard:
                PPS.loc[standard,'Points'] = PPS.loc[standard,'Points'] + TestInfo.loc[question,'Possible Points']
            else:
                continue
    return PPS

In [9]:
## Define function to calculate standards matrix per student

def createStandardsMatrix(BinaryMatrix, TestInfo, PPS):

    StandardIDs = PPS.index.values
    StandardsbyStudent = pd.DataFrame(index = BinaryMatrix.index.values, columns = StandardIDs, data = None)
    
    for standard in StandardsbyStudent.columns:
        AlignedQuestions = list()
        for question in TestInfo.index.values:
            if TestInfo.loc[question,'(Primary) Standard'] == standard:
                AlignedQuestions.append(question)
            else:
                continue
        for student in StandardsbyStudent.index:
            if PPS.loc[standard,'Points'] == 0:
                StandardsbyStudent.loc[student,standard] = 0
            else:
                PointsEarned = BinaryMatrix.loc[student,AlignedQuestions].sum()
                StandardsbyStudent.loc[student,standard] = (PointsEarned)
                
    return StandardsbyStudent
                                  

In [10]:
## Read Filenames for Test Info Pages and Response Matrices
# Use this later when working with multiple tests at once

path1 = r'.\TestInfo'
TestInfoNames = glob.glob(path1 + "/*.xlsx")

path2 = r'.\Responses'
ResponsesNames = glob.glob(path2 + "/*.xls")


In [11]:
## Import/Clean Info Pages, Import/Clean Responses, Create Binary Matrices

TestInfos = []
Responses = []
Binaries = []
PPSs = []
StandardsMatrices = []

for testName in TestInfoNames:
    DF_temp = cleanTestInfo(testName)
    TestInfos.append(DF_temp)
    
count = 0
for responsesName in ResponsesNames:
    DF_temp = cleanResponses(responsesName,TestInfos[count])
    Responses.append(DF_temp)
    count = count + 1
    
for testNum in range(len(Responses)):
    DF_temp = createBinary(Responses[testNum],TestInfos[testNum])
    Binaries.append(DF_temp)
    DF_temp = calcPPS(StandardsList, TestInfos[testNum])
    PPSs.append(DF_temp)
    DF_temp = createStandardsMatrix(Binaries[testNum],TestInfos[testNum],PPSs[testNum])
    StandardsMatrices.append(DF_temp)
    
PPSs[0].head()
    

Unnamed: 0_level_0,Points
Standard Code,Unnamed: 1_level_1
UHAI.HS.APCalc.Limits.1A.Symbol,0.0
UHAI.HS.APCalc.Limits.1B.Estimate,1.2
UHAI.HS.APCalc.Limits.1C.Determine,1.2
UHAI.HS.APCalc.Limits.1D.Behavior,2.4
UHAI.HS.APCalc.Limits.2A.Continuity,3.2


In [12]:
## Add the last date of assessment to Standards list

for test in range(len(PPSs)):
    print('test', test)
    count = 0
    for standard in PPSs[test].index.values:
        print(PPSs[test].loc[standard,'Points'], count)
        if PPSs[test].loc[standard,'Points'] == 0:
            StandardsList.loc[count, 'Last Date Assessed'] = StandardsList.loc[count, 'Last Date Assessed']
        else:
            StandardsList.loc[count, 'Last Date Assessed'] = DatesAssessed[test]
        count = count + 1
        
StandardsList

test 0
0 0
1.2 1
1.2 2
2.4 3
3.2 4
0 5
3.2 6
2.0 7
13.0 8
0 9
17.4 10
3.6 11
3.2 12
10.6 13
13.8 14
0 15
0 16
0 17
1.2 18
0 19
1.2 20
8.6 21
2.4 22
0 23
6.2 24
7.2 25
1.2 26
4.2 27
0 28
0 29
2.0 30
0 31
0 32
0 33
0 34
0 35
0 36
0 37
test 1
0 0
0 1
3.2 2
1.2 3
0 4
1.2 5
2.2 6
2.0 7
6.0 8
1.2 9
13.2 10
2.4 11
2.0 12
5.0 13
16.8 14
0 15
0 16
5.6 17
2.4 18
0 19
1.2 20
4.2 21
1.2 22
0 23
3.0 24
6.0 25
0 26
3.2 27
3.2 28
13.0 29
0 30
5.0 31
2.4 32
0 33
0 34
0 35
0 36
0 37
test 2
0 0
0 1
5.4 2
0 3
1.2 4
1.2 5
1.2 6
1.0 7
13.8 8
3.0 9
7.8 10
0 11
1.2 12
0 13
5.6 14
2.0 15
0 16
2.4 17
3.2 18
0 19
1.2 20
4.2 21
2.2 22
1.2 23
2.4 24
9.0 25
1.2 26
4.2 27
6.2 28
1.2 29
1.2 30
1.2 31
6.2 32
2.4 33
1.2 34
4.2 35
7.2 36
2.4 37


Unnamed: 0,Standard Code,Description,Unit,Priority,Last Date Assessed
0,UHAI.HS.APCalc.Limits.1A.Symbol,Express limits symbolically using correct nota...,1,,
1,UHAI.HS.APCalc.Limits.1B.Estimate,Estimate limits of functions,1,,2017-10-25
2,UHAI.HS.APCalc.Limits.1C.Determine,Determine limits of functions,1,X,2018-03-25
3,UHAI.HS.APCalc.Limits.1D.Behavior,Deduce and interpret behavior using limits,1,,2018-01-25
4,UHAI.HS.APCalc.Limits.2A.Continuity,Analyze functions for intervals of continuity ...,1,,2018-03-25
5,UHAI.HS.APCalc.Limits.2B.Theorems,Determine the applicability of important calcu...,1,,2018-03-25
6,UHAI.HS.APCalc.Derivatives.1A.LimDef,Identify the derivative of a function as the l...,2,,2018-03-25
7,UHAI.HS.APCalc.Derivatives.1B.Approximate,Estimate dervatives.,2,,2018-03-25
8,UHAI.HS.APCalc.Derivatives.1C.Calculate,Calculate Derivatives.,3,X,2018-03-25
9,UHAI.HS.APCalc.Derivatives.1D.HigherOrder,Determine higher order derivatives,3,,2018-03-25


In [13]:
## Sum Student Mastery Matrices and PPSs

StandardsMatricesSUM = sum(StandardsMatrices)
PPSsSUM = sum(PPSs)



In [14]:
## Calculate overall mastery for each student for each standard

OverallMastery = StandardsMatricesSUM.copy()

for student in StandardsMatricesSUM.index:
    for standard in PPSsSUM.index:
        if PPSsSUM.loc[standard,'Points'] == 0:
            OverallMastery.loc[student,standard] = 'NaN'
        else:
            OverallMastery.loc[student,standard] = StandardsMatricesSUM.loc[student,standard]/PPSsSUM.loc[standard,'Points']
            #Depreciated version with *100 for percent
            #OverallMastery.loc[student,standard] = StandardsMatricesSUM.loc[student,standard]/PPSsSUM.loc[standard,'Points']*100

OverallMastery


Unnamed: 0,UHAI.HS.APCalc.Limits.1A.Symbol,UHAI.HS.APCalc.Limits.1B.Estimate,UHAI.HS.APCalc.Limits.1C.Determine,UHAI.HS.APCalc.Limits.1D.Behavior,UHAI.HS.APCalc.Limits.2A.Continuity,UHAI.HS.APCalc.Limits.2B.Theorems,UHAI.HS.APCalc.Derivatives.1A.LimDef,UHAI.HS.APCalc.Derivatives.1B.Approximate,UHAI.HS.APCalc.Derivatives.1C.Calculate,UHAI.HS.APCalc.Derivatives.1D.HigherOrder,...,UHAI.HS.APCalc.Integrals.4C.Motion,UHAI.HS.APCalc.Integrals.4D.AreaVolume,UHAI.HS.APCalc.Integrals.4E.Apps,UHAI.HS.APCalc.Integrals.5A.AnalyzeDiffEqs,UHAI.HS.APCalc.Integrals.5B.SolveDiffEqs,UHAI.HS.APCalc.Series.1A.Convergence,UHAI.HS.APCalc.Series.1B.Sum,UHAI.HS.APCalc.Series.2A.Taylor,UHAI.HS.APCalc.Series.2B.Power,UHAI.HS.APCalc.Series.2C.Interval
301001207,,0,0.367347,0.666667,1.0,0.5,0.545455,0.2,0.72561,0.285714,...,0.234043,0.464789,0.375,0.16129,1.0,0.5,0,0.0,0.305556,0.0
301001210,,1,0.571429,0.666667,0.272727,0.5,0.818182,0.4,0.719512,1.0,...,0.255319,0.394366,0.0,0.0,0.627907,1.0,0,0.285714,0.138889,0.0
301001323,,1,0.673469,0.666667,1.0,0.5,0.818182,0.8,0.628049,1.0,...,0.106383,0.605634,0.375,0.0,0.627907,0.5,0,0.0,0.305556,0.0
301001331,,1,0.122449,0.666667,0.772727,0.0,0.181818,0.0,0.554878,0.0,...,0.12766,0.0,0.0,0.0,0.0,0.5,1,0.285714,0.0,0.5
301001336,,1,0.673469,0.666667,0.545455,0.0,0.636364,0.8,0.689024,0.285714,...,0.212766,0.43662,0.375,0.193548,0.627907,0.5,1,0.0,0.416667,0.0
301001384,,0,0.367347,0.333333,0.727273,0.5,0.454545,0.6,0.731707,0.285714,...,0.468085,0.450704,0.375,0.193548,0.72093,1.0,0,0.285714,0.0,1.0
301001391,,1,0.469388,0.666667,0.727273,0.0,0.818182,0.6,0.762195,0.285714,...,0.553191,0.830986,0.375,0.0,0.72093,0.0,0,0.0,0.722222,0.5
301001402,,1,0.693878,0.666667,1.0,0.0,1.0,1.0,0.737805,0.285714,...,0.361702,0.464789,1.0,0.193548,0.72093,1.0,0,0.238095,0.555556,0.5
301001409,,1,0.673469,0.666667,1.0,0.5,1.0,1.0,0.859756,1.0,...,0.531915,0.760563,0.0,0.193548,0.744186,1.0,0,0.47619,0.555556,0.5
301001410,,1,0.571429,0.666667,1.0,0.0,0.848485,1.0,0.932927,0.714286,...,0.234043,0.704225,0.0,0.0,1.0,1.0,1,0.238095,0.833333,0.5


In [15]:
#Get Roster df in order
Roster_processed = Roster.copy()
#cols = Roster2.columns.tolist() 
#cols = [cols[:0]]+[cols[2]]+[cols[1]]
#Roster2 = Roster2[cols]
#Roster2 = Roster2.ix[:, cols]
Roster_processed = Roster_processed[['Last, First', 'Section', 'Teacher']]
# 	Last, First 	Teacher 	Section

Roster_processed.head()

Unnamed: 0_level_0,"Last, First",Section,Teacher
Student ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
301001207,"Green, Imani",2,Coapman
301001210,"Hayes, Autumn",2,Coapman
301001323,"Acosta, Analia",2,Coapman
301001331,"Agbadou, Joshua",2,Coapman
301001336,"Bakare, Monica",1,Coapman


In [16]:
StandardsList_processed = StandardsList.copy()
StandardsList_processed = StandardsList_processed [['Unit', 'Last Date Assessed', 'Priority', 'Standard Code']]
StandardsList_processed = StandardsList_processed.fillna(value='')
StandardsList_processed = StandardsList_processed.T
StandardsList_processed.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,28,29,30,31,32,33,34,35,36,37
Unit,1,1,1,1,1,1,2,2,3,3,...,9,9,9,8,8,11,11,12,12,12
Last Date Assessed,,2017-10-25,2018-03-25,2018-01-25,2018-03-25,2018-03-25,2018-03-25,2018-03-25,2018-03-25,2018-03-25,...,2018-03-25,2018-03-25,2018-03-25,2018-03-25,2018-03-25,2018-03-25,2018-03-25,2018-03-25,2018-03-25,2018-03-25
Priority,,,X,,,,,,X,,...,X,,,,X,,,X,,0
Standard Code,UHAI.HS.APCalc.Limits.1A.Symbol,UHAI.HS.APCalc.Limits.1B.Estimate,UHAI.HS.APCalc.Limits.1C.Determine,UHAI.HS.APCalc.Limits.1D.Behavior,UHAI.HS.APCalc.Limits.2A.Continuity,UHAI.HS.APCalc.Limits.2B.Theorems,UHAI.HS.APCalc.Derivatives.1A.LimDef,UHAI.HS.APCalc.Derivatives.1B.Approximate,UHAI.HS.APCalc.Derivatives.1C.Calculate,UHAI.HS.APCalc.Derivatives.1D.HigherOrder,...,UHAI.HS.APCalc.Integrals.4C.Motion,UHAI.HS.APCalc.Integrals.4D.AreaVolume,UHAI.HS.APCalc.Integrals.4E.Apps,UHAI.HS.APCalc.Integrals.5A.AnalyzeDiffEqs,UHAI.HS.APCalc.Integrals.5B.SolveDiffEqs,UHAI.HS.APCalc.Series.1A.Convergence,UHAI.HS.APCalc.Series.1B.Sum,UHAI.HS.APCalc.Series.2A.Taylor,UHAI.HS.APCalc.Series.2B.Power,UHAI.HS.APCalc.Series.2C.Interval


In [17]:
#Time to Upload!
from df2gspread import df2gspread as d2g
import gspread
from oauth2client.service_account import ServiceAccountCredentials

scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']

credentials2 = ServiceAccountCredentials.from_json_keyfile_name('creds.json', scope)

gc = gspread.authorize(credentials2)
# Create empty dataframe
df = pd.DataFrame()

# Create a column
#df['name'] = ['John2', 'Steve', 'Sarah']
#df.append
# use full path to spreadsheet file
#spreadsheet = '/some/folder/New Spreadsheet'
# or spreadsheet file id
spreadsheet = '1AvN6e8bUKdXq43lYTU0fnK4dmUjC5iRdYaOpaoMFLHc'
wks = 'Course Template'

#Upload Roster Data
d2g.upload(Roster_processed, gfile=spreadsheet, wks_name=wks, start_cell='A40', credentials=credentials2, clean=False, df_size=False, col_names=False, row_names=False)
#d2g.upload(df, gfile=spreadsheet, wks_name=wks, start_cell='A40', credentials=credentials2, clean=False, df_size=False, col_names=False, row_names=False

#Upload Standards Mastery Numbers
d2g.upload(OverallMastery, gfile=spreadsheet, wks_name=wks, start_cell='E40', credentials=credentials2, clean=False, df_size=False, col_names=False, row_names=False)

#Upload the Standards themselves to TWO locations.
d2g.upload(StandardsList_processed, gfile=spreadsheet, wks_name=wks, start_cell='E19', credentials=credentials2, clean=False, df_size=False, col_names=False, row_names=False)
d2g.upload(StandardsList_processed, gfile=spreadsheet, wks_name=wks, start_cell='E36', credentials=credentials2, clean=False, df_size=False, col_names=False, row_names=False)

<Worksheet 'Course Template' id:otc155o>