# Conditional Class Performance

Basic Modules:

In [2]:
import pandas
from pandas import DataFrame, Series
pandas.options.display.max_columns = None

import time
import copy

import matplotlib.pyplot as plt
%matplotlib inline

from itertools import combinations

#### Import Data File:
The data representation indicates student failing a class (1) or student passing a class (0)

In [3]:
df = pandas.read_excel('Class_Bundle_Sample_File.xlsx')

print(df.shape)

df.head()


(99, 11)


Unnamed: 0,EMPLID,CL1,CL2,CL3,CL4,CL5,CL6,CL7,CL8,CL9,CL10
0,1,1,0,0,0,0,1,1,0,0,0
1,2,0,1,1,1,1,1,0,0,0,1
2,3,1,0,0,0,0,1,1,0,0,1
3,4,0,1,0,1,1,1,0,1,1,0
4,5,0,1,0,1,0,0,0,1,1,0


##### Create all possible combinations of n classes

In [4]:
n = 4

classes = df.columns[1:]

comb = combinations(classes, n) 

comb_list = list(comb)

print('there are {} possible {}-class combinations'.format(len(comb_list), n))

there are 210 possible 4-class combinations


### Prepare data for processing

###### Fix the EMPLID column name which contains a space inserted at the end for no obvious reason.

In [5]:
df.columns

Index(['EMPLID ', 'CL1', 'CL2', 'CL3', 'CL4', 'CL5', 'CL6', 'CL7', 'CL8',
       'CL9', 'CL10'],
      dtype='object')

In [6]:
df.rename(columns={'EMPLID ':'EMPLID'},inplace=True)

In [14]:
df.columns[1:].shape[0]

10

## Engine

In [7]:
def combination_counter(df):
    
    #set up rows,columns,indexes and data to populate adjecency matrix
    dim = df.columns[1:].shape[0]
    row = 0
    col = 0
    row_idx = []
    col_idx = []
    data = []
    students = {}
    combo_dict = {}

    lst_index = list(range(dim - 1))
    start = 0
    end = start + len(list(range(dim))) - 1

    for row in range(dim):

        col = row + 1

        if row > 0:

            holder_index = lst_index

            start = holder_index[-1] + 1

            end = start + len(holder_index) - 1

            lst_index = list(range(start,end))

        # iterate through all possible class-combos combos
        for combo in comb_list[start:end]:

            # test
            #print(row,col,combo)

            # end indexing when reaching end
            if col <= end:

                # pull enrollment indicator for class-combos
                df_tuple = df[['EMPLID'] + list(combo)]

                # ignore class-combos without students in common
                if df_tuple[(df_tuple.iloc[:,1]==1) & 
                            (df_tuple.iloc[:,2]==1) & 
                            (df_tuple.iloc[:,3]==1) & 
                            (df_tuple.iloc[:,4]==1)].shape[0]!=0:

                    # pull df of all students in common for a given class-combo
                    common_stdts_emplid = df_tuple[(df_tuple.iloc[:,1]==1) & 
                                                   (df_tuple.iloc[:,2]==1) & 
                                                   (df_tuple.iloc[:,3]==1) & 
                                                   (df_tuple.iloc[:,4]==1)]
                    common_stdts = common_stdts_emplid.drop(columns=['EMPLID'])

                    # count students in common for a given class-combo
                    common_stdts_cnt = common_stdts.iloc[:,0].sum()

                    # store row index
                    row_idx.append(row)

                    # store column index
                    col_idx.append(col)

                    # store students in common count
                    data.append(common_stdts_cnt)

                    # store students in common with emplid
                    students[combo] = common_stdts_emplid

                    # store index class-combos
                    combo_dict[combo] = [combo[0],combo[1],combo[2],combo[3],row,col,common_stdts_cnt]

                    #test
                    #print(row,col,combo,common_stdts_cnt)

                # accumulate col by one
                col += 1
                
    return (combo_dict,students,data)

###### Implement Engine and obtain results

In [8]:
start_time = time.time()
combo_dict, students, data = combination_counter(df)
end_time = time.time()

print('run took {:0.3f} seconds'.format((end_time-start_time)))

run took 0.197 seconds


In [9]:
print('there are {} class-tuples that have at least one student failing all n classes'.format(len(data)))

print('the maximum number of students enrolled in a class set is {}'.format(max(data)))

there are 45 class-tuples that have at least one student failing all n classes
the maximum number of students enrolled in a class set is 13


In [10]:
combo_data = pandas.DataFrame.from_dict(combo_dict, orient='index')
combo_data.columns = ['class1','class2','class3','class4','row','column','failed']

combo_data.sort_values(by='failed',ascending=False,inplace=True)

combo_data.head()

Unnamed: 0,class1,class2,class3,class4,row,column,failed
"(CL1, CL3, CL7, CL9)",CL1,CL3,CL7,CL9,8,9,13
"(CL1, CL2, CL7, CL9)",CL1,CL2,CL7,CL9,2,9,13
"(CL1, CL2, CL3, CL7)",CL1,CL2,CL3,CL7,0,4,12
"(CL1, CL2, CL5, CL9)",CL1,CL2,CL5,CL9,1,9,11
"(CL1, CL2, CL6, CL7)",CL1,CL2,CL6,CL7,2,4,10


In [13]:
combo_data.shape

(45, 7)

###### Validation:
Look at original dataset and manually count events

In [12]:
df[['CL1','CL3','CL7','CL9']].sort_values(by=['CL1','CL3','CL7','CL9'],ascending=False).head(14)

Unnamed: 0,CL1,CL3,CL7,CL9
9,1,1,1,1
11,1,1,1,1
20,1,1,1,1
27,1,1,1,1
37,1,1,1,1
55,1,1,1,1
59,1,1,1,1
66,1,1,1,1
72,1,1,1,1
73,1,1,1,1
