# Conditional Class Performance

Basic Modules:

In [1]:
import pandas
from pandas import DataFrame, Series
# Lift the column display limit so wide frames are shown in full instead of being truncated.
pandas.options.display.max_columns = None

import time
import copy

import matplotlib.pyplot as plt
%matplotlib inline

from itertools import combinations

#### Import Data File:
Each value indicates whether a student failed (1) or passed (0) the class.

In [2]:
# Load the sample workbook; the path is relative to the notebook's working directory.
df = pandas.read_excel('Class_Bundle_Sample_File.xlsx')

# Sanity-check the dimensions (rows, columns) of the loaded frame.
print(df.shape)

# Preview the first rows of the frame.
df.head()


(99, 11)


Unnamed: 0,EMPLID,CL1,CL2,CL3,CL4,CL5,CL6,CL7,CL8,CL9,CL10
0,1,1,0,0,0,0,1,1,0,0,0
1,2,0,1,1,1,1,1,0,0,0,1
2,3,1,0,0,0,0,1,1,0,0,1
3,4,0,1,0,1,1,1,0,1,1,0
4,5,0,1,0,1,0,0,0,1,1,0


##### Create all possible combinations of n classes

In [3]:
# Number of classes in each combination to enumerate.
n = 3

# Every column after the first is treated as a class indicator; the first column is the student id.
classes = df.columns[1:]

# itertools.combinations yields each unordered n-class subset exactly once, lazily.
comb = combinations(classes, n) 

# Materialise the iterator so the combinations can be counted and iterated more than once.
comb_list = list(comb)

print('there are {} possible {}-class combinations'.format(len(comb_list), n))

there are 120 possible 3-class combinations


### Prepare data for processing

###### Fix the EMPLID column name, which contains an unexplained trailing space.

In [4]:
# Display the column labels to expose the trailing space in the student-id column name.
df.columns

Index(['EMPLID ', 'CL1', 'CL2', 'CL3', 'CL4', 'CL5', 'CL6', 'CL7', 'CL8',
       'CL9', 'CL10'],
      dtype='object')

In [5]:
# Strip the trailing space from the student-id column label; re-running this is a no-op.
df = df.rename(columns={'EMPLID ': 'EMPLID'})

## Engine

In [6]:
def combination_counter(df, combos=None, id_col='EMPLID'):
    """Count, for each class combination, the students flagged 1 in every class of it.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per student. Must contain ``id_col`` plus one 0/1 indicator
        column for every class named in ``combos``.
    combos : iterable of tuple of str, optional
        Class combinations to evaluate; the tuples may be of any length.
        When omitted, the notebook-level ``comb_list`` is used, which keeps
        existing ``combination_counter(df)`` calls working unchanged.
    id_col : str, default 'EMPLID'
        Name of the student-identifier column kept alongside the indicators.

    Returns
    -------
    tuple
        ``(combo_dict, students, data)`` where

        * ``combo_dict`` maps each combination with at least one common student
          to ``[class_1, ..., class_k, count]``;
        * ``students`` maps the same combinations to the sub-frame
          (``id_col`` + the combination's columns) of the matching students;
        * ``data`` lists the counts in the order the combinations were visited.

        Combinations with no student flagged in all of their classes are omitted.
    """
    if combos is None:
        # Backward-compatible default: the list built in the combinations cell.
        combos = comb_list

    data = []
    students = {}
    combo_dict = {}

    for combo in combos:
        class_cols = list(combo)

        # Student id followed by the indicator columns of this combination.
        df_tuple = df[[id_col] + class_cols]

        # True for students flagged 1 in every class of the combination. Built once,
        # and independent of how many classes the combination contains.
        in_all_classes = (df_tuple.iloc[:, 1:] == 1).all(axis=1)

        # Ignore combinations without any student in common.
        if not in_all_classes.any():
            continue

        common_stdts_emplid = df_tuple[in_all_classes]

        # Each True in the mask is one matching student.
        common_stdts_cnt = int(in_all_classes.sum())

        data.append(common_stdts_cnt)
        students[combo] = common_stdts_emplid
        combo_dict[combo] = class_cols + [common_stdts_cnt]

    return (combo_dict, students, data)

###### Implement Engine and obtain results

In [7]:
# Time the engine with a monotonic, high-resolution clock; time.time() follows the system
# clock and can jump if that clock is adjusted, which would distort a sub-second measurement.
start_time = time.perf_counter()
combo_dict, students, data = combination_counter(df)
end_time = time.perf_counter()

print('run took {:0.3f} seconds'.format((end_time-start_time)))

run took 0.389 seconds


In [8]:
# `data` holds one count per class combination that has at least one student flagged in
# every class of it; report the actual combination size instead of a literal "n".
print('there are {} class-tuples that have at least one student failing all {} classes'.format(len(data), n))

# The counts are failures (1 = failed), not enrolments. default=0 keeps the cell from
# raising when no combination has a common failing student.
print('the maximum number of students failing every class in a class set is {}'.format(max(data, default=0)))

there are 120 class-tuples that have at least one student failing all n classes
the maximum number of students enrolled in a class set is 21


In [9]:
# One label per class position plus the count column, derived from n so the cell keeps
# working when the combination size changes.
combo_columns = ['class{}'.format(i) for i in range(1, n + 1)] + ['failed']

# Passing columns= to from_dict also yields a correctly labelled (empty) frame when
# combo_dict is empty; chaining the sort avoids mutating the frame in place.
combo_data = (
    pandas.DataFrame
    .from_dict(combo_dict, orient='index', columns=combo_columns)
    .sort_values(by='failed', ascending=False)
)

combo_data.head()

Unnamed: 0,class1,class2,class3,failed
"(CL2, CL5, CL6)",CL2,CL5,CL6,21
"(CL2, CL3, CL5)",CL2,CL3,CL5,19
"(CL2, CL3, CL7)",CL2,CL3,CL7,19
"(CL1, CL2, CL7)",CL1,CL2,CL7,19
"(CL1, CL7, CL9)",CL1,CL7,CL9,19


###### Validation:
Look at original dataset and manually count events

In [10]:
# Classes of the combination being checked by hand; rows where all three are 1 sort to the top.
top_combo = ['CL2', 'CL5', 'CL6']

# Presumably 22 rows are shown so the end of the run of all-1 rows is visible — confirm.
validation_view = df[top_combo].sort_values(by=top_combo, ascending=False)
validation_view.head(22)

Unnamed: 0,CL2,CL5,CL6
1,1,1,1
3,1,1,1
6,1,1,1
9,1,1,1
10,1,1,1
18,1,1,1
28,1,1,1
31,1,1,1
33,1,1,1
36,1,1,1
