### Imports

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import urllib
import matplotlib.dates as mdates

### Loading the data the dataframe

In [2]:
df = pd.read_csv('data.csv')
pd.options.display.max_columns = 999

### Reorganizing the data - Difficulty and Questions score
We want to change the 1/5 scale to -2/2 so that we can have 0 as an absolute value

In [3]:
df.iloc[:, 4:] = df.iloc[:, 4:].apply(lambda x: x - 3)

### Renaming the nb.repeats column name to no.taken 
The name is confusing and can be interpreted that a student has taken the calss already once and this
value represents the number of times that he repeated the class

In [4]:
df.rename(columns={'nb.repeat':'no.taken'}, inplace=True)

### Assessing the class size - Grouping the 13 classes

In [5]:
# spc means students per class
spc = df.groupby(['class']).size()
spc

class
1     303
2     140
3     904
4     187
5     656
6     558
7     187
8     500
9     571
10    448
11    484
12     41
13    841
dtype: int64

### Assessing how many students have taken the class and how many times

In [6]:
df.groupby('no.taken').size()

no.taken
1    4909
2     576
3     335
dtype: int64

In [7]:
# sapc means student attempts per class
sapc = df.groupby(['class', 'no.taken']).size()
sapc

class  no.taken
1      1           272
       2            25
       3             6
2      1           130
       2             7
       3             3
3      1           778
       2            87
       3            39
4      1           154
       2            20
       3            13
5      1           611
       2            28
       3            17
6      1           494
       2            55
       3             9
7      1           150
       2            14
       3            23
8      1           379
       2            69
       3            52
9      1           400
       2            99
       3            72
10     1           425
       2            14
       3             9
11     1           431
       2            43
       3            10
12     1            34
       2             5
       3             2
13     1           651
       2           110
       3            80
dtype: int64

### Converting the values in percentages to better understand the dataset

In [8]:
# sapc means student attempts per class percentage
sapcp = df.groupby(['class', 'no.taken']).size().reset_index(name='count')
sapcp['Percentage'] = sapcp.groupby('class')['count'].transform(lambda x: round(x/sum(x) * 100, 2))
sapcp

Unnamed: 0,class,no.taken,count,Percentage
0,1,1,272,89.77
1,1,2,25,8.25
2,1,3,6,1.98
3,2,1,130,92.86
4,2,2,7,5.0
5,2,3,3,2.14
6,3,1,778,86.06
7,3,2,87,9.62
8,3,3,39,4.31
9,4,1,154,82.35


### Getting the mean of the percentages from 1, 2, 3 attemps

In [9]:
sapcp.groupby('no.taken').mean()['Percentage']

no.taken
1    84.848462
2     9.508462
3     5.643077
Name: Percentage, dtype: float64

Based on the previous block, we can assume that the teachers had little to do with students passing/failing.
By calculating the mean for the atempts, we can see that 84.8% of students took the class once, 9.5% took the class twice and 5.6% three times 

### Split the data in chunks - classes to prepare it for further analysis
This way, we can quickly access the data by class

Data can be accessed like so dfdc[class]

In [10]:
# Create a list of classes
classes = df['class'].unique()
classes
# Create a data frame dictionary to store the entries per class for quick access
# dfd is dataframe dictionary
dfdc = {elem : pd.DataFrame for elem in classes}
for key in dfdc.keys():
    dfdc[key] = df[:][df['class'] == key]


### Split the data in chunks - teachers to prepare it for further analysis
This way, we can quickly access the entries relating to a teacher

Data can be accessed like so dfdt[teacher]

In [11]:
# Create a list of teachers
teachers = df['instr'].unique()
teachers
# Create a data frame dictionary to store the entries per teacher for quick access
# dfdp is dataframe dictionary
dfdp = {elem : pd.DataFrame for elem in teachers}
for key in dfdc.keys():
    dfdp[key] = df[:][df['instr'] == key]

### Observation: 

Based on our assumptions and analysis, we have noticed that some questions tend to repeat. That is because the author of the questionaire wanted to make sure that the students are answering almost the same to the questions at hand. After further analysis, we identified the following as potential duplicates:

- Q5 and Q7 - 263 (1)
- Q8 and Q10 - 251 (2)
- Q13 and Q14 - 102 (3)
- Q15 and Q19 - 250 (4)
- Q16 and Q18 - 191 (5)
- Q20 and Q25 - 233 (6) 
- Q21 and Q22 - 106 (7)
- Q24 and Q26 - 288 (8) 

The next steps are:
- Calculate the difference between the four groups of questions
- Assess how many of the entries have a difference greater than abs(1)
- Condense the duplicate questions if the percentage of such occurance is greater than 90%


In [12]:
# Create a dataframe that holds the questions and the differences between the ones that we think are duplicate
df_alt = df
df_alt['diff (1)'] = df_alt.apply(lambda x: x['Q5'] - x['Q7'], axis=1)
df_alt['diff (2)'] = df_alt.apply(lambda x: x['Q8'] - x['Q10'], axis=1)
df_alt['diff (3)'] = df_alt.apply(lambda x: x['Q13'] - x['Q14'], axis=1)
df_alt['diff (4)'] = df_alt.apply(lambda x: x['Q15'] - x['Q19'], axis=1)
df_alt['diff (5)'] = df_alt.apply(lambda x: x['Q16'] - x['Q18'], axis=1)
df_alt['diff (6)'] = df_alt.apply(lambda x: x['Q20'] - x['Q25'], axis=1)
df_alt['diff (7)'] = df_alt.apply(lambda x: x['Q21'] - x['Q22'], axis=1)
df_alt['diff (8)'] = df_alt.apply(lambda x: x['Q24'] - x['Q26'], axis=1)


In [13]:
# See how many records are have all the new columns greater than 1 or lower than -1 
# This will show us how many people have answered very differently to a duplicate question

# df_alt[((df_alt['diff (1)'] < -1) | (df_alt['diff (1)'] > 1))].count()
# df_alt[((df_alt['diff (2)'] < -1) | (df_alt['diff (2)'] > 1))].count()
# df_alt[((df_alt['diff (3)'] < -1) | (df_alt['diff (3)'] > 1))].count()
# df_alt[((df_alt['diff (4)'] < -1) | (df_alt['diff (4)'] > 1))].count()
# df_alt[((df_alt['diff (5)'] < -1) | (df_alt['diff (5)'] > 1))].count()
# df_alt[((df_alt['diff (6)'] < -1) | (df_alt['diff (6)'] > 1))].count()
# df_alt[((df_alt['diff (7)'] < -1) | (df_alt['diff (7)'] > 1))].count()
# df_alt[((df_alt['diff (8)'] < -1) | (df_alt['diff (8)'] > 1))].count()
df

Unnamed: 0,instr,class,no.taken,attendance,difficulty,Q1,Q2,Q3,Q4,Q5,Q6,Q7,Q8,Q9,Q10,Q11,Q12,Q13,Q14,Q15,Q16,Q17,Q18,Q19,Q20,Q21,Q22,Q23,Q24,Q25,Q26,Q27,Q28,diff (1),diff (2),diff (3),diff (4),diff (5),diff (6),diff (7),diff (8)
0,1,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,2,1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0
3,1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,2,1,0,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,0,0,0,0,0,0,0,0
5,1,2,1,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0
6,1,2,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0
7,1,2,1,1,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,0,0,0,0,0,0,0
8,1,2,1,1,0,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0
9,1,2,1,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0


### Entries that have a different answer greater than 1 or lower than -1 to a potential duplicated question
- Q5 and Q7 - 263 (1)
- Q8 and Q10 - 251 (2)
- Q13 and Q14 - 102 (3)
- Q15 and Q19 - 250 (4)
- Q16 and Q18 - 191 (5)
- Q20 and Q25 - 233 (6) 
- Q21 and Q22 - 106 (7)
- Q24 and Q26 - 288 (8) 

### Therefore we can mark the 8 groups as duplicate questions, eliminating the one that has the lower rating
- We can do this by making the average between a group of questions and eliminating one

In [14]:
df_alt['Q5 - Q7'] = df_alt.apply(lambda x: int(round((x['Q5'] + x['Q7']) / 2)), axis=1)
df_alt['Q8 - Q10'] = df_alt.apply(lambda x: int(round((x['Q8'] + x['Q10']) / 2)), axis=1)
df_alt['Q13 - Q14'] = df_alt.apply(lambda x: int(round((x['Q13'] + x['Q14']) / 2)), axis=1)
df_alt['Q15 - Q19'] = df_alt.apply(lambda x: int(round((x['Q15'] + x['Q19']) / 2)), axis=1)
df_alt['Q16 - Q18'] = df_alt.apply(lambda x: int(round((x['Q16'] + x['Q18']) / 2)), axis=1)
df_alt['Q20 - Q25'] = df_alt.apply(lambda x: int(round((x['Q20'] + x['Q25']) / 2)), axis=1)
df_alt['Q21 - Q22'] = df_alt.apply(lambda x: int(round((x['Q21'] + x['Q22']) / 2)), axis=1)
df_alt['Q24 - Q26'] = df_alt.apply(lambda x: int(round((x['Q24'] + x['Q26']) / 2)), axis=1)

### Confirming that the questions are close in score

In [131]:
df_alt[['Q5', 'Q7', 'Q8', 'Q10', 'Q13', 'Q14', 'Q15', 'Q19', 'Q16', 'Q18', 'Q20', 'Q25', 'Q21', 'Q22', 'Q24', 'Q26']].sum()

Q5      616
Q7      386
Q8      244
Q10     528
Q13    1413
Q14    1693
Q15    1672
Q19    1523
Q16     987
Q18    1295
Q20    1661
Q25    1819
Q21    1789
Q22    1848
Q24     971
Q26    1293
dtype: int64

### Eliminating the unwanted columns
This step is needed so that we can further analyse the data

In [15]:
df.drop(['Q5', 'Q7', 'Q8', 'Q10', 'Q13', 'Q14', 'Q15', 'Q19', 'Q16', 'Q18', 'Q20', 'Q25', 'Q21', 'Q22', 'Q24', 'Q26', 'diff (1)', 'diff (2)', 'diff (3)', 'diff (4)', 'diff (5)', 'diff (6)', 'diff (7)', 'diff (8)'], axis=1, inplace=True)
df

Unnamed: 0,instr,class,no.taken,attendance,difficulty,Q1,Q2,Q3,Q4,Q6,Q9,Q11,Q12,Q17,Q23,Q27,Q28,Q5 - Q7,Q8 - Q10,Q13 - Q14,Q15 - Q19,Q16 - Q18,Q20 - Q25,Q21 - Q22,Q24 - Q26
0,1,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,2,1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
3,1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,2,1,0,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2
5,1,2,1,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
6,1,2,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
7,1,2,1,1,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
8,1,2,1,1,0,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1
9,1,2,1,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


### Grouping the dataset by themes - Chosen question that represent that theme
- Course Satisfaction - Q3, Q5-Q7, Q9, Q11, Q8-Q10 (5)
- Preparation/Organization - Q1, Q2, Q4, Q15-Q19, Q24-Q26 (5)
- Competence/Ethics - Q6, Q28, Q17, Q13-Q14, Q16-Q18(Because he is competent and has ethics he is committed to the course. I think)
- Helpfulness/Commitment - Q12, Q27, Q23, Q21-Q22, Q20-Q25 (5)

In [22]:
df_cs = df_alt[['instr', 'class', 'no.taken', 'attendance', 'difficulty', 'Q3', 'Q5 - Q7', 'Q9', 'Q11', 'Q8 - Q10']]
df_po = df_alt[['instr', 'class', 'no.taken', 'attendance', 'difficulty', 'Q1', 'Q2', 'Q4', 'Q15 - Q19', 'Q24 - Q26']]
df_ce = df_alt[['instr', 'class', 'no.taken', 'attendance', 'difficulty', 'Q6', 'Q13 - Q14', 'Q16 - Q18', 'Q17', 'Q28']]
df_hc = df_alt[['instr', 'class', 'no.taken', 'attendance', 'difficulty', 'Q12', 'Q20 - Q25', 'Q21 - Q22', 'Q23', 'Q27']]

In [31]:
# this is interesting - have to figure out what it all means.
df_alt.groupby(['instr', 'class']).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,no.taken,no.taken,no.taken,no.taken,no.taken,no.taken,no.taken,no.taken,attendance,attendance,attendance,attendance,attendance,attendance,attendance,attendance,difficulty,difficulty,difficulty,difficulty,difficulty,difficulty,difficulty,difficulty,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q6,Q6,Q6,Q6,Q6,Q6,Q6,Q6,Q9,Q9,Q9,Q9,Q9,Q9,Q9,Q9,Q11,Q11,Q11,Q11,Q11,Q11,Q11,Q11,Q12,Q12,Q12,Q12,Q12,Q12,Q12,Q12,Q17,Q17,Q17,Q17,Q17,Q17,Q17,Q17,Q23,Q23,Q23,Q23,Q23,Q23,Q23,Q23,Q27,Q27,Q27,Q27,Q27,Q27,Q27,Q27,Q28,Q28,Q28,Q28,Q28,Q28,Q28,Q28,Q5 - Q7,Q5 - Q7,Q5 - Q7,Q5 - Q7,Q5 - Q7,Q5 - Q7,Q5 - Q7,Q5 - Q7,Q8 - Q10,Q8 - Q10,Q8 - Q10,Q8 - Q10,Q8 - Q10,Q8 - Q10,Q8 - Q10,Q8 - Q10,Q13 - Q14,Q13 - Q14,Q13 - Q14,Q13 - Q14,Q13 - Q14,Q13 - Q14,Q13 - Q14,Q13 - Q14,Q15 - Q19,Q15 - Q19,Q15 - Q19,Q15 - Q19,Q15 - Q19,Q15 - Q19,Q15 - Q19,Q15 - Q19,Q16 - Q18,Q16 - Q18,Q16 - Q18,Q16 - Q18,Q16 - Q18,Q16 - Q18,Q16 - Q18,Q16 - Q18,Q20 - Q25,Q20 - Q25,Q20 - Q25,Q20 - Q25,Q20 - Q25,Q20 - Q25,Q20 - Q25,Q20 - Q25,Q21 - Q22,Q21 - Q22,Q21 - Q22,Q21 - Q22,Q21 - Q22,Q21 - Q22,Q21 - Q22,Q21 - Q22,Q24 - Q26,Q24 - Q26,Q24 - Q26,Q24 - Q26,Q24 - Q26,Q24 - Q26,Q24 - Q26,Q24 - Q26
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
instr,class,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2,Unnamed: 83_level_2,Unnamed: 84_level_2,Unnamed: 85_level_2,Unnamed: 86_level_2,Unnamed: 87_level_2,Unnamed: 88_level_2,Unnamed: 89_level_2,Unnamed: 90_level_2,Unnamed: 91_level_2,Unnamed: 92_level_2,Unnamed: 93_level_2,Unnamed: 94_level_2,Unnamed: 95_level_2,Unnamed: 96_level_2,Unnamed: 97_level_2,Unnamed: 98_level_2,Unnamed: 99_level_2,Unnamed: 100_level_2,Unnamed: 101_level_2,Unnamed: 102_level_2,Unnamed: 103_level_2,Unnamed: 104_level_2,Unnamed: 105_level_2,Unnamed: 106_level_2,Unnamed: 107_level_2,Unnamed: 108_level_2,Unnamed: 109_level_2,Unnamed: 110_level_2,Unnamed: 111_level_2,Unnamed: 112_level_2,Unnamed: 113_level_2,Unnamed: 114_level_2,Unnamed: 115_level_2,Unnamed: 116_level_2,Unnamed: 117_level_2,Unnamed: 118_level_2,Unnamed: 119_level_2,Unnamed: 120_level_2,Unnamed: 121_level_2,Unnamed: 122_level_2,Unnamed: 123_level_2,Unnamed: 124_level_2,Unnamed: 125_level_2,Unnamed: 126_level_2,Unnamed: 127_level_2,Unnamed: 128_level_2,Unnamed: 129_level_2,Unnamed: 130_level_2,Unnamed: 131_level_2,Unnamed: 132_level_2,Unnamed: 133_level_2,Unnamed: 134_level_2,Unnamed: 135_level_2,Unnamed: 136_level_2,Unnamed: 137_level_2,Unnamed: 138_level_2,Unnamed: 139_level_2,Unnamed: 140_level_2,Unnamed: 141_level_2,Unnamed: 142_level_2,Unnamed: 143_level_2,Unnamed: 144_level_2,Unnamed: 145_level_2,Unnamed: 146_level_2,Unnamed: 147_level_2,Unnamed: 148_level_2,Unnamed: 149_level_2,Unnamed: 150_level_2,Unnamed: 151_level_2,Unnamed: 152_level_2,Unnamed: 153_level_2,Unnamed: 154_level_2,Unnamed: 155_level_2,Unnamed: 156_level_2,Unnamed: 157_level_2,Unnamed: 158_level_2,Unnamed: 159_level_2,Unnamed: 160_level_2,Unnamed: 161_level_2,Unnamed: 162_level_2,Unnamed: 163_level_2,Unnamed: 164_level_2,Unnamed: 165_level_2,Unnamed: 166_level_2,Unnamed: 167_level_2,Unnamed: 168_level_2,Unnamed: 169_level_2,Unnamed: 170_level_2,Unnamed: 171_level_2,Unnamed: 172_level_2,Unnamed: 173_level_2,Unnamed: 174_level_2,Unnamed: 175_level_2,Unnamed: 176_level_2,Unnamed: 177_level_2,Unnamed: 178_level_2,Unnamed: 179_level_2,Unnamed: 180_level_2,Unnamed: 181_level_2,Unnamed: 182_level_2,Unnamed: 183_level_2,Unnamed: 184_level_2,Unnamed: 185_level_2
1,2,140.0,1.092857,0.357779,1.0,1.0,1.0,1.0,3.0,140.0,2.071429,1.284343,0.0,1.0,2.0,3.0,4.0,140.0,0.442857,1.081376,-2.0,0.0,1.0,1.0,2.0,140.0,0.421429,1.449723,-2.0,-1.0,1.0,2.0,2.0,140.0,0.492857,1.406544,-2.0,0.0,1.0,2.0,2.0,140.0,0.485714,1.396222,-2.0,0.0,1.0,2.0,2.0,140.0,0.457143,1.405904,-2.0,-0.25,1.0,2.0,2.0,140.0,0.542857,1.416102,-2.0,0.0,1.0,2.0,2.0,140.0,0.564286,1.420359,-2.0,0.0,1.0,2.0,2.0,140.0,0.585714,1.409044,-2.0,0.0,1.0,2.0,2.0,140.0,0.521429,1.446742,-2.0,-1.0,1.0,2.0,2.0,140.0,0.714286,1.369166,-2.0,0.0,1.0,2.0,2.0,140.0,0.6,1.39784,-2.0,0.0,1.0,2.0,2.0,140.0,0.585714,1.419219,-2.0,0.0,1.0,2.0,2.0,140.0,0.614286,1.41719,-2.0,0.0,1.0,2.0,2.0,140.0,0.585714,1.378069,-2.0,0.0,1.0,2.0,2.0,140.0,0.571429,1.420015,-2.0,0.0,1.0,2.0,2.0,140.0,0.678571,1.379541,-2.0,0.0,1.0,2.0,2.0,140.0,0.671429,1.375232,-2.0,0.0,1.0,2.0,2.0,140.0,0.628571,1.395485,-2.0,0.0,1.0,2.0,2.0,140.0,0.664286,1.381328,-2.0,0.0,1.0,2.0,2.0,140.0,0.635714,1.410193,-2.0,0.0,1.0,2.0,2.0,140.0,0.6,1.428386,-2.0,0.0,1.0,2.0,2.0
1,7,187.0,1.320856,0.682928,1.0,1.0,1.0,1.0,3.0,187.0,2.037433,1.434479,0.0,1.0,2.0,3.0,4.0,187.0,0.823529,1.050285,-2.0,0.0,1.0,2.0,2.0,187.0,-0.272727,1.293264,-2.0,-1.5,0.0,1.0,2.0,187.0,-0.096257,1.332528,-2.0,-1.0,0.0,1.0,2.0,187.0,0.117647,1.302214,-2.0,-1.0,0.0,1.0,2.0,187.0,-0.101604,1.322001,-2.0,-1.0,0.0,1.0,2.0,187.0,-0.187166,1.328855,-2.0,-2.0,0.0,1.0,2.0,187.0,0.0,1.336019,-2.0,-1.0,0.0,1.0,2.0,187.0,0.245989,1.35338,-2.0,0.0,0.0,1.0,2.0,187.0,-0.122995,1.34038,-2.0,-1.0,0.0,1.0,2.0,187.0,0.342246,1.371947,-2.0,0.0,1.0,1.0,2.0,187.0,0.122995,1.332334,-2.0,-1.0,0.0,1.0,2.0,187.0,-0.026738,1.369535,-2.0,-1.0,0.0,1.0,2.0,187.0,0.117647,1.346864,-2.0,-1.0,0.0,1.0,2.0,187.0,0.016043,1.329872,-2.0,-1.0,0.0,1.0,2.0,187.0,0.069519,1.278642,-2.0,-1.0,0.0,1.0,2.0,187.0,0.144385,1.322066,-2.0,0.0,0.0,1.0,2.0,187.0,0.171123,1.320891,-2.0,-0.5,0.0,1.0,2.0,187.0,0.074866,1.30539,-2.0,0.0,0.0,1.0,2.0,187.0,0.197861,1.32324,-2.0,0.0,0.0,1.0,2.0,187.0,0.101604,1.342181,-2.0,-1.0,0.0,1.0,2.0,187.0,0.139037,1.340809,-2.0,-0.5,0.0,1.0,2.0
1,10,448.0,1.071429,0.326716,1.0,1.0,1.0,1.0,3.0,448.0,1.959821,1.458813,0.0,1.0,2.0,3.0,4.0,448.0,-0.321429,1.190674,-2.0,-1.0,0.0,0.0,2.0,448.0,0.361607,1.279174,-2.0,0.0,1.0,1.0,2.0,448.0,0.421875,1.247158,-2.0,0.0,1.0,1.0,2.0,448.0,0.435268,1.260413,-2.0,0.0,1.0,1.0,2.0,448.0,0.412946,1.233939,-2.0,0.0,1.0,1.0,2.0,448.0,0.455357,1.247016,-2.0,0.0,1.0,1.0,2.0,448.0,0.435268,1.271018,-2.0,0.0,1.0,1.0,2.0,448.0,0.486607,1.253112,-2.0,0.0,1.0,1.0,2.0,448.0,0.388393,1.290492,-2.0,0.0,1.0,1.0,2.0,448.0,0.522321,1.252985,-2.0,0.0,1.0,1.0,2.0,448.0,0.424107,1.258012,-2.0,0.0,1.0,1.0,2.0,448.0,0.401786,1.260019,-2.0,0.0,1.0,1.0,2.0,448.0,0.459821,1.254323,-2.0,0.0,1.0,1.0,2.0,448.0,0.424107,1.223758,-2.0,0.0,1.0,1.0,2.0,448.0,0.430804,1.260176,-2.0,0.0,1.0,1.0,2.0,448.0,0.486607,1.258457,-2.0,0.0,1.0,1.0,2.0,448.0,0.457589,1.239898,-2.0,0.0,1.0,1.0,2.0,448.0,0.459821,1.256105,-2.0,0.0,1.0,1.0,2.0,448.0,0.426339,1.259922,-2.0,0.0,1.0,1.0,2.0,448.0,0.424107,1.259789,-2.0,0.0,1.0,1.0,2.0,448.0,0.4375,1.239046,-2.0,0.0,1.0,1.0,2.0
2,1,303.0,1.122112,0.383785,1.0,1.0,1.0,1.0,3.0,303.0,1.749175,1.512571,0.0,0.0,1.0,3.0,4.0,303.0,-0.511551,1.12425,-2.0,-2.0,0.0,0.0,2.0,303.0,0.171617,1.290664,-2.0,-1.0,0.0,1.0,2.0,303.0,0.363036,1.196111,-2.0,0.0,1.0,1.0,2.0,303.0,0.39934,1.171799,-2.0,0.0,1.0,1.0,2.0,303.0,0.330033,1.227441,-2.0,0.0,0.0,1.0,2.0,303.0,0.316832,1.209214,-2.0,0.0,0.0,1.0,2.0,303.0,0.445545,1.186083,-2.0,0.0,1.0,1.0,2.0,303.0,0.49835,1.190121,-2.0,0.0,1.0,1.0,2.0,303.0,0.389439,1.242256,-2.0,0.0,1.0,1.0,2.0,303.0,0.574257,1.179403,-2.0,0.0,1.0,1.0,2.0,303.0,0.481848,1.203816,-2.0,0.0,1.0,1.0,2.0,303.0,0.369637,1.250961,-2.0,0.0,1.0,1.0,2.0,303.0,0.574257,1.19613,-2.0,0.0,1.0,1.0,2.0,303.0,0.323432,1.176908,-2.0,0.0,0.0,1.0,2.0,303.0,0.316832,1.192671,-2.0,0.0,0.0,1.0,2.0,303.0,0.514851,1.201108,-2.0,0.0,1.0,1.0,2.0,303.0,0.511551,1.184488,-2.0,0.0,1.0,1.0,2.0,303.0,0.49505,1.195663,-2.0,0.0,1.0,1.0,2.0,303.0,0.488449,1.195618,-2.0,0.0,1.0,1.0,2.0,303.0,0.537954,1.192295,-2.0,0.0,1.0,1.0,2.0,303.0,0.465347,1.184036,-2.0,0.0,1.0,1.0,2.0
2,6,558.0,1.130824,0.3824,1.0,1.0,1.0,1.0,3.0,558.0,1.758065,1.409342,0.0,0.0,2.0,3.0,4.0,558.0,-0.397849,1.096491,-2.0,-1.0,0.0,0.0,2.0,558.0,0.098566,1.320722,-2.0,-1.0,0.0,1.0,2.0,558.0,0.290323,1.221418,-2.0,0.0,0.0,1.0,2.0,558.0,0.399642,1.201346,-2.0,0.0,1.0,1.0,2.0,558.0,0.275986,1.217392,-2.0,0.0,0.0,1.0,2.0,558.0,0.331541,1.248097,-2.0,0.0,0.0,1.0,2.0,558.0,0.333333,1.195443,-2.0,0.0,0.0,1.0,2.0,558.0,0.390681,1.218375,-2.0,0.0,1.0,1.0,2.0,558.0,0.284946,1.235104,-2.0,0.0,0.0,1.0,2.0,558.0,0.537634,1.207925,-2.0,0.0,1.0,1.0,2.0,558.0,0.4319,1.203609,-2.0,0.0,1.0,1.0,2.0,558.0,0.336918,1.243049,-2.0,0.0,1.0,1.0,2.0,558.0,0.483871,1.215811,-2.0,0.0,1.0,1.0,2.0,558.0,0.311828,1.204228,-2.0,0.0,0.0,1.0,2.0,558.0,0.272401,1.203373,-2.0,0.0,0.0,1.0,2.0,558.0,0.523297,1.193337,-2.0,0.0,1.0,1.0,2.0,558.0,0.501792,1.207025,-2.0,0.0,1.0,1.0,2.0,558.0,0.507168,1.209976,-2.0,0.0,1.0,1.0,2.0,558.0,0.494624,1.192047,-2.0,0.0,1.0,1.0,2.0,558.0,0.510753,1.199518,-2.0,0.0,1.0,1.0,2.0,558.0,0.417563,1.181628,-2.0,0.0,1.0,1.0,2.0
2,11,484.0,1.130165,0.393528,1.0,1.0,1.0,1.0,3.0,484.0,1.876033,1.437851,0.0,1.0,2.0,3.0,4.0,484.0,0.018595,1.199407,-2.0,-1.0,0.0,1.0,2.0,484.0,0.14876,1.284797,-2.0,-1.0,0.0,1.0,2.0,484.0,0.278926,1.21528,-2.0,0.0,0.0,1.0,2.0,484.0,0.35124,1.183294,-2.0,0.0,1.0,1.0,2.0,484.0,0.247934,1.221146,-2.0,0.0,0.0,1.0,2.0,484.0,0.115702,1.252344,-2.0,-1.0,0.0,1.0,2.0,484.0,0.384298,1.185245,-2.0,0.0,1.0,1.0,2.0,484.0,0.338843,1.192132,-2.0,0.0,0.0,1.0,2.0,484.0,0.165289,1.219898,-2.0,-1.0,0.0,1.0,2.0,484.0,0.599174,1.142298,-2.0,0.0,1.0,1.0,2.0,484.0,0.442149,1.14875,-2.0,0.0,1.0,1.0,2.0,484.0,0.396694,1.186392,-2.0,0.0,1.0,1.0,2.0,484.0,0.477273,1.178438,-2.0,0.0,1.0,1.0,2.0,484.0,0.214876,1.187977,-2.0,0.0,0.0,1.0,2.0,484.0,0.229339,1.166779,-2.0,0.0,0.0,1.0,2.0,484.0,0.413223,1.177212,-2.0,0.0,1.0,1.0,2.0,484.0,0.456612,1.13671,-2.0,0.0,1.0,1.0,2.0,484.0,0.411157,1.173535,-2.0,0.0,1.0,1.0,2.0,484.0,0.471074,1.157025,-2.0,0.0,1.0,1.0,2.0,484.0,0.53719,1.140566,-2.0,0.0,1.0,1.0,2.0,484.0,0.396694,1.134655,-2.0,0.0,0.0,1.0,2.0
2,13,99.0,1.070707,0.327406,1.0,1.0,1.0,1.0,3.0,99.0,1.626263,1.562219,0.0,0.0,2.0,3.0,4.0,99.0,-0.959596,1.133166,-2.0,-2.0,-1.0,0.0,2.0,99.0,-0.060606,1.361408,-2.0,-1.0,0.0,1.0,2.0,99.0,0.171717,1.293945,-2.0,-1.0,0.0,1.0,2.0,99.0,0.222222,1.266165,-2.0,0.0,0.0,1.0,2.0,99.0,0.212121,1.295776,-2.0,-0.5,0.0,1.0,2.0,99.0,0.212121,1.326902,-2.0,-1.0,0.0,1.0,2.0,99.0,0.353535,1.272256,-2.0,0.0,1.0,1.0,2.0,99.0,0.272727,1.26828,-2.0,0.0,0.0,1.0,2.0,99.0,-0.050505,1.264699,-2.0,-1.0,0.0,1.0,2.0,99.0,0.606061,1.260209,-2.0,0.0,1.0,2.0,2.0,99.0,0.424242,1.317782,-2.0,0.0,0.0,2.0,2.0,99.0,0.393939,1.353892,-2.0,-0.5,1.0,2.0,2.0,99.0,0.565657,1.318329,-2.0,0.0,1.0,2.0,2.0,99.0,0.191919,1.259063,-2.0,0.0,0.0,1.0,2.0,99.0,0.151515,1.248376,-2.0,0.0,0.0,1.0,2.0,99.0,0.494949,1.240258,-2.0,0.0,1.0,1.0,2.0,99.0,0.585859,1.261762,-2.0,0.0,1.0,2.0,2.0,99.0,0.585859,1.277834,-2.0,0.0,1.0,2.0,2.0,99.0,0.515152,1.296492,-2.0,0.0,0.0,2.0,2.0,99.0,0.656566,1.326047,-2.0,0.0,1.0,2.0,2.0,99.0,0.40404,1.269092,-2.0,0.0,0.0,2.0,2.0
3,3,904.0,1.182522,0.485543,1.0,1.0,1.0,1.0,3.0,904.0,1.475664,1.457962,0.0,0.0,1.0,3.0,4.0,904.0,-0.196903,1.546958,-2.0,-2.0,0.0,1.0,2.0,904.0,-0.17146,1.309487,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.088496,1.271948,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.005531,1.256202,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.066372,1.254016,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.059735,1.241928,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.057522,1.256218,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.089602,1.270564,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.106195,1.280143,-2.0,-1.0,0.0,1.0,2.0,904.0,0.147124,1.264312,-2.0,-1.0,0.0,1.0,2.0,904.0,0.002212,1.266309,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.026549,1.272141,-2.0,-1.0,0.0,1.0,2.0,904.0,0.115044,1.288004,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.076327,1.245916,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.089602,1.248584,-2.0,-1.0,0.0,1.0,2.0,904.0,0.068584,1.251244,-2.0,-1.0,0.0,1.0,2.0,904.0,0.048673,1.23526,-2.0,-1.0,0.0,1.0,2.0,904.0,-0.033186,1.268497,-2.0,-1.0,0.0,1.0,2.0,904.0,0.087389,1.254933,-2.0,-1.0,0.0,1.0,2.0,904.0,0.076327,1.258298,-2.0,-1.0,0.0,1.0,2.0,904.0,0.009956,1.248215,-2.0,-1.0,0.0,1.0,2.0
3,4,187.0,1.245989,0.571192,1.0,1.0,1.0,1.0,3.0,187.0,1.433155,1.429178,0.0,0.0,1.0,3.0,4.0,187.0,-0.352941,1.434419,-2.0,-2.0,0.0,1.0,2.0,187.0,-0.566845,1.195679,-2.0,-2.0,-1.0,0.0,2.0,187.0,-0.368984,1.091364,-2.0,-1.0,0.0,0.0,2.0,187.0,-0.112299,1.048916,-2.0,-1.0,0.0,1.0,2.0,187.0,-0.368984,1.120532,-2.0,-1.0,0.0,0.0,2.0,187.0,-0.219251,1.15006,-2.0,-1.0,0.0,1.0,2.0,187.0,0.090909,1.045869,-2.0,0.0,0.0,1.0,2.0,187.0,-0.058824,1.108018,-2.0,-1.0,0.0,1.0,2.0,187.0,-0.262032,1.155074,-2.0,-1.0,0.0,1.0,2.0,187.0,0.411765,1.110299,-2.0,0.0,1.0,1.0,2.0,187.0,-0.080214,1.077125,-2.0,-1.0,0.0,1.0,2.0,187.0,-0.080214,1.14956,-2.0,-1.0,0.0,1.0,2.0,187.0,0.187166,1.068738,-2.0,0.0,0.0,1.0,2.0,187.0,-0.28877,1.108148,-2.0,-1.0,0.0,0.0,2.0,187.0,-0.294118,1.142586,-2.0,-1.0,0.0,0.5,2.0,187.0,0.128342,1.028936,-2.0,0.0,0.0,1.0,2.0,187.0,0.026738,1.039195,-2.0,0.0,0.0,1.0,2.0,187.0,-0.064171,1.100415,-2.0,-0.5,0.0,1.0,2.0,187.0,0.117647,1.045759,-2.0,0.0,0.0,1.0,2.0,187.0,0.069519,1.037201,-2.0,0.0,0.0,1.0,2.0,187.0,-0.016043,1.023781,-2.0,0.0,0.0,1.0,2.0
3,5,656.0,1.094512,0.37097,1.0,1.0,1.0,1.0,3.0,656.0,1.742378,1.46275,0.0,0.0,2.0,3.0,4.0,656.0,-0.099085,1.456001,-2.0,-2.0,0.0,1.0,2.0,656.0,0.030488,1.292013,-2.0,-1.0,0.0,1.0,2.0,656.0,0.146341,1.231862,-2.0,-1.0,0.0,1.0,2.0,656.0,0.120427,1.224113,-2.0,-1.0,0.0,1.0,2.0,656.0,0.179878,1.213647,-2.0,-1.0,0.0,1.0,2.0,656.0,0.221037,1.201113,-2.0,-1.0,0.0,1.0,2.0,656.0,0.120427,1.253688,-2.0,-1.0,0.0,1.0,2.0,656.0,0.04878,1.2879,-2.0,-1.0,0.0,1.0,2.0,656.0,-0.01372,1.296429,-2.0,-1.0,0.0,1.0,2.0,656.0,0.414634,1.203823,-2.0,0.0,1.0,1.0,2.0,656.0,0.182927,1.209409,-2.0,-1.0,0.0,1.0,2.0,656.0,0.13872,1.239537,-2.0,-1.0,0.0,1.0,2.0,656.0,0.315549,1.224235,-2.0,0.0,0.0,1.0,2.0,656.0,0.146341,1.192823,-2.0,-1.0,0.0,1.0,2.0,656.0,0.07622,1.210759,-2.0,-1.0,0.0,1.0,2.0,656.0,0.315549,1.188804,-2.0,0.0,0.0,1.0,2.0,656.0,0.297256,1.181948,-2.0,0.0,0.0,1.0,2.0,656.0,0.20122,1.216577,-2.0,0.0,0.0,1.0,2.0,656.0,0.329268,1.17407,-2.0,0.0,0.0,1.0,2.0,656.0,0.3125,1.19856,-2.0,0.0,0.0,1.0,2.0,656.0,0.217988,1.164243,-2.0,0.0,0.0,1.0,2.0


# Work in progress. At least the data is cleaned and ready for analysis/plots

### Important observation

It seems that there are a lot of entries that have the same score in all the question

This might mean that the students have untruthfully graded the questions due to laziness or other factors

Though the factors are not important, we want to assess what is the percentage of entries that have the difference between the entry itself and the mode of the scores of the question 0. At a later date we will determine whether such entries should be treated differently or not.

In [16]:
# First step is to create a dataset that holds only the questions
# qdf represents questions dataframe
qdf = df[df.columns.difference(['instr', 'class', 'no.taken', 'attendance', 'difficulty'])]
qdf

Unnamed: 0,Q1,Q11,Q12,Q13 - Q14,Q15 - Q19,Q16 - Q18,Q17,Q2,Q20 - Q25,Q21 - Q22,Q23,Q24 - Q26,Q27,Q28,Q3,Q4,Q5 - Q7,Q6,Q8 - Q10,Q9
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2
5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
7,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
8,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
9,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [17]:
# Second step is to calculate the median of the questions per row
qdf['median'] = qdf.median(1)
qdf

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Q1,Q11,Q12,Q13 - Q14,Q15 - Q19,Q16 - Q18,Q17,Q2,Q20 - Q25,Q21 - Q22,Q23,Q24 - Q26,Q27,Q28,Q3,Q4,Q5 - Q7,Q6,Q8 - Q10,Q9,median
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2.0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
4,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2.0
5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1.0
6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1.0
7,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2.0
8,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1.0
9,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1.0


In [18]:
# Third step is to make the difference between each value of the questions and the median
qdf = qdf.apply(lambda x: x - x['median'], axis=1)
qdf

Unnamed: 0,Q1,Q11,Q12,Q13 - Q14,Q15 - Q19,Q16 - Q18,Q17,Q2,Q20 - Q25,Q21 - Q22,Q23,Q24 - Q26,Q27,Q28,Q3,Q4,Q5 - Q7,Q6,Q8 - Q10,Q9,median
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
df_alt

Unnamed: 0,instr,class,no.taken,attendance,difficulty,Q1,Q2,Q3,Q4,Q6,Q9,Q11,Q12,Q17,Q23,Q27,Q28,Q5 - Q7,Q8 - Q10,Q13 - Q14,Q15 - Q19,Q16 - Q18,Q20 - Q25,Q21 - Q22,Q24 - Q26
0,1,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,2,1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
3,1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,2,1,0,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2
5,1,2,1,3,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
6,1,2,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
7,1,2,1,1,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
8,1,2,1,1,0,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1
9,1,2,1,4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [75]:
100 *2985/5819 

51.297473792747894

In [None]:
## 51% of the values are all equal. What do we do now...