In [208]:
import pandas as pd
import numpy as np
import scipy.stats as stats
import plotly.express as px
import plotly.graph_objects as go

In [209]:
# Read the three Excel workbooks this notebook works from. The paths are
# relative, so they resolve against the kernel's working directory.
results_df, responses_df, answers_df = (
    pd.read_excel(workbook_path)
    for workbook_path in (
        "Clean Test Results.xlsx",
        "Student Responses.xlsx",
        "Test Answers.xlsx",
    )
)

In [210]:
# Build validity_df with one row per question and its item difficulty index.

# Drop the first data row exactly once. The previous in-place drop + reset_index
# removed one more row every time this cell was re-executed; the flag kept in
# DataFrame.attrs makes the cell safe to re-run. Reloading the workbook yields a
# fresh frame without the flag, so the row is dropped again as expected.
# NOTE(review): presumably row 0 is not a student record -- confirm against the workbook.
if not results_df.attrs.get('first_row_dropped', False):
    results_df = results_df.drop(index=0).reset_index(drop=True)
    results_df.attrs['first_row_dropped'] = True

# Question columns are everything except the first and last original columns
# (presumably an identifier and the 'Scores' total -- TODO confirm).
# The derived 'scores_percent' column, appended by a later cell, is masked out
# first so that re-running this cell does not start treating 'Scores' as a question.
original_cols = results_df.columns[results_df.columns != 'scores_percent']
question_cols = original_cols[1:-1]

# Item difficulty index: column total divided by the number of rows
# (the proportion answering correctly if items are scored 0/1 -- TODO confirm).
item_difficulty_index = [results_df[col].sum() / len(results_df) for col in question_cols]

validity_df = pd.DataFrame({
    'Question': list(question_cols),
    'item_difficulty_index': item_difficulty_index,
})

validity_df.head()

Unnamed: 0,Question,item_difficulty_index
0,Q1,0.75
1,Q2,0.666667
2,Q3,0.666667
3,Q4,0.916667
4,Q5,0.791667


In [211]:
# Add each question's point-biserial correlation with the overall score to validity_df.

# Scale the total score by the number of questions. The previous denominator,
# len(results_df.columns) - 2, grew by one after this cell had added
# 'scores_percent', so re-running the cell silently changed every percentage.
results_df['scores_percent'] = results_df['Scores'] / len(question_cols)

# Run the test once per question and keep both outputs (statistic, p-value)
# instead of computing every correlation twice.
# NOTE(review): if 'Scores' is the sum of the item columns, each item is also
# part of the total it is correlated with -- confirm this is intended.
item_total_results = [
    stats.pointbiserialr(results_df[col], results_df['scores_percent'])
    for col in question_cols
]
point_biserial_corr = [result[0] for result in item_total_results]
point_biserial_pvalues = [result[1] for result in item_total_results]

# Attached by position: row i of validity_df receives the i-th question's values.
validity_df['point_biserial_corr'] = pd.Series(point_biserial_corr)
validity_df['point_biserial_pvalues'] = pd.Series(point_biserial_pvalues)

validity_df.head()

Unnamed: 0,Question,item_difficulty_index,point_biserial_corr,point_biserial_pvalues
0,Q1,0.75,-0.008673,0.967915
1,Q2,0.666667,0.191208,0.370778
2,Q3,0.666667,0.541757,0.00625
3,Q4,0.916667,0.122297,0.569158
4,Q5,0.791667,0.161837,0.449942


In [212]:
# Rebind question_cols to the question columns as they are named in responses_df
# (every column after the first).
# The slice used to end at len(responses_df) -- the number of rows -- which
# silently cut off questions whenever the sheet had more columns than rows.
question_cols = responses_df.columns[1:]
for question in question_cols:
    # Keep only the first character of each response. .str[0] leaves a missing
    # response as NaN instead of raising the way indexing a float would.
    responses_df[question] = responses_df[question].str[0]

answers = ['a','b','c','d']

for answer in answers:
    # Share of rows whose reduced response equals this option, one value per question.
    # Missing responses stay in the denominator and match no option.
    # Values are attached by position: row i of validity_df <- i-th response column.
    validity_df[answer] = pd.Series([(responses_df[question] == answer).mean() for question in question_cols])

validity_df.head()

Unnamed: 0,Question,item_difficulty_index,point_biserial_corr,point_biserial_pvalues,a,b,c,d
0,Q1,0.75,-0.008673,0.967915,0.041667,0.208333,0.041667,0.708333
1,Q2,0.666667,0.191208,0.370778,0.666667,0.166667,0.083333,0.083333
2,Q3,0.666667,0.541757,0.00625,0.208333,0.5,0.291667,0.0
3,Q4,0.916667,0.122297,0.569158,0.708333,0.208333,0.041667,0.041667
4,Q5,0.791667,0.161837,0.449942,0.0,0.041667,0.083333,0.875


In [214]:
# Bar chart: one bar per question, bar height = item difficulty index.
fig = px.bar(validity_df, x='Question', y='item_difficulty_index')
fig.update_layout(
    # Centre the title horizontally and pin it near the top of the figure.
    title=dict(
        text='9th Grade Algebra Pre-Test Item Difficulty Index',
        x=0.5,
        y=.95,
        xanchor='center',
        yanchor='top',
    ),
    paper_bgcolor='LightSteelBlue',
)
fig.show()

In [215]:
# Bar chart: one bar per question, bar height = point-biserial correlation.
# The matching p-value is added to each bar's hover tooltip.
fig = px.bar(
    validity_df,
    x='Question',
    y='point_biserial_corr',
    hover_data={'point_biserial_pvalues': True},
)
fig.update_layout(
    # Centre the title horizontally and pin it near the top of the figure.
    title=dict(
        text='9th Grade Algebra Pre-Test Point Biserial Correlation',
        x=0.5,
        y=.95,
        xanchor='center',
        yanchor='top',
    ),
    paper_bgcolor='LightSteelBlue',
)
fig.show()

In [216]:
# Scatter of the Q14 column against scores_percent, with an OLS trendline
# drawn by plotly express, for a single hard-coded question.
fig = px.scatter(
    results_df,
    x='Q14',
    y='scores_percent',
    trendline='ols',
    width=600,
    height=500,
)
fig.update_layout(
    # Centre the title horizontally and pin it near the top of the figure.
    title=dict(
        text='Point Biserial Correlation Visualized for Question 14',
        x=0.5,
        y=.95,
        xanchor='center',
        yanchor='top',
    ),
    paper_bgcolor='LightSteelBlue',
)
fig.show()

In [220]:
# Stacked bar chart of the share of students choosing each option, per question.

# Columns of validity_df holding the per-option shares, in stacking order.
option_cols = ['a','b','c','d']

# Remove questions that have no share for one or more options.
# NOTE: this permanently drops those rows from validity_df, so plot cells
# re-run after this one will show fewer questions.
validity_df.dropna(subset=option_cols, inplace = True)

# One trace per option (legend label is the upper-cased column name), built in a
# comprehension instead of four copy-pasted go.Bar blocks.
fig = go.Figure(data=[
    go.Bar(
        name = option.upper(),
        x = validity_df['Question'],
        y = validity_df[option].values
    )
    for option in option_cols
])

fig.update_layout(barmode='stack', title = {'text' : '9th Grade Algebra Pre-Test Distractor Effectiveness','xanchor' : 'center', 'yanchor': 'top',  'y':.95, 'x':0.5,},
                  paper_bgcolor = 'LightSteelBlue',
                  yaxis_title = 'Percentage of Students',
                  # The plotted values are 0-1 fractions; show the ticks as
                  # percentages so the axis agrees with its title.
                  yaxis_tickformat = '.0%',
                  xaxis_title = 'Question')
fig.show()