# Data Preprocessing

In [1]:
import pandas as pd
from statsmodels.stats.anova import anova_lm
from itertools import combinations
import numpy as np

import statsmodels.api as sm
import statsmodels.stats.api as sms
from statsmodels.formula.api import ols
from scipy.stats import jarque_bera

from sklearn.linear_model import LinearRegression

#import durbin watson test
from statsmodels.stats.stattools import durbin_watson

#import kstest
from scipy.stats import kstest
from sklearn.preprocessing import PowerTransformer
import scipy.stats as stats


In [2]:
df = pd.read_csv('../data/DEID_CSE_8A_Survey_and_Grade_Data_recode.csv')

In [3]:
#df = df.drop(df.filter(regex='mid_').columns, axis=1)

#drop all columns starting with 'Neg_' or 'neg_'
df = df.drop(df.filter(regex='Neg_').columns, axis=1)

#get df of DEID, 'Final_copilotWorkspace/baseballAnalysis', 'Final_copilotWorkspace/spellCheck_copy1'
df_copilot = df[['DEID', 'Final_copilotWorkspace/baseballAnalysis', 'Final_copilotWorkspace/spellCheck_copy1']]

#TODO make a function to rename and add columns rather than copying them all when renaming. 

df_copilot

mastery_questions = [ 
    'My aim is to completely master the material presented in this class.',
    'I am striving to understand the content of this course as thoroughly as possible.',
    'My goal is to learn as much as possible.'
]

appearance_questions = [
    'One of my goals is to have other students in my class think I am good at my class work.',
    'One of my goals is to show others that I’m good at my class work.',
    'One of my goals is to show others that class work is easy for me.',
    'One of my goals is to look smart in comparison to other students in my class.',
    'I aim to look smart compared to others in my class.'
]

normative_questions = [
    'My aim is to perform well relative to other students.',
    'I am striving to do well compared to other students.',
    'My goal is to perform better than the other students.'
]

mid_interest_questions = [
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think what we are learning in this class is interesting.]',
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think I will be able to use what I learn in this course in other courses.]',
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would recommend this class to others.]',
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am enjoying this computer science class very much.]',
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the field of computer science is very interesting.]',
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [This class has been a waste of my time.]',
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I’m glad I took this class.]',
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the course material in this class is useful for me to learn.]',
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would like to take more computer science classes after this one.]',
    'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am more likely to register for another computer science class because of my experience in this course.]'
]

end_interest_questions = [
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think what we are learning in this class is interesting.]',
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think I will be able to use what I learn in this course in other courses.]',
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would recommend this class to others.]',
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am enjoying this computer science class very much.]',
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the field of computer science is very interesting.]',
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [This class has been a waste of my time.]',
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I’m glad I took this class.]',
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the course material in this class is useful for me to learn.]',
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would like to take more computer science classes after this one.]',
    'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am more likely to register for another computer science class because of my experience in this course.]'
]

copilot_read_output = [
    #'mid_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Read the code]',
    'end_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Read the code]',
]

copilot_test_output = [
    #'mid_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Test the code]',        
    'end_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Test the code]',
]

copilot_helped_hindered = [
    #'mid_Please select the statement that best described your experience working with Copilot this term.',
    'end_Please select the statement that best described your experience working with Copilot this term.'
]
misc_percentage_and_confidence = [
    'end_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Feel you can recognize and understand the code Copilot gives you]', #done
    'end_How confident or unconfident are you that you are learning how to write programs yourself, when using GenAI tools?', #done
    'end_How confident or unconfident are you that you can: [Recognize and understand the code Copilot generates.]', #done
    'end_How confident or unconfident are you that you can: [Identify the types of coding problems that I should be able to complete without copilot]', #done 
    'end_How confident or unconfident are you that you can: [Do the tasks in CSE8A without Copilot.]', #done
    'end_How confident or unconfident are you that you have a fundamental understanding of programming concepts?'] #done



genai_helps_program = ['end_How helpful or unhelpful are GenAI tools in learning how to program?']
genai_helps_understand = ['end_How helpful or unhelpful are GenAI tools in learning problem solving skills?']

end_progintelligencequestions = ['end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You can learn new things, but you can’t really change your programming intelligence.]', 
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [To be honest, you can’t really change your programming intelligence.]', 
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [People have a certain amount of ability to learn programming, and they really can’t do much to change it.]',
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You have a certain amount of programming intelligence, and you can’t really do much to change it.]', 
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Even if the work is hard in CSE 8A, I can learn it.]',
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do almost all the work in CSE 8A if I don\'t give up.]',
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can master the skills taught in my CSE 8A class this term.]',
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Not everyone is well-suited for programming.]',
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Programming comes naturally to some people but not others.]',
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do even the hardest work in CSE 8A if I try.]',
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can figure out how to do the most difficult work in my CSE 8A class.]',
                             'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Anyone has the ability to learn programming and be good at it.]',
                             ]

pre_progintelligencequestions = ['pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You can learn new things, but you can’t really change your programming intelligence.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [To be honest, you can’t really change your programming intelligence.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [People have a certain amount of ability to learn programming, and they really can’t do much to change it.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You have a certain amount of programming intelligence, and you can’t really do much to change it.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Even if the work is hard in CSE 8A, I can learn it.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do almost all the work in CSE 8A if I don\'t give up.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can master the skills taught in my CSE 8A class this term.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Not everyone is well-suited for programming.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Programming comes naturally to some people but not others.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do even the hardest work in CSE 8A if I try.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can figure out how to do the most difficult work in my CSE 8A class.]',
                                'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Anyone has the ability to learn programming and be good at it.]',
                                ]

mid_progintelligencequestions = ['mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You can learn new things, but you can’t really change your programming intelligence.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [To be honest, you can’t really change your programming intelligence.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [People have a certain amount of ability to learn programming, and they really can’t do much to change it.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You have a certain amount of programming intelligence, and you can’t really do much to change it.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Even if the work is hard in CSE 8A, I can learn it.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do almost all the work in CSE 8A if I don\'t give up.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can master the skills taught in my CSE 8A class this term.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Not everyone is well-suited for programming.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Programming comes naturally to some people but not others.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do even the hardest work in CSE 8A if I try.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can figure out how to do the most difficult work in my CSE 8A class.]',
                                'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Anyone has the ability to learn programming and be good at it.]',
        ]

end_belonging_questions = ['end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel like I belong in computing.]',
                           'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I see myself as a computing person.]',
                           'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel like an outsider in the computing community.]',
                           'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I do not have much in common with the other students in my computing classes.]',
                           'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel welcomed in computing.]']

mid_belonging_questions = ['mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel like I belong in computing.]',
                            'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I see myself as a computing person.]',
                            'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel like an outsider in the computing community.]',
                            'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I do not have much in common with the other students in my computing classes.]',
                            'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel welcomed in computing.]']

can_do_this_task_questions = [
    'Create a variable called “num” and assign the value 5 to it', 
    'Write a statement that prints “hello” if a variable “val” has a value greater than 7.',
    'Print the third letter in a string.',
    'Print the sum of all of the elements in a list that has 3 elements.',
    'Print every number from 1 to 100.',
    'Open and print the first line in a file.',
    'For a dictionary d = {“a”: 3, “b”: 4, “c”: 5}, update the value of b to 6.',
    'Imagine you’re given code that flipped an image left to right, change it to flip the image top to bottom.'
    ]


copilot_reading_and_testing_questions = copilot_read_output + copilot_test_output
columns = [
    col 
    for col in df.columns 
    if any(q in col for q in mastery_questions + normative_questions + appearance_questions)
]


columns += end_interest_questions

columns += mid_interest_questions

columns += [
    col 
    for col in df.columns 
    if any(q in col for q in copilot_read_output + copilot_test_output)
]

columns += [
    col 
    for col in df.columns 
    if any(q in col for q in copilot_helped_hindered)
]

columns += [
    col
    for col in df.columns
    if any(q in col for q in end_progintelligencequestions)]

columns += [
    col
    for col in df.columns
    if any(q in col for q in pre_progintelligencequestions)]

columns += [
    col
    for col in df.columns
    if any(q in col for q in mid_progintelligencequestions)]

columns += [
    col
    for col in df.columns
    if any(q in col for q in genai_helps_program)
]

columns += [
    col
    for col in df.columns
    if any(q in col for q in genai_helps_understand)
]

columns += [
    col
    for col in df.columns
    if any(q in col for q in misc_percentage_and_confidence)
]

columns += [
    col 
    for col in df.columns 
    if any(q in col for q in can_do_this_task_questions) and (col.startswith('end_How confident or unconfident do you feel that you could write a program for the given task without Copilot?'))
]

columns += [
    'Final Letter Grade (as GPA)',
    'Project Grade Overall',
    'Homework Grade Overall',
    'Reading Quiz Grade Overall',
    'Lab Grade Overall',
    'Quiz Grade Overall',
    'Final Exam Score',
    'Score for Participation (Overall)', 
    ]

columns += end_belonging_questions + mid_belonging_questions


columns += [
    'pre_On a scale of 1-5, how confident are you about your ability to do well in this course?',
    ]


columns += [
    'Section (A or B)',
    'Final_copilot_question', 
    'Final_noncopilot_questions']

#demographics 
demog_cols = ['end_Do you identify as Hispanic and/or Latino/a/e/?',
            'end_What is your gender?','end_Did one or more of your parent(s) complete a 4-year college or university degree?',
            'end_What is your race?  Select all that apply.', 'end_Are you eligible for the Pell Grant (a federal financial aid grant)?',
            'end_What is your fluency with English?',
            'pre_Before taking CSE 8A this quarter, did you have any prior knowledge in programming or computer science? (Note: We do not expect any prior CS experience before taking this course.)',
            'pre_What is your intended major?']
columns +=  demog_cols

columns

#Note: when we add mid, numbers change slightly because of the way we are dropping rows with missing values

['pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to show others that I’m good at my class work.]',
 'pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am striving to understand the content of this course as thoroughly as possible.]',
 'pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My goal is to learn as much as possible.]',
 'pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to have other students in my class think I am good at my class work.]',
 'pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My aim is to perform well relative to other students.]',
 'pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My aim is to completely master the material presented in thi

In [4]:
df[end_interest_questions]


Unnamed: 0,end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think what we are learning in this class is interesting.],end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think I will be able to use what I learn in this course in other courses.],end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would recommend this class to others.],end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am enjoying this computer science class very much.],end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the field of computer science is very interesting.],end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [This class has been a waste of my time.],end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I’m glad I took this class.],end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the course material in this class is useful for me to learn.],end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would like to take more computer science classes after this one.],end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am more likely to register for another computer science class because of my experience in this course.]
0,5.0,4.0,4.0,5.0,7.0,3.0,5.0,5.0,6.0,5.0
1,,,,,,,,,,
2,,,,,,,,,,
3,,,,,,,,,,
4,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...
547,,,,,,,,,,
548,,,,,,,,,,
549,7.0,7.0,7.0,6.0,7.0,1.0,7.0,7.0,7.0,7.0
550,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0


In [5]:
df[mid_interest_questions]

Unnamed: 0,mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think what we are learning in this class is interesting.],mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think I will be able to use what I learn in this course in other courses.],mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would recommend this class to others.],mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am enjoying this computer science class very much.],mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the field of computer science is very interesting.],mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [This class has been a waste of my time.],mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I’m glad I took this class.],mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the course material in this class is useful for me to learn.],mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would like to take more computer science classes after this one.],mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am more likely to register for another computer science class because of my experience in this course.]
0,6.0,7.0,6.0,6.0,7.0,2.0,6.0,6.0,6.0,6.0
1,,,,,,,,,,
2,7.0,4.0,7.0,7.0,6.0,1.0,7.0,5.0,4.0,7.0
3,7.0,7.0,7.0,7.0,7.0,1.0,7.0,7.0,7.0,7.0
4,4.0,5.0,3.0,3.0,5.0,2.0,3.0,4.0,3.0,3.0
...,...,...,...,...,...,...,...,...,...,...
547,,,,,,,,,,
548,7.0,6.0,5.0,7.0,7.0,2.0,7.0,7.0,6.0,5.0
549,7.0,6.0,6.0,4.0,7.0,2.0,5.0,7.0,5.0,5.0
550,7.0,4.0,5.0,5.0,6.0,4.0,5.0,7.0,4.0,4.0


In [6]:

df["Final_copilot_question"] = df["Final_copilotWorkspace/spellCheck_copy1"].combine(df["Final_copilotWorkspace/baseballAnalysis"], lambda x, y: x if pd.notna(x) else y)

#get average of all columns containing "Final_" and exclude "Final_copilot_question"
df["Final_noncopilot_questions"] = df.filter(regex='Final_').drop(columns='Final_copilot_question').sum(axis=1) / (df.filter(regex='Final_').count(axis=1) - 1)

df = df[columns].dropna()

columns += ['Final_A_Part2_Q1',
    'Final_A_Part2_Q2',
    'Final_B_Part2_Q1',
    'Final_B_Part2_Q2'
]

x = [column for column in df.columns if 'Final_' in column]


df 

Unnamed: 0,pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to show others that I’m good at my class work.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am striving to understand the content of this course as thoroughly as possible.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My goal is to learn as much as possible.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to have other students in my class think I am good at my class work.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My aim is to perform well relative to other students.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My aim is to completely master the material presented in this class.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to show others that class work is easy for me.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to look smart in comparison to other students in my class.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My goal is to perform better than the other students.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I aim to look smart compared to others in my class.],...,Final_copilot_question,Final_noncopilot_questions,end_Do you identify as Hispanic and/or Latino/a/e/?,end_What is your gender?,end_Did one or more of your parent(s) complete a 4-year college or university degree?,end_What is your race? Select all that apply.,end_Are you eligible for the Pell Grant (a federal financial aid grant)?,end_What is your fluency with English?,"pre_Before taking CSE 8A this quarter, did you have any prior knowledge in programming or computer science? (Note: We do not expect any prior CS experience before taking this course.)",pre_What is your intended major?
0,2.0,4.0,4.0,2.0,3.0,3.0,2.0,2.0,2.0,2.0,...,0.00000,28.326117,No,Male,Yes,White or Caucasian,Prefer not to answer,I learned English as a child as my primary lan...,Yes,MAS Data Science and Engineering
6,4.0,7.0,7.0,4.0,4.0,7.0,3.0,4.0,4.0,5.0,...,20.00000,83.437500,No,Female,Yes,Indian or other South Asian,No,I learned English as a child in a bilingual or...,Yes,Economics
10,6.0,7.0,7.0,6.0,7.0,7.0,3.0,5.0,7.0,7.0,...,20.00000,59.075039,Prefer not to answer,Male,Yes,Prefer not to answer,Unsure,I learned English as a child as my primary lan...,Yes,"Computer Science, Data Science with minor in math"
11,4.0,6.0,5.0,3.0,4.0,4.0,2.0,2.0,3.0,3.0,...,90.47619,70.546131,No,Female,Yes,East or Southeast Asian,Prefer not to answer,I learned English after learning another prima...,Yes,computer science
12,6.0,7.0,7.0,7.0,7.0,7.0,4.0,4.0,7.0,4.0,...,0.00000,69.114583,Prefer not to answer,Male,Prefer not to answer,Prefer not to answer,Prefer not to answer,Prefer not to answer,No,Data Science
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
543,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,20.00000,65.205721,No,Female,Yes,East or Southeast Asian,No,I learned English after learning another prima...,Yes,Management Science
544,1.0,5.0,4.0,1.0,7.0,3.0,1.0,1.0,1.0,1.0,...,20.00000,63.343750,Prefer not to answer,Prefer not to answer,Prefer not to answer,Prefer not to answer,Prefer not to answer,Prefer not to answer,No,Psychology
549,5.0,6.0,7.0,4.0,5.0,5.0,4.0,4.0,4.0,4.0,...,100.00000,57.947198,No,Female,No,East or Southeast Asian,Yes,I learned English after learning another prima...,No,International Studies - Economics
550,2.0,7.0,6.0,1.0,5.0,5.0,1.0,1.0,5.0,1.0,...,60.00000,45.218750,No,Male,No,Prefer not to answer,No,I learned English after learning another prima...,No,Business Psychology


In [7]:
def recode_english(df, col_str='end_What is your fluency with English?'):
    col = df[col_str]
    #wherever the column contains "I learned English as a child as my primary language.", replace with 4
    col = col.replace("I learned English as a child as my primary language.", 4)
    #wherever the column contains "I learned English as a child in a bilingual or multilingual setting.  I am equally comfortable in English and another language.", replace with 3
    col = col.replace("I learned English as a child in a bilingual or multilingual setting.  I am equally comfortable in English and another language.", 3)
    #wherever the column contains "I learned English after learning another primary language and I am equally comfortable in both languages.", replace with 2
    col = col.replace("I learned English after learning another primary language and I am equally comfortable in both languages.", 2)
    #wherever the column contains "I learned English after learning another primary language and I am not as comfortable in English as I am in another language.", replace with 1
    col = col.replace("I learned English after learning another primary language and I am not as comfortable in English as I am in another language.", 1)
    #wherever the column contains "Prefer not to answer", replace with n/a
    col = col.replace("Prefer not to answer", np.nan)
    return df

df = recode_english(df)


"""
takes dataframe and column name as input
returns dataframe with column recoded to binary
takes df and dict with col_name_str, new_colname, yes_str_list, no_str_list, na_str_list as input
"""
def recode_to_binary(df, recode_dict):
    
    col_name_str = recode_dict['col_name_str']
    new_colname = recode_dict['new_colname']
    yes_str_list = recode_dict['yes_str_list']
    no_str_list = recode_dict['no_str_list']
    na_str_list = recode_dict['na_str_list']

    #for each string in the yes_str_list, replace with 1
    for yes_str in yes_str_list:
        df.loc[df[col_name_str].str.contains(yes_str,case= False, na=False), col_name_str] = 1

    #if non_BIPOC_str_list exists, replace anything that contains those strings with 0 (any BLNPI would already be replaced with 1)
    if 'non_BIPOC_str_list' in recode_dict.keys():
        non_BLNPI_str_list = recode_dict['non_BIPOC_str_list']
        for non_BLNPI_str in non_BLNPI_str_list:
            df.loc[df[col_name_str].str.contains(non_BLNPI_str,case= False, na=False), col_name_str] = 0

    #for each string in the no_str_list, replace with 0
    for no_str in no_str_list:
        df.loc[df[col_name_str].str.contains(no_str,case= False, na=False), col_name_str] = 0

    #for each string in the na_str_list, replace with np.nan
    for na_str in na_str_list:
        df.loc[df[col_name_str].astype(str).str.contains(na_str,case= False, na=False), col_name_str] = np.nan

    #convert column to float
    df = df.astype({col_name_str: float})

    #rename column
    df = df.rename(columns={col_name_str:new_colname})

    return df

"""
Function to create new column that is the union of two binary columns.
If either column is 1, the new column is 1.
"""
def create_union_binary_col(df, col1, col2, new_colname):
    df[new_colname] = np.where((df[col1] == 1) | (df[col2] == 1), 1, 0)
    return df


is_transfer_dict = {'col_name_str':'end_Did you enter UCSD as a transfer student from another (2-year or 4-year) college or university?',
                    'new_colname':'is_transfer',
                    'yes_str_list':['Yes'], 'no_str_list':['No'], 'na_str_list':['Prefer not to answer']}

is_BIPOC_dict = {'col_name_str':'end_What is your race?  Select all that apply.',
                 'new_colname':'is_BIPOC',
                 'yes_str_list':['Black or African American', 'Native American/American Indian/First Nations', 'Native Hawaiian or Pacific Islander'],
                 'no_str_list':[], 'na_str_list':['Prefer not to answer'],
                 'non_BIPOC_str_list':['East or Southeast Asian', 'Indian or other South Asian', 'North African/Middle-Eastern','White or Caucasian']}

is_LI_dict = {'col_name_str':'end_Are you eligible for the Pell Grant (a federal financial aid grant)?',
              'new_colname':'is_LI',
              'yes_str_list':['Yes'], 'no_str_list':['No'], 'na_str_list':['Prefer not to answer', 'Unsure']}


is_firstgen_dict = {'col_name_str':'end_Did one or more of your parent(s) complete a 4-year college or university degree?',
                    'new_colname':'is_firstgen',
                    'yes_str_list':['No'], 'no_str_list':['Yes'], 'na_str_list':['Prefer not to answer']}


is_nonmale_dict = {'col_name_str':'end_What is your gender?',
                   'new_colname':'is_nonmale',
                   'yes_str_list':['Female', 'Nonbinary'], 'no_str_list':['Male'], 'na_str_list':['Prefer not to answer']}

is_latine_dict = {'col_name_str':'end_Do you identify as Hispanic and/or Latino/a/e/?',
                  'new_colname':'is_latine',
                  'yes_str_list':['Yes'], 'no_str_list':['No'], 'na_str_list':['Prefer not to answer']}

has_prior_exp_dict = {'col_name_str':'pre_Before taking CSE 8A this quarter, did you have any prior knowledge in programming or computer science? (Note: We do not expect any prior CS experience before taking this course.)',
                        'new_colname':'Prior_Knowledge',
                        'yes_str_list':['Yes'], 'no_str_list':['No'], 'na_str_list':['Prefer not to say']}

is_not_english_fluent_dict = {'col_name_str':'end_What is your fluency with English?',
                        'new_colname':'is_not_english_fluent',
                        'no_str_list':['I learned English as a child as my primary language.',
                                        'I learned English as a child in a bilingual or multilingual setting.  I am equally comfortable in English and another language.',
                                        'I learned English after learning another primary language and I am equally comfortable in both languages.',
                                        'I learned English after learning another primary language and I am not as comfortable in English as I am in another language.'],
                                        'yes_str_list':['I learned English after learning another primary language and I am not as comfortable in English as I am in another language.'], 'na_str_list':['Prefer not to answer']}

is_ESL_dict = {
    'col_name_str':'end_What is your fluency with English?',
    'new_colname':'is_ESL',
    'new_colname_esl_level': 'ESL_level',
    'no_str_list':['I learned English as a child as my primary language.',
                    'I learned English as a child in a bilingual or multilingual setting.  I am equally comfortable in English and another language.',
                  ],
    'yes_str_list':['I learned English after learning another primary language and I am not as comfortable in English as I am in another language.', 'I learned English after learning another primary language and I am equally comfortable in both languages.'], 
    'na_str_list':['Prefer not to answer'],
    'L1': ['I learned English as a child as my primary language.'],
    'L2': ['I learned English as a child in a bilingual or multilingual setting.  I am equally comfortable in English and another language.'],
    'L3': ['I learned English after learning another primary language and I am equally comfortable in both languages.'],
    'L4': ['I learned English after learning another primary language and I am not as comfortable in English as I am in another language.'],
    'NA': ['Prefer not to answer']
    }


In [8]:
df[is_firstgen_dict['col_name_str']].value_counts()

end_Did one or more of your parent(s) complete a 4-year college or university degree?
Yes                     111
No                       95
Prefer not to answer     12
Name: count, dtype: int64

In [9]:
df = recode_to_binary(df, is_BIPOC_dict)\
    .pipe(recode_to_binary, is_LI_dict)\
    .pipe(recode_to_binary, is_firstgen_dict)\
    .pipe(recode_to_binary, is_nonmale_dict)\
    .pipe(recode_to_binary, is_latine_dict)\
    .pipe(recode_to_binary, has_prior_exp_dict)\
    .pipe(recode_to_binary, is_ESL_dict)
"""
Create column for BLNPI that is a union of is_BIPOC and is_latine
"""
df = create_union_binary_col(df, 'is_BIPOC', 'is_latine', 'is_BLNPI')

#create list of demographic columns

df = df.drop(columns=['is_BIPOC', 'is_latine'])

df


Unnamed: 0,pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to show others that I’m good at my class work.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am striving to understand the content of this course as thoroughly as possible.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My goal is to learn as much as possible.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to have other students in my class think I am good at my class work.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My aim is to perform well relative to other students.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My aim is to completely master the material presented in this class.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to show others that class work is easy for me.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [One of my goals is to look smart in comparison to other students in my class.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [My goal is to perform better than the other students.],pre_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I aim to look smart compared to others in my class.],...,Section (A or B),Final_copilot_question,Final_noncopilot_questions,is_nonmale,is_firstgen,is_LI,is_ESL,Prior_Knowledge,pre_What is your intended major?,is_BLNPI
0,2.0,4.0,4.0,2.0,3.0,3.0,2.0,2.0,2.0,2.0,...,A,0.00000,28.326117,0.0,0.0,0.0,0.0,1.0,MAS Data Science and Engineering,0
6,4.0,7.0,7.0,4.0,4.0,7.0,3.0,4.0,4.0,5.0,...,A,20.00000,83.437500,1.0,0.0,0.0,0.0,1.0,Economics,0
10,6.0,7.0,7.0,6.0,7.0,7.0,3.0,5.0,7.0,7.0,...,A,20.00000,59.075039,0.0,0.0,,0.0,1.0,"Computer Science, Data Science with minor in math",0
11,4.0,6.0,5.0,3.0,4.0,4.0,2.0,2.0,3.0,3.0,...,B,90.47619,70.546131,1.0,0.0,0.0,1.0,1.0,computer science,0
12,6.0,7.0,7.0,7.0,7.0,7.0,4.0,4.0,7.0,4.0,...,B,0.00000,69.114583,0.0,1.0,0.0,,0.0,Data Science,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
543,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,A,20.00000,65.205721,1.0,0.0,0.0,1.0,1.0,Management Science,0
544,1.0,5.0,4.0,1.0,7.0,3.0,1.0,1.0,1.0,1.0,...,A,20.00000,63.343750,,1.0,0.0,,0.0,Psychology,0
549,5.0,6.0,7.0,4.0,5.0,5.0,4.0,4.0,4.0,4.0,...,A,100.00000,57.947198,1.0,1.0,1.0,1.0,0.0,International Studies - Economics,0
550,2.0,7.0,6.0,1.0,5.0,5.0,1.0,1.0,5.0,1.0,...,A,60.00000,45.218750,0.0,1.0,0.0,1.0,0.0,Business Psychology,0


In [10]:

df = df.rename(columns={'mid_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Read the code]': 'MidQ_Read_Code'})
df = df.rename(columns={'end_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Read the code]': 'EndQ_Read_Code'})
df = df.rename(columns={'mid_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Test the code]': 'MidQ_TestCode'})
df = df.rename(columns={'end_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Test the code]': 'EndQ_TestCode'})
df = df.rename(columns={'end_How confident or unconfident are you that you are learning how to write programs yourself, when using GenAI tools? \n\n1: Strongly unconfident\n2: Unconfident\n3: Slightly unconfident\n4: Slightly confident\n5: Confident\n6: Strongly confident': 'ConfidentIndependentProgramming'})
df = df.rename(columns={'end_How confident or unconfident are you that you can: [Do the tasks in CSE8A without Copilot.]': 'ConfidentWithoutCopilot'})
df = df.rename(columns={'end_How confident or unconfident are you that you can: [Recognize and understand the code Copilot generates.]': 'ConfidentRecognizeCopilotOutput'})
df = df.rename(columns={'end_How confident or unconfident are you that you can: [Identify the types of coding problems that I should be able to complete without copilot]': 'ConfidentIdentifyCodingProblems'})
df = df.rename(columns={'end_How confident or unconfident are you that you have a fundamental understanding of programming concepts? \n\n1: Strongly unconfident\n2: Unconfident\n3: Slightly unconfident\n4: Slightly confident\n5: Confident\n6: Strongly confident': 'ConfidentFundamental'})
df = df.rename(columns={'end_Please select the statement that best described your experience working with Copilot this term.': 'CopilotHelpedHindered'})

#ProgIntelligenceBelief
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You can learn new things, but you can’t really change your programming intelligence.]': 'ProgIntelligence1'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [To be honest, you can’t really change your programming intelligence.]': 'ProgIntelligence2'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [People have a certain amount of ability to learn programming, and they really can’t do much to change it.]': 'ProgIntelligence3'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You have a certain amount of programming intelligence, and you can’t really do much to change it.]': 'ProgIntelligence4'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Even if the work is hard in CSE 8A, I can learn it.]': 'ProgIntelligence5G'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do almost all the work in CSE 8A if I don\'t give up.]': 'ProgIntelligence6G'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can master the skills taught in my CSE 8A class this term.]': 'ProgIntelligence7G'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Not everyone is well-suited for programming.]': 'ProgIntelligence8'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Programming comes naturally to some people but not others.]': 'ProgIntelligence9'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do even the hardest work in CSE 8A if I try.]': 'ProgIntelligence10G'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can figure out how to do the most difficult work in my CSE 8A class.]': 'ProgIntelligence11G'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Anyone has the ability to learn programming and be good at it.]': 'ProgIntelligence12G'})

df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You can learn new things, but you can’t really change your programming intelligence.]': 'ProgIntelligence1Pre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [To be honest, you can’t really change your programming intelligence.]': 'ProgIntelligence2Pre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [People have a certain amount of ability to learn programming, and they really can’t do much to change it.]': 'ProgIntelligence3Pre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You have a certain amount of programming intelligence, and you can’t really do much to change it.]': 'ProgIntelligence4Pre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Even if the work is hard in CSE 8A, I can learn it.]': 'ProgIntelligence5GPre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do almost all the work in CSE 8A if I don\'t give up.]': 'ProgIntelligence6GPre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can master the skills taught in my CSE 8A class this term.]': 'ProgIntelligence7GPre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Not everyone is well-suited for programming.]': 'ProgIntelligence8Pre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Programming comes naturally to some people but not others.]': 'ProgIntelligence9Pre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do even the hardest work in CSE 8A if I try.]': 'ProgIntelligence10GPre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can figure out how to do the most difficult work in my CSE 8A class.]': 'ProgIntelligence11GPre'})
df = df.rename(columns={'pre_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Anyone has the ability to learn programming and be good at it.]': 'ProgIntelligence12GPre'})

df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You can learn new things, but you can’t really change your programming intelligence.]': 'ProgIntelligence1Mid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [To be honest, you can’t really change your programming intelligence.]': 'ProgIntelligence2Mid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [People have a certain amount of ability to learn programming, and they really can’t do much to change it.]': 'ProgIntelligence3Mid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [You have a certain amount of programming intelligence, and you can’t really do much to change it.]': 'ProgIntelligence4Mid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Even if the work is hard in CSE 8A, I can learn it.]': 'ProgIntelligence5GMid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do almost all the work in CSE 8A if I don\'t give up.]': 'ProgIntelligence6GMid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can master the skills taught in my CSE 8A class this term.]': 'ProgIntelligence7GMid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Not everyone is well-suited for programming.]': 'ProgIntelligence8Mid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Programming comes naturally to some people but not others.]': 'ProgIntelligence9Mid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I can do even the hardest work in CSE 8A if I try.]': 'ProgIntelligence10GMid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I\'m certain I can figure out how to do the most difficult work in my CSE 8A class.]': 'ProgIntelligence11GMid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [Anyone has the ability to learn programming and be good at it.]': 'ProgIntelligence12GMid'})


#Belonging 
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel like I belong in computing.]': 'Belonging1End'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I see myself as a computing person.]': 'Belonging2End'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel like an outsider in the computing community.]': 'Belonging3NEnd'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I do not have much in common with the other students in my computing classes.]': 'Belonging4NEnd'})
df = df.rename(columns={'end_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel welcomed in computing.]': 'Belonging5End'})

df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel like I belong in computing.]': 'Belonging1Mid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I see myself as a computing person.]': 'Belonging2Mid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel like an outsider in the computing community.]': 'Belonging3NMid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I do not have much in common with the other students in my computing classes.]': 'Belonging4NMid'})
df = df.rename(columns={'mid_Rate how much you agree with each of the following statements from "Strongly disagree" to "Strongly agree". [I feel welcomed in computing.]': 'Belonging5Mid'})

#Interest 
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think what we are learning in this class is interesting.]': 'Interesting1Mid'})
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think I will be able to use what I learn in this course in other courses.]': 'Interesting2Mid'})
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would recommend this class to others.]': 'Interesting3Mid'})
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am enjoying this computer science class very much.]': 'Interesting4Mid'})
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the field of computer science is very interesting.]': 'Interesting5Mid'})
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [This class has been a waste of my time.]': 'Interesting6NMid'})
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I’m glad I took this class.]': 'Interesting7Mid'})
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the course material in this class is useful for me to learn.]': 'Interesting8Mid'})
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would like to take more computer science classes after this one.]': 'Interesting9Mid'})
df = df.rename(columns={'mid_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am more likely to register for another computer science class because of my experience in this course.]': 'Interesting10Mid'})

#same renaming but for End 
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think what we are learning in this class is interesting.]': 'Interesting1End'})
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think I will be able to use what I learn in this course in other courses.]': 'Interesting2End'})
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would recommend this class to others.]': 'Interesting3End'})
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am enjoying this computer science class very much.]': 'Interesting4End'})
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the field of computer science is very interesting.]': 'Interesting5End'})
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [This class has been a waste of my time.]': 'Interesting6NEnd'})
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I’m glad I took this class.]': 'Interesting7End'})
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I think the course material in this class is useful for me to learn.]': 'Interesting8End'})
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I would like to take more computer science classes after this one.]': 'Interesting9End'})
df = df.rename(columns={'end_Indicate the extent to which each statement is true of you from 1 (not at all) to 7 (very): [I am more likely to register for another computer science class because of my experience in this course.]': 'Interesting10End'})

df = df.rename(columns={'end_Please select the approximate percentage of time that you do the following.\nWhen I get a response from Copilot, how often do you: [Feel you can recognize and understand the code Copilot gives you]': 'PercentageRecognizeCopilotOutput'})
df = df.rename(columns={'end_How helpful or unhelpful are GenAI tools in learning how to program?\n\n1: Strongly unhelpful\n2: Unhelpful\n3: Slightly unhelpful\n4: Slightly helpful\n5: Helpful\n6: Strongly helpful': 'GenAIHelpsProgramming'})
df = df.rename(columns={'end_How helpful or unhelpful are GenAI tools in learning problem solving skills?\n\n1: Strongly unhelpful\n2: Unhelpful\n3: Slightly unhelpful\n4: Slightly helpful\n5: Helpful\n6: Strongly helpful': 'GenAIHelpsProblemSolving'}) 
df = df.rename(columns={'Final Exam Score': 'Final_Exam_Score'})

copilot_read_output = [#'MidQ Read the code', 
                       'EndQ_Read_Code']
copilot_test_output = [#'MidQ Test the code', 
                       'EndQ_TestCode']
copilot_helped_hindered = ['CopilotHelpedHindered']
percentage_recognize_copilot_output = ['PercentageRecognizeCopilotOutput']
confident_programming_yourself = ['ConfidentIndependentProgramming']
confident_recognize_copilot_output = ['ConfidentRecognizeCopilotOutput']
confident_identify_coding_problems = ['ConfidentIdentifyCodingProblems']
confident_do_without_copilot = ['ConfidentWithoutCopilot']
confident_fundamental = ['ConfidentFundamental']
genai_helps_program = ['GenAIHelpsProgramming']
genai_helps_understand = ['GenAIHelpsProblemSolving']


end_progintelligencequestions = ['ProgIntelligence1', 'ProgIntelligence2', 'ProgIntelligence3', 'ProgIntelligence4', 'ProgIntelligence5G', 'ProgIntelligence6G', 'ProgIntelligence7G', 'ProgIntelligence8', 'ProgIntelligence9', 'ProgIntelligence10G', 'ProgIntelligence11G', 'ProgIntelligence12G']
pre_progintelligencequestions = ['ProgIntelligence1Pre', 'ProgIntelligence2Pre', 'ProgIntelligence3Pre', 'ProgIntelligence4Pre', 'ProgIntelligence5GPre', 'ProgIntelligence6GPre', 'ProgIntelligence7GPre', 'ProgIntelligence8Pre', 'ProgIntelligence9Pre', 'ProgIntelligence10GPre', 'ProgIntelligence11GPre', 'ProgIntelligence12GPre']
mid_progintelligencequestions = ['ProgIntelligence1Mid', 'ProgIntelligence2Mid', 'ProgIntelligence3Mid', 'ProgIntelligence4Mid', 'ProgIntelligence5GMid', 'ProgIntelligence6GMid', 'ProgIntelligence7GMid', 'ProgIntelligence8Mid', 'ProgIntelligence9Mid', 'ProgIntelligence10GMid', 'ProgIntelligence11GMid', 'ProgIntelligence12GMid']

end_belonging_questions = ['Belonging1End', 'Belonging2End', 'Belonging3NEnd', 'Belonging4NEnd', 'Belonging5End']
mid_belonging_questions = ['Belonging1Mid', 'Belonging2Mid', 'Belonging3NMid', 'Belonging4NMid', 'Belonging5Mid']

mid_interest_questions = ['Interesting1Mid', 'Interesting2Mid', 'Interesting3Mid', 'Interesting4Mid', 'Interesting5Mid', 'Interesting6NMid', 'Interesting7Mid', 'Interesting8Mid', 'Interesting9Mid', 'Interesting10Mid']
end_interest_questions = ['Interesting1End', 'Interesting2End', 'Interesting3End', 'Interesting4End', 'Interesting5End', 'Interesting6NEnd', 'Interesting7End', 'Interesting8End', 'Interesting9End', 'Interesting10End']

In [11]:
df.columns = [col.split('[')[1].strip()[:-1] if '[' in col else col for col in df.columns ]

In [12]:
df[mid_belonging_questions].describe()

Unnamed: 0,Belonging1Mid,Belonging2Mid,Belonging3NMid,Belonging4NMid,Belonging5Mid
count,218.0,218.0,218.0,218.0,218.0
mean,3.637615,3.665138,3.5,3.293578,4.073394
std,1.40173,1.491226,1.444041,1.275814,1.253246
min,1.0,1.0,1.0,1.0,1.0
25%,3.0,3.0,2.0,2.0,3.0
50%,4.0,4.0,3.0,3.0,4.0
75%,5.0,5.0,5.0,4.0,5.0
max,6.0,6.0,6.0,6.0,6.0


In [13]:
df[end_belonging_questions].describe()

Unnamed: 0,Belonging1End,Belonging2End,Belonging3NEnd,Belonging4NEnd,Belonging5End
count,218.0,218.0,218.0,218.0,218.0
mean,3.862385,3.834862,3.243119,3.220183,4.252294
std,1.333495,1.440134,1.417655,1.318831,1.205066
min,1.0,1.0,1.0,1.0,1.0
25%,3.0,3.0,2.0,2.0,4.0
50%,4.0,4.0,3.0,3.0,4.0
75%,5.0,5.0,4.0,4.0,5.0
max,6.0,6.0,6.0,6.0,6.0


In [14]:
#describe stats for exam score
df['Final_Exam_Score'].describe()

count    218.000000
mean      65.305970
std       18.862079
min       12.964000
25%       53.694625
50%       67.011000
75%       80.258125
max       98.572000
Name: Final_Exam_Score, dtype: float64

In [15]:
#describe stats for interest
df[end_interest_questions].describe()

Unnamed: 0,Interesting1End,Interesting2End,Interesting3End,Interesting4End,Interesting5End,Interesting6NEnd,Interesting7End,Interesting8End,Interesting9End,Interesting10End
count,218.0,218.0,218.0,218.0,218.0,218.0,218.0,218.0,218.0,218.0
mean,5.678899,5.555046,5.270642,5.091743,5.701835,2.211009,5.472477,5.724771,5.399083,5.137615
std,1.403621,1.603337,1.653138,1.674098,1.529464,1.646521,1.720485,1.451975,1.905945,1.898182
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
25%,5.0,5.0,4.0,4.0,5.0,1.0,5.0,5.0,4.0,4.0
50%,6.0,6.0,6.0,5.0,6.0,1.5,6.0,6.0,6.0,6.0
75%,7.0,7.0,7.0,6.0,7.0,3.0,7.0,7.0,7.0,7.0
max,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0,7.0


In [16]:
df[can_do_this_task_questions].describe()

Unnamed: 0,Create a variable called “num” and assign the value 5 to it,Write a statement that prints “hello” if a variable “val” has a value greater than 7.,Print the third letter in a string.,Print the sum of all of the elements in a list that has 3 elements.,Print every number from 1 to 100.,Open and print the first line in a file.,"For a dictionary d = {“a”: 3, “b”: 4, “c”: 5}, update the value of b to 6.","Imagine you’re given code that flipped an image left to right, change it to flip the image top to bottom."
count,218.0,218.0,218.0,218.0,218.0,218.0,218.0,218.0
mean,5.417431,5.247706,5.09633,5.123853,5.183486,4.889908,4.798165,3.802752
std,0.962461,1.061743,1.149989,1.131159,1.10019,1.109984,1.30069,1.52176
min,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0
25%,5.0,5.0,5.0,5.0,5.0,4.0,4.0,3.0
50%,6.0,6.0,5.0,6.0,6.0,5.0,5.0,4.0
75%,6.0,6.0,6.0,6.0,6.0,6.0,6.0,5.0
max,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0


In [17]:
# final coluumns
[column for column in df.columns if 'Final' in column]

['Final Letter Grade (as GPA)',
 'Final_Exam_Score',
 'Final_copilot_question',
 'Final_noncopilot_questions']

In [18]:
# Process the prior knowledge column
df = df.rename(columns={'pre_Before taking CSE 8A this quarter, did you have any prior knowledge in programming or computer science? (Note: We do not expect any prior CS experience before taking this course.)': 'Prior_Knowledge'})
df = df.rename(columns={'pre_What is your intended major?': 'CSMajor'})
df = df.rename(columns={'pre_On a scale of 1-5, how confident are you about your ability to do well in this course?': 'SelfEfficacy'})
##TODO: add: 
#I see myself as a computing person.
#I feel like an outsider in the computing community. (NEG)
#I do not have much in common with the other students in my computing classes. (NEG)
#I feel welcomed in computing.
#Computing is a big part of who I am.--ASK IN MEETING: Include or not? check Sofia's current sense of belonging questions


df = df.rename(columns={'end_How confident or unconfident are you that you are learning how to write programs yourself, when using GenAI tools?': 'ConfidentIndependentProgramming'})

#flip responses of 'This class has been a waste of my time.' because it's negative, unlike all the others in its category. 
df['Interesting6NMid'] = 7 - df['Interesting6NMid'] + 1
df['Interesting6NEnd'] = 7 - df['Interesting6NEnd'] + 1

#flip responses of 'I feel like an outsider in the computing community.' and 'I do not have much in common with the other students in my computing classes.' because they are negative, unlike all the others in their category.
df['Belonging3NEnd'] = 6 - df['Belonging3NEnd'] + 1
df['Belonging4NEnd'] = 6 - df['Belonging4NEnd'] + 1

df['Belonging3NMid'] = 6 - df['Belonging3NMid'] + 1
df['Belonging4NMid'] = 6 - df['Belonging4NMid'] + 1

df['ProgIntelligence5G'] = 5 - df['ProgIntelligence5G'] + 1
df['ProgIntelligence6G'] = 5 - df['ProgIntelligence6G'] + 1
df['ProgIntelligence7G'] = 5 - df['ProgIntelligence7G'] + 1
df['ProgIntelligence10G'] = 5 - df['ProgIntelligence10G'] + 1
df['ProgIntelligence11G'] = 5 - df['ProgIntelligence11G'] + 1
df['ProgIntelligence12G'] = 5 - df['ProgIntelligence12G'] + 1

df['ProgIntelligence5GPre'] = 5 - df['ProgIntelligence5GPre'] + 1
df['ProgIntelligence6GPre'] = 5 - df['ProgIntelligence6GPre'] + 1
df['ProgIntelligence7GPre'] = 5 - df['ProgIntelligence7GPre'] + 1
df['ProgIntelligence10GPre'] = 5 - df['ProgIntelligence10GPre'] + 1
df['ProgIntelligence11GPre'] = 5 - df['ProgIntelligence11GPre'] + 1
df['ProgIntelligence12GPre'] = 5 - df['ProgIntelligence12GPre'] + 1

df['ProgIntelligence5GMid'] = 5 - df['ProgIntelligence5GMid'] + 1
df['ProgIntelligence6GMid'] = 5 - df['ProgIntelligence6GMid'] + 1
df['ProgIntelligence7GMid'] = 5 - df['ProgIntelligence7GMid'] + 1
df['ProgIntelligence10GMid'] = 5 - df['ProgIntelligence10GMid'] + 1
df['ProgIntelligence11GMid'] = 5 - df['ProgIntelligence11GMid'] + 1
df['ProgIntelligence12GMid'] = 5 - df['ProgIntelligence12GMid'] + 1






#I think having "cs" might cause an issue for words like "mathematics" that contain the substring "cs". I did " cs" for this reason 

cs_major_substrings = [' cs', '-cs', 'computer', 'math-cs', 'comp sci','mathematics computer science', 'mathematics-computer science', 'bioinformatics', 'data science'] 

df['CSMajor'] = df['CSMajor'].str.lower()
df['CSMajor'] = df['CSMajor'].map(lambda x: int(any(substring in x for substring in cs_major_substrings)))
df

Unnamed: 0,One of my goals is to show others that I’m good at my class work.,I am striving to understand the content of this course as thoroughly as possible.,My goal is to learn as much as possible.,One of my goals is to have other students in my class think I am good at my class work.,My aim is to perform well relative to other students.,My aim is to completely master the material presented in this class.,One of my goals is to show others that class work is easy for me.,One of my goals is to look smart in comparison to other students in my class.,My goal is to perform better than the other students.,I aim to look smart compared to others in my class.,...,Section (A or B),Final_copilot_question,Final_noncopilot_questions,is_nonmale,is_firstgen,is_LI,is_ESL,Prior_Knowledge,CSMajor,is_BLNPI
0,2.0,4.0,4.0,2.0,3.0,3.0,2.0,2.0,2.0,2.0,...,A,0.00000,28.326117,0.0,0.0,0.0,0.0,1.0,1,0
6,4.0,7.0,7.0,4.0,4.0,7.0,3.0,4.0,4.0,5.0,...,A,20.00000,83.437500,1.0,0.0,0.0,0.0,1.0,0,0
10,6.0,7.0,7.0,6.0,7.0,7.0,3.0,5.0,7.0,7.0,...,A,20.00000,59.075039,0.0,0.0,,0.0,1.0,1,0
11,4.0,6.0,5.0,3.0,4.0,4.0,2.0,2.0,3.0,3.0,...,B,90.47619,70.546131,1.0,0.0,0.0,1.0,1.0,1,0
12,6.0,7.0,7.0,7.0,7.0,7.0,4.0,4.0,7.0,4.0,...,B,0.00000,69.114583,0.0,1.0,0.0,,0.0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
543,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,A,20.00000,65.205721,1.0,0.0,0.0,1.0,1.0,0,0
544,1.0,5.0,4.0,1.0,7.0,3.0,1.0,1.0,1.0,1.0,...,A,20.00000,63.343750,,1.0,0.0,,0.0,0,0
549,5.0,6.0,7.0,4.0,5.0,5.0,4.0,4.0,4.0,4.0,...,A,100.00000,57.947198,1.0,1.0,1.0,1.0,0.0,0,0
550,2.0,7.0,6.0,1.0,5.0,5.0,1.0,1.0,5.0,1.0,...,A,60.00000,45.218750,0.0,1.0,0.0,1.0,0.0,0,0


In [19]:
df['Prior_Knowledge']

0      1.0
6      1.0
10     1.0
11     1.0
12     0.0
      ... 
543    1.0
544    0.0
549    0.0
550    0.0
551    0.0
Name: Prior_Knowledge, Length: 218, dtype: float64

In [20]:

#create list demogs 
demog_cols = ['is_LI', 'is_firstgen', 'is_nonmale', 'is_BLNPI', 'is_ESL', 'Prior_Knowledge', 'CSMajor']

for col in demog_cols:
    print("----")
    print(df[col].value_counts())


----
is_LI
1.0    99
0.0    83
Name: count, dtype: int64
----
is_firstgen
0.0    111
1.0    107
Name: count, dtype: int64
----
is_nonmale
1.0    121
0.0     90
Name: count, dtype: int64
----
is_BLNPI
0    153
1     65
Name: count, dtype: int64
----
is_ESL
0.0    162
1.0     51
Name: count, dtype: int64
----
Prior_Knowledge
0.0    111
1.0    107
Name: count, dtype: int64
----
CSMajor
0    119
1     99
Name: count, dtype: int64


## List of Data Available


In [None]:
#all data available 


# Initial Statistics

## Cronbach's alpha 

In [21]:
# def compute cronbach alpha for a given set of questions
import pingouin as pg

def cronbach_alpha(df, questions):
    q = df[questions]
    return pg.cronbach_alpha(q)[0]

for t, questions in [('Mastery', mastery_questions), ('Normative', normative_questions), ('Appearance', appearance_questions)]:
    print(f'{t}: {cronbach_alpha(df, questions):.2f}')



Mastery: 0.81
Normative: 0.85
Appearance: 0.91


In [22]:
#mid belonging questions cronbach's 
for t, questions in [('Mid Belonging', mid_belonging_questions)]:
    print(f'{t}: {cronbach_alpha(df, questions):.2f}')

#end belonging questions cronbach's
for t, questions in [('End Belonging', end_belonging_questions)]:
    print(f'{t}: {cronbach_alpha(df, questions):.2f}')

Mid Belonging: 0.83
End Belonging: 0.84


In [23]:
for t, questions in [('CanDoTask', can_do_this_task_questions)]:
    print(f'{t}: {cronbach_alpha(df, questions):.2f}')

CanDoTask: 0.91


In [24]:

for t, questions in [('Read copilot output', copilot_read_output), ('Test copilot output', copilot_test_output)]:
    if len(questions) > 1:
        print(f'{t}: {cronbach_alpha(df, questions):.2f}')

for t, questions in [('Perceptions of copilot', copilot_helped_hindered)]:
    if len(questions) > 1:
        print(f'{t}: {cronbach_alpha(df, questions):.2f}')

#run cronbach alpha for the programming intelligence questions
for t, questions in [('End Survey Programming Intelligence Belief/Growth Mindset', end_progintelligencequestions)]:
    if len(questions) > 1:
        print(f'{t}: {cronbach_alpha(df, questions):.2f}')

for t, questions in [('Pre survey Programming Intelligence Belief/Growth Mindset', pre_progintelligencequestions)]: 
    if len(questions) > 1:
        print(f'{t}: {cronbach_alpha(df, questions):.2f}')

for t, questions in [('Mid survey Programming Intelligence Belief/Growth Mindset', mid_progintelligencequestions)]:
    if len(questions) > 1:
        print(f'{t}: {cronbach_alpha(df, questions):.2f}')

#both together? 
for t, questions in [('Pre & End Programming Intelligence Belief/Growth Mindset', pre_progintelligencequestions + end_progintelligencequestions)]: 
    if len(questions) > 1:
        print(f'{t}: {cronbach_alpha(df, questions):.2f}')

End Survey Programming Intelligence Belief/Growth Mindset: 0.88
Pre survey Programming Intelligence Belief/Growth Mindset: 0.83
Mid survey Programming Intelligence Belief/Growth Mindset: 0.90
Pre & End Programming Intelligence Belief/Growth Mindset: 0.89


In [25]:
df['Mastery'] = df[mastery_questions].mean(axis=1)
df['Normative'] = df[normative_questions].mean(axis=1)
df['Appearance'] = df[appearance_questions].mean(axis=1)
df['EndInterest'] = df[end_interest_questions].mean(axis=1)
df['MidInterest'] = df[end_interest_questions].mean(axis=1)
df['CanDoThisTask'] = df[can_do_this_task_questions].mean(axis=1)
df['EndProgIntelligenceBelief'] = df[end_progintelligencequestions].mean(axis=1)
df['PreProgIntelligenceBelief'] = df[pre_progintelligencequestions].mean(axis=1)
df['MidProgIntelligenceBelief'] = df[mid_progintelligencequestions].mean(axis=1)
df['MidBelonging'] = df[mid_belonging_questions].mean(axis=1)
df['EndBelonging'] = df[end_belonging_questions].mean(axis=1)


category_stats = pd.concat([
    df[category].describe()
    for category in ['Mastery', 'Normative', 'Appearance', 'EndInterest', 'EndProgIntelligenceBelief', 'PreProgIntelligenceBelief']
], axis=1)
category_stats

Unnamed: 0,Mastery,Normative,Appearance,EndInterest,EndProgIntelligenceBelief,PreProgIntelligenceBelief
count,218.0,218.0,218.0,218.0,218.0,218.0
mean,6.366972,4.631498,2.884404,5.48211,1.993119,1.71789
std,0.845996,1.627744,1.458816,1.415191,0.786684,0.619326
min,2.333333,1.0,1.0,1.0,0.5,0.5
25%,6.0,3.666667,1.6,4.8,1.416667,1.25
50%,6.666667,4.666667,2.7,5.9,1.791667,1.666667
75%,7.0,6.0,4.0,6.675,2.5,2.083333
max,7.0,7.0,7.0,7.0,4.916667,3.75


# Regression Setup

In [26]:
#print df cols 
df.columns

Index(['One of my goals is to show others that I’m good at my class work.',
       'I am striving to understand the content of this course as thoroughly as possible.',
       'My goal is to learn as much as possible.',
       'One of my goals is to have other students in my class think I am good at my class work.',
       'My aim is to perform well relative to other students.',
       'My aim is to completely master the material presented in this class.',
       'One of my goals is to show others that class work is easy for me.',
       'One of my goals is to look smart in comparison to other students in my class.',
       'My goal is to perform better than the other students.',
       'I aim to look smart compared to others in my class.',
       ...
       'Normative', 'Appearance', 'EndInterest', 'MidInterest',
       'CanDoThisTask', 'EndProgIntelligenceBelief',
       'PreProgIntelligenceBelief', 'MidProgIntelligenceBelief',
       'MidBelonging', 'EndBelonging'],
      dtype='obje

In [27]:
df['Prior_Knowledge'].value_counts()

Prior_Knowledge
0.0    111
1.0    107
Name: count, dtype: int64

### Center Columns 

In [28]:
#center all columns that are not binary
centered_cols = []
for col in df.columns:
    if df[col].nunique() > 2:
        print(f'Centering {col}')
        df[col] = df[col] - df[col].mean()
        centered_cols.append(col)


Centering One of my goals is to show others that I’m good at my class work.
Centering I am striving to understand the content of this course as thoroughly as possible.
Centering My goal is to learn as much as possible.
Centering One of my goals is to have other students in my class think I am good at my class work.
Centering My aim is to perform well relative to other students.
Centering My aim is to completely master the material presented in this class.
Centering One of my goals is to show others that class work is easy for me.
Centering One of my goals is to look smart in comparison to other students in my class.
Centering My goal is to perform better than the other students.
Centering I aim to look smart compared to others in my class.
Centering I am striving to do well compared to other students.
Centering Interesting1End
Centering Interesting2End
Centering Interesting3End
Centering Interesting4End
Centering Interesting5End
Centering Interesting6NEnd
Centering Interesting7End
Cent

### Checking Assumptions 

Check assumptions of 
- constant variance
- normal residuals/errors: done 
- outliers: done 
- leverage, 
- influential cases, 
- VIF: done 
- Homoscedasticity: done 

#focus on:

- leverage: done 
- influential cases: Cook's 

#### KS (Kolmogorov-Smirnov test for normality) Test fails so we perform Power Transformation. 

In [29]:
pt = PowerTransformer()

fit_transformed = pt.fit_transform(df[centered_cols])

df[centered_cols] = fit_transformed

#dfcopy = df.copy()

#### VIF function

In [30]:
#given equation of the form 'Q("Final Exam Score") ~ Mastery*Normative*Appearance*Prior_Knowledge' # CSMajor + SelfEfficacy + EndProgIntelligenceBelief' and data, return the VIF for each variable in the equation
#calculate VIF for each variable in the regression equation without using VIF package; do from scratch using R^2 values from output of ols(eq, data).fit().summary()
def check_vif(regression_eq_string, data):
    # Extracting the independent variables from the regression equation
    #if 1 or fewer independent variables, return an empty dictionary
    print("Checking VIF for: ", regression_eq_string)
    predictors = None
    full_model = None
    if regression_eq_string.count('+') < 1:
        return {}

    full_model = ols(formula=regression_eq_string, data=data).fit()
    predictors = full_model.model.exog_names[1:]  # Skip the intercept

    vif_dict = {}
    
    # For each predictor, regress it on the other predictors and compute R^2
    for predictor in predictors:
        other_predictors = [p for p in predictors if p != predictor]
        # Create a regression equation where the current predictor is the dependent variable
        regression_eq = f'{predictor} ~ {" + ".join(other_predictors)}'
        
        partial_model = ols(formula=regression_eq, data=data).fit()
        
        r_squared = partial_model.rsquared
        
        # Calculate the VIF for the predictor
        vif = 1 / (1 - r_squared)
        vif_dict[predictor] = vif
        print("Regression eq: ", regression_eq, "R^2: ", r_squared)
        print("Predictor: ", predictor, "VIF: ", vif)
    return vif_dict

#TODO: test VIF and see if it works on something that you know for sure should trigger it eg var1 = 0.5*var2

In [31]:
#get correlation between PreProgIntelligenceBelief and EndProgIntelligenceBelief
df[['PreProgIntelligenceBelief', 'EndProgIntelligenceBelief']].corr()


Unnamed: 0,PreProgIntelligenceBelief,EndProgIntelligenceBelief
PreProgIntelligenceBelief,1.0,0.425197
EndProgIntelligenceBelief,0.425197,1.0


#### Outliers function

-differ substantially from the main trend of the data; have large residuals
-Standardized residuals are the residuals divided by the standard
deviation; cause for concern are any whose absolute value is greater
than 3, more than 1% greater than 2.5, or more than 5% greater than 2
(rstandard, outlierTest)

In [32]:
# Create a sample dataset


def check_outliers(model, data):
    # Get residuals from the model
    residuals = model.resid
    
    # Calculate the standard deviation of residuals
    std_dev = np.std(residuals)
    
    # Standardize residuals
    standardized_residuals = residuals / std_dev
    
    # Find positional indices for outliers based on the conditions
    outliers_abs_gt_3 = np.where(np.abs(standardized_residuals) > 3)[0]
    outliers_abs_gt_2_5 = np.where(np.abs(standardized_residuals) > 2.5)[0]
    outliers_abs_gt_2 = np.where(np.abs(standardized_residuals) > 2)[0]
    
    # Find indices that meet the percentage-based criteria
    percent_abs_gt_2_5 = len(outliers_abs_gt_2_5) / len(standardized_residuals) * 100
    percent_abs_gt_2 = len(outliers_abs_gt_2) / len(standardized_residuals) * 100

    # Check if more than 1% of residuals are > 2.5 or more than 5% > 2
    if percent_abs_gt_2_5 > 1:
        flagged_outliers_2_5 = outliers_abs_gt_2_5
    else:
        flagged_outliers_2_5 = []

    if percent_abs_gt_2 > 5:
        flagged_outliers_2 = outliers_abs_gt_2
    else:
        flagged_outliers_2 = []

    #make a dictionary of the outliers
    outlier_dict = {
        'outliers_abs_gt_3': outliers_abs_gt_3,
        'outliers_abs_gt_2_5': flagged_outliers_2_5,
        'outliers_abs_gt_2': flagged_outliers_2
    }
    print(outlier_dict)
    
    # Combine all unique positional indices of outliers and ensure they're integers
    all_outlier_indices = np.unique(np.concatenate([outliers_abs_gt_3, flagged_outliers_2_5, flagged_outliers_2])).astype(int)

    # Map positional indices to the actual index labels in the DataFrame
    outlier_labels = data.index[all_outlier_indices]
    
    return outlier_labels.tolist()


#### Leverage function

In [33]:
def check_high_leverage_points(model, data):
    """
    Identifies high-leverage points in a regression model based on leverage scores.
    
    Parameters:
    - model: A fitted statsmodels regression model.
    - data: A pandas DataFrame containing the data (including the predictors).
    
    Returns:
    - List of index labels for high-leverage points, corresponding to DataFrame indices.
    """
    # Calculate leverage (hat values)
    leverage = model.get_influence().hat_matrix_diag
    
    # Define threshold values
    k = model.model.exog.shape[1] - 1  # Number of predictors (excluding intercept)
    n = data.shape[0]                  # Number of observations
    average_leverage = (k + 1) / n
    high_leverage_threshold_2x = 2 * average_leverage
    high_leverage_threshold_3x = 3 * average_leverage
    
    # Identify high-leverage points
    high_leverage_indices = np.where(
        (leverage > high_leverage_threshold_2x) | 
        (leverage > high_leverage_threshold_3x)
    )[0]  # Get positional indices of high leverage points
    
    # Map positional indices to the actual index labels in the DataFrame
    high_leverage_labels = data.index[high_leverage_indices]
    
    return high_leverage_labels.tolist()


## Table Displays & Assumption checking

In [34]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)

# Function to add significance markers based on p-value thresholds
def add_significance_markers(row):
    p_value = row['p-value']
    if p_value < 0.001:
        return '***'
    elif p_value < 0.01:
        return '**'
    elif p_value < 0.05:
        return '*'
    else:
        return ''

# Function to apply bold formatting to significant predictors 
def highlight_significant(s):
    """
    Make all significant predictors bold and ensure that they maintain their three decimal places
    """
    p_values = s['p-value'].astype(float)
    if p_values < 0.05:
        return ['font-weight: bold'] * len(s)
    else:
        return [''] * len(s)

def style_df(model):
    # Add significance markers to the index column of the summary table
    summary_df = pd.DataFrame({
        'Coefficient': model.params,
        'Standard Error': model.bse,
        't-value': model.tvalues,
        'p-value': model.pvalues
    })
    summary_df.index = summary_df.index.to_series().apply(lambda x: x + add_significance_markers(summary_df.loc[x]))
    styled_summary_df = summary_df.style.format("{:.3f}").apply(highlight_significant, axis=1)
    return styled_summary_df


def display_model_info(equation, df, print_vif=False, rsq_only=False, rsqdict=None):
    print("Equation: ", equation)
    model = ols(equation, data=df).fit()
    styled_summary_df = style_df(model)
    #if rsqdict is not None, add the model's R^2 to the dictionary. Equation is the key, R^2 is the value
    if rsqdict is not None:
        rsqdict[equation] = model.rsquared_adj
    if rsq_only:
        print(f"Model adj. R^2: {model.rsquared_adj:.3f}")
        return
    display(styled_summary_df)

    print(f"Model adj. R^2: {model.rsquared_adj:.3f}, R^2: {model.rsquared:.3f}, N. obs: {model.nobs:.0f}, F-statistic: {model.fvalue:.3f}")


    ### Checking VIF ###
    if print_vif:
        print("\nVIF of each factor:")

    vif_values = check_vif(equation, df)
    high_VIF_factors = []
    for variable, vif in vif_values.items():
        if print_vif:
            print(f"{variable}: {vif:.2f}")
        # if vif > 10, add to a list of variables with high VIF
        if vif > 10:
            high_VIF_factors.append(variable)

    #print variables with high VIF
    if len(high_VIF_factors) > 0:
        print("\n^^^^^^^^^^^^^^^^Variables with high VIF:")
        print(high_VIF_factors)
    else:
        print("\nNo variables with high VIF")
    
    ### Durbin-Watson test for independent errors/autocorrelation ###
    durbinwatsonval = sm.stats.stattools.durbin_watson(model.resid)
    if (durbinwatsonval > 1) and (durbinwatsonval < 3):
        print("Durbin-Watson test for autocorrelation: Nothing significant")
    else:
        print("^^^^^^^^^^^^^^^^Durbin-Watson test for autocorrelation: Significant")

    ### kstest for normality of residuals/normally distributed errors###
    kstestval = stats.kstest(model.resid, 'norm', alternative='less')
    if kstestval[1] > 0.05:
        print("Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant")
    else: 
        print("^^^^^^^^^^^^^^^^Kolmogorov-Smirnov test for normality: Significant after power transformation")
        print(kstestval)

    ### Checking for Homoscedasticity ###
    breuschpaganval = sms.diagnostic.het_breuschpagan(model.resid, model.model.exog)
    if breuschpaganval[1] > 0.05:
        print("Breusch-Pagan test for homoscedasticity: Nothing significant")
    else:
        print("^^^^^^^^^^^^^^^^Breusch-Pagan test for homoscedasticity: Significant")
        print(breuschpaganval)
    
    ### Checking for outliers ###
    #test = model.outlier_test()
    #outliers = test[test['unadj_p'] < 0.05].index
    
    outliers = check_outliers(model, df)
    print('^^^^^^^^^^^^^^^^Outliers: ', list(outliers))

    #create instance of influence
    influence = model.get_influence()

    #leverage (hat values)
    #leverage = model.hat_matrix_diag

    #Cook's D values (and p-values) as tuple of arrays
    cooks_d = influence.cooks_distance

    #print("Cooks D values: ", cooks_d[0])
    #number of points with high cooks d values
    high_cooks_d = np.where(cooks_d[0] > 1)[0]

    print("High Cook's D values: ", high_cooks_d)
    print("Cooks D values: ", cooks_d[0][high_cooks_d])
    print("Cooks D p-values: ", cooks_d[1][high_cooks_d])
    
    ### Calculate leverage values ###
    highleverage = check_high_leverage_points(model,df)
    print('^^^^^^^^^^^^^^^^High leverage points: ', highleverage)
    
    #influential points 
    #influential_points = influence.get_influence()

    
    #if there are outliers, re-run the model without them


    if len(outliers) > 0:
        print("Re-running model without outliers")
        dfcopy = df.copy()
        df_no_outliers = dfcopy.drop(outliers)
        model_no_outliers = ols(equation, data=df_no_outliers).fit()
        styled_summary_df_no_outliers = style_df(model_no_outliers)
        display(styled_summary_df_no_outliers)
        print(f"Model adj. R^2: {model_no_outliers.rsquared_adj:.3f}, R^2: {model_no_outliers.rsquared:.3f}, N. obs: {model_no_outliers.nobs:.0f}, F-statistic: {model_no_outliers.fvalue:.3f}")
    
    
    #if there are high leverage points, re-run the model without them
    if len(highleverage) > 0:
        print("Re-running model without high leverage points")
        dfcopy = df.copy()
        df_no_highleverage = dfcopy.drop(highleverage)
        model_no_highleverage = ols(equation, data=df_no_highleverage).fit()
        styled_summary_df_no_highleverage = style_df(model_no_highleverage)
        display(styled_summary_df_no_highleverage)
        print(f"Model adj. R^2: {model_no_highleverage.rsquared_adj:.3f}, R^2: {model_no_highleverage.rsquared:.3f}, N. obs: {model_no_highleverage.nobs:.0f}, F-statistic: {model_no_highleverage.fvalue:.3f}")
    

    return model


def display_anova(model1,model2): 
    print("Displaying ANOVA results:")
    print("model1: ", model1.model.formula)
    print("model2: ", model2.model.formula)
    anova_results = anova_lm(model1, model2)
    display(anova_results)

    # print apa inline format e.g., f(1, 100) = 1.23, p = 0.05
    print(f"F({anova_results.df_diff[1]}, {anova_results.df_resid[1]}) = {anova_results.F[1]:.2f}, p = {anova_results['Pr(>F)'][1]:.2f}")
    if anova_results['Pr(>F)'][1] < 0.05:
        print("NOTE: ANOVA The difference between models is significant.")

## Function for Stepwise Regression

In [35]:
def stepwise_selection(data, possible_predictors_input, dep_var, check_anova_before_adding=False, display_best_model=True):
    #dict to store the best regression model for each number of predictors. key is number of predictors, value is a tuple of the best model string equation and the model's adjusted R^2 value
    best_models = {}
    predictors = []
    best_model = None
    best_adjr2 = -1.0
    best_model_equation_string = None

    #make a copy of the possible predictors
    possible_predictors = possible_predictors_input.copy()

    #while there are still possible predictors to consider
    while len(possible_predictors) > 0:
        best_predictor = None
        for predictor in possible_predictors:
            #create a new model with the current predictors and the possible predictor
            model_eq_str = f"{dep_var} ~ {' + '.join(predictors + [predictor])}"
            model = ols(model_eq_str, data).fit()
            if model.rsquared_adj > best_adjr2:
                
                if check_anova_before_adding and best_model is not None:
                    #check if adding the predictor significantly improves the model
                    anova_results = anova_lm(best_model, model)
                    if anova_results['Pr(>F)'][1] < 0.05:
                        print(f"Adding {predictor} significantly improves the model significantly. P val: {anova_results['Pr(>F)'][1]}")
                        print("Best adj R^2 before: ", best_adjr2, " Best adj R^2 after: ", model.rsquared_adj)
                        print("models being compared: " + best_model.model.formula + " and " + model.model.formula)
                        best_predictor = predictor
                        best_model = model
                        best_adjr2 = model.rsquared_adj
                        best_models[len(predictors) + 1] = (model_eq_str, best_adjr2)
                if best_model is None: #if best model is None, then we are on the first iteration and we should add the predictor
                    best_predictor = predictor
                    best_model = model
                    best_adjr2 = model.rsquared_adj
                    best_models[len(predictors) + 1] = (model_eq_str, best_adjr2)
                elif not check_anova_before_adding:
                    best_predictor = predictor
                    best_model = model
                    best_adjr2 = model.rsquared_adj
                    best_models[len(predictors) + 1] = (model_eq_str, best_adjr2)

        if best_predictor is None:
            break
        
        #print("best equation: ", best_model_equation_string)
        #add the best predictor to the list of predictors
        predictors.append(best_predictor)
        best_model_equation_string = f"{dep_var} ~ {' + '.join(predictors)}"

        #remove the best predictor from the list of possible predictors
        possible_predictors.remove(best_predictor)
    
    #Add main effects from any interaction terms to the best model
    main_effects = set()
    for predictor in predictors:
        if ':' in predictor:  # If it's an interaction term
            # Split the interaction term and add each part as a main effect
            main_effects.update(predictor.split(':'))

    # Ensure all main effects are included in the final model
    for effect in main_effects:
        if effect not in predictors:
            predictors.append(effect)  # Add missing main effect

    # Update the final model equation string with all predictors
    best_model_equation_string = f"{dep_var} ~ {' + '.join(predictors)}"
    #print("Best model equation string: ", best_model_equation_string)
    best_model = ols(best_model_equation_string, data).fit()

    if display_best_model: 
        display_model_info(best_model_equation_string, data)

    return best_model, best_model_equation_string, best_models

In [36]:
def blockwise_selection(data, possible_blocks, base_model, check_anova_before_adding=False, display_best_model=True, display_all_models=False):
    # Dictionary to store the best model at each step. Key is number of blocks added, value is (equation, adj R^2)
    best_models = {}
    current_model_eq = base_model
    best_model = ols(current_model_eq, data).fit()
    best_adjr2 = best_model.rsquared_adj

    # Iterate over blocks to find the best sequence of additions
    for block in possible_blocks:
        new_model_eq = f"{current_model_eq} {block}"
        new_model = ols(new_model_eq, data).fit()
        if display_all_models:
            display_model_info(new_model_eq, data, rsq_only=False)
        if new_model.rsquared_adj > best_adjr2:
            if check_anova_before_adding:
                # Compare models using ANOVA to ensure significance
                anova_results = anova_lm(best_model, new_model)
                if anova_results['Pr(>F)'][1] < 0.05:
                    print(f"----------------------Adding block '{block}' improves the model significantly.")
                    current_model_eq = new_model_eq
                    best_model = new_model
                    best_adjr2 = new_model.rsquared_adj
                    best_models[len(best_models) + 1] = (current_model_eq, best_adjr2)
                    
            else:
                # If no ANOVA check, accept the block directly
                print(f"Adding block '{block}' without ANOVA check.")
                current_model_eq = new_model_eq
                best_model = new_model
                best_adjr2 = new_model.rsquared_adj
                best_models[len(best_models) + 1] = (current_model_eq, best_adjr2)

    if display_best_model:
        print("The best blockwise model is below: ")
        display_model_info(current_model_eq, data)

    return best_model, current_model_eq, best_models



# Achievement Goals Replications

In [37]:
achievement_goals_replication_base_vars = ['Mastery', 'Normative', 'Appearance', 'Prior_Knowledge'] 

# Generate all interaction terms (pairwise and higher-order)
interaction_terms = []

# Loop over interaction orders (from 2 to the total number of variables)
for r in range(2, len(achievement_goals_replication_base_vars) + 1):
    for combo in combinations(achievement_goals_replication_base_vars, r):
        interaction_terms.append(':'.join(combo))

achievement_goals_replication_vars = achievement_goals_replication_base_vars + interaction_terms

#blockwise var interactions 

priorexpblock = '+ Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge'
betweenachievementsblock = '+ Mastery:Appearance + Normative:Appearance + Mastery:Normative' 
threewayinteractionsblock = '+ Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge'

possible_blocks = [priorexpblock, betweenachievementsblock, threewayinteractionsblock]

display_nonsignificant_tables = True #for printing tables to add nonsignificant ones to the paper. Set to false when not needed so that notebook doesn't become too cumbersome. 

## Copilot Behaviors and Sentiments 

### GenAI Helps

Both had low adj R^2 overall. 

#### GenAI Helps Problem Solving

Base: Model adj. R^2: 0.060, R^2: 0.075, N. obs: 251, F-statistic: 5.002

Significant: Mastery**	0.306	0.103	2.970	0.003

No significant improvement from blockwise/stepwise

In [38]:
base_model_eq = 'GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)


Equation:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.043,0.093,-0.457,0.648
Mastery*,0.171,0.07,2.452,0.015
Normative,0.11,0.086,1.291,0.198
Appearance,0.009,0.082,0.104,0.917
Prior_Knowledge,0.087,0.134,0.651,0.515


Model adj. R^2: 0.039, R^2: 0.057, N. obs: 218, F-statistic: 3.212
Checking VIF for:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.031,0.094,-0.329,0.743
Mastery*,0.152,0.071,2.142,0.033
Normative,0.139,0.088,1.575,0.117
Appearance,-0.016,0.084,-0.191,0.849
Prior_Knowledge,0.076,0.134,0.565,0.572


Model adj. R^2: 0.037, R^2: 0.055, N. obs: 217, F-statistic: 3.069


In [39]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'GenAIHelpsProblemSolving', check_anova_before_adding=True, display_best_model=True)

Adding Normative:Prior_Knowledge significantly improves the model significantly. P val: 0.0024858396032786507
Best adj R^2 before:  0.03792142444496904  Best adj R^2 after:  0.07381404281366377
models being compared: GenAIHelpsProblemSolving ~ Mastery and GenAIHelpsProblemSolving ~ Mastery + Normative:Prior_Knowledge
Equation:  GenAIHelpsProblemSolving ~ Mastery + Normative:Prior_Knowledge + Normative + Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.05,0.092,-0.542,0.588
Mastery**,0.187,0.069,2.731,0.007
Normative:Prior_Knowledge**,0.346,0.132,2.623,0.009
Normative,-0.071,0.099,-0.724,0.47
Prior_Knowledge,0.085,0.131,0.65,0.517


Model adj. R^2: 0.069, R^2: 0.086, N. obs: 218, F-statistic: 5.033
Checking VIF for:  GenAIHelpsProblemSolving ~ Mastery + Normative:Prior_Knowledge + Normative + Prior_Knowledge
Regression eq:  Mastery ~ Normative:Prior_Knowledge + Normative + Prior_Knowledge R^2:  0.08858760072799776
Predictor:  Mastery VIF:  1.0971981517902958
Regression eq:  Normative:Prior_Knowledge ~ Mastery + Normative + Prior_Knowledge R^2:  0.5323456576282437
Predictor:  Normative:Prior_Knowledge VIF:  2.1383314756116643
Regression eq:  Normative ~ Mastery + Normative:Prior_Knowledge + Prior_Knowledge R^2:  0.5597367612022672
Predictor:  Normative VIF:  2.2713683811775693
Regression eq:  Prior_Knowledge ~ Mastery + Normative:Prior_Knowledge + Normative R^2:  0.006272009893623198
Predictor:  Prior_Knowledge VIF:  1.006311596287986

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breu

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.07,0.093,-0.755,0.451
Mastery,0.133,0.073,1.832,0.068
Normative:Prior_Knowledge*,0.302,0.143,2.116,0.036
Normative,0.026,0.109,0.235,0.814
Prior_Knowledge,0.085,0.133,0.641,0.523


Model adj. R^2: 0.068, R^2: 0.086, N. obs: 209, F-statistic: 4.795


In [40]:
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.046,0.092,-0.496,0.62
Mastery**,0.256,0.096,2.656,0.009
Normative,-0.089,0.118,-0.755,0.451
Appearance,-0.014,0.105,-0.132,0.895
Prior_Knowledge,0.086,0.132,0.654,0.514
Mastery:Prior_Knowledge,-0.14,0.139,-1.005,0.316
Normative:Prior_Knowledge*,0.37,0.172,2.156,0.032
Appearance:Prior_Knowledge,0.026,0.167,0.153,0.878


Model adj. R^2: 0.061, R^2: 0.091, N. obs: 218, F-statistic: 3.006
Checking VIF for:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowledge + 

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.047,0.092,-0.512,0.609
Mastery*,0.207,0.101,2.05,0.042
Normative,-0.021,0.126,-0.169,0.866
Appearance,-0.04,0.109,-0.364,0.716
Prior_Knowledge,0.099,0.132,0.749,0.455
Mastery:Prior_Knowledge,-0.1,0.143,-0.699,0.486
Normative:Prior_Knowledge*,0.405,0.182,2.219,0.028
Appearance:Prior_Knowledge,-0.042,0.172,-0.244,0.807


Model adj. R^2: 0.067, R^2: 0.098, N. obs: 214, F-statistic: 3.191
Equation:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.065,0.103,-0.637,0.525
Mastery,0.123,0.076,1.625,0.106
Normative,0.151,0.093,1.611,0.109
Appearance,-0.004,0.086,-0.047,0.963
Prior_Knowledge,0.069,0.135,0.511,0.61
Mastery:Appearance,0.062,0.084,0.737,0.462
Normative:Appearance,0.096,0.075,1.276,0.204
Mastery:Normative,-0.103,0.086,-1.195,0.233


Model adj. R^2: 0.040, R^2: 0.071, N. obs: 218, F-statistic: 2.306
Checking VIF for:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predictor:  Pri

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.031,0.108,-0.289,0.773
Mastery,0.146,0.083,1.747,0.082
Normative,0.159,0.101,1.573,0.117
Appearance,-0.06,0.09,-0.66,0.51
Prior_Knowledge,0.061,0.137,0.445,0.657
Mastery:Appearance,0.105,0.102,1.032,0.304
Normative:Appearance,0.039,0.093,0.418,0.677
Mastery:Normative,-0.108,0.108,-1.001,0.318


Model adj. R^2: 0.039, R^2: 0.072, N. obs: 203, F-statistic: 2.159
Equation:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.029,0.096,-0.302,0.763
Mastery*,0.186,0.08,2.322,0.021
Normative,0.118,0.091,1.304,0.194
Appearance,0.009,0.085,0.111,0.912
Prior_Knowledge,0.012,0.156,0.076,0.94
Mastery:Normative:Appearance,-0.043,0.068,-0.627,0.531
Mastery:Normative:Prior_Knowledge,-0.049,0.136,-0.36,0.719
Mastery:Appearance:Prior_Knowledge,0.163,0.142,1.147,0.253
Normative:Appearance:Prior_Knowledge,0.116,0.106,1.094,0.275


Model adj. R^2: 0.039, R^2: 0.074, N. obs: 218, F-statistic: 2.087
Checking VIF for:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge +

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.021,0.1,-0.209,0.835
Mastery*,0.192,0.092,2.099,0.037
Normative,0.121,0.101,1.194,0.234
Appearance,-0.067,0.096,-0.697,0.487
Prior_Knowledge,0.027,0.168,0.162,0.871
Mastery:Normative:Appearance,-0.022,0.108,-0.208,0.836
Mastery:Normative:Prior_Knowledge,0.073,0.162,0.453,0.651
Mastery:Appearance:Prior_Knowledge,0.154,0.19,0.808,0.42
Normative:Appearance:Prior_Knowledge,-0.047,0.151,-0.308,0.758


Model adj. R^2: 0.029, R^2: 0.069, N. obs: 197, F-statistic: 1.743
The best blockwise model is below: 
Equation:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.043,0.093,-0.457,0.648
Mastery*,0.171,0.07,2.452,0.015
Normative,0.11,0.086,1.291,0.198
Appearance,0.009,0.082,0.104,0.917
Prior_Knowledge,0.087,0.134,0.651,0.515


Model adj. R^2: 0.039, R^2: 0.057, N. obs: 218, F-statistic: 3.212
Checking VIF for:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.031,0.094,-0.329,0.743
Mastery*,0.152,0.071,2.142,0.033
Normative,0.139,0.088,1.575,0.117
Appearance,-0.016,0.084,-0.191,0.849
Prior_Knowledge,0.076,0.134,0.565,0.572


Model adj. R^2: 0.037, R^2: 0.055, N. obs: 217, F-statistic: 3.069
Displaying ANOVA results:
model1:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  GenAIHelpsProblemSolving ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,205.597,0.0,,,
1,213.0,205.597,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


#### GenAI Helps Programming 
Base Model adj. R^2: 0.034, R^2: 0.050, N. obs: 251, F-statistic: 3.229

No significant factors 

Adding blockwise prior knowledge interactions improved model significantly. 
GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Model adj. R^2: 0.055, R^2: 0.082, N. obs: 251, F-statistic: 3.098

Normative:Prior_Knowledge**	0.338	0.123	2.759	0.006

In [41]:
base_model_eq = 'GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)


Equation:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.058,0.094,-0.616,0.539
Mastery,0.055,0.07,0.78,0.436
Normative,0.116,0.086,1.35,0.179
Appearance,0.087,0.083,1.055,0.293
Prior_Knowledge,0.118,0.134,0.878,0.381


Model adj. R^2: 0.027, R^2: 0.045, N. obs: 218, F-statistic: 2.503
Checking VIF for:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=in

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.048,0.094,-0.506,0.613
Mastery,0.039,0.072,0.54,0.59
Normative,0.14,0.089,1.581,0.115
Appearance,0.066,0.085,0.783,0.435
Prior_Knowledge,0.108,0.135,0.804,0.422


Model adj. R^2: 0.026, R^2: 0.044, N. obs: 217, F-statistic: 2.445


In [42]:
_, best_stepwise_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'GenAIHelpsProgramming', check_anova_before_adding=True, display_best_model=True)

Adding Normative significantly improves the model significantly. P val: 0.014793929408626096
Best adj R^2 before:  0.004884998309281241  Best adj R^2 after:  0.027565550031199315
models being compared: GenAIHelpsProgramming ~ Mastery and GenAIHelpsProgramming ~ Mastery + Normative
Adding Normative:Prior_Knowledge significantly improves the model significantly. P val: 0.008622671223483709
Best adj R^2 before:  0.027565550031199315  Best adj R^2 after:  0.05408805688466445
models being compared: GenAIHelpsProgramming ~ Mastery + Normative and GenAIHelpsProgramming ~ Mastery + Normative + Normative:Prior_Knowledge
Equation:  GenAIHelpsProgramming ~ Mastery + Normative + Normative:Prior_Knowledge + Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.065,0.093,-0.697,0.487
Mastery,0.063,0.069,0.913,0.363
Normative,-0.021,0.1,-0.21,0.834
Normative:Prior_Knowledge**,0.352,0.133,2.646,0.009
Prior_Knowledge,0.115,0.133,0.869,0.386


Model adj. R^2: 0.053, R^2: 0.070, N. obs: 218, F-statistic: 4.037
Checking VIF for:  GenAIHelpsProgramming ~ Mastery + Normative + Normative:Prior_Knowledge + Prior_Knowledge
Regression eq:  Mastery ~ Normative + Normative:Prior_Knowledge + Prior_Knowledge R^2:  0.08858760072799743
Predictor:  Mastery VIF:  1.0971981517902953
Regression eq:  Normative ~ Mastery + Normative:Prior_Knowledge + Prior_Knowledge R^2:  0.5597367612022672
Predictor:  Normative VIF:  2.2713683811775693
Regression eq:  Normative:Prior_Knowledge ~ Mastery + Normative + Prior_Knowledge R^2:  0.5323456576282437
Predictor:  Normative:Prior_Knowledge VIF:  2.1383314756116643
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Normative:Prior_Knowledge R^2:  0.006272009893623198
Predictor:  Prior_Knowledge VIF:  1.006311596287986

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
^^^^^^^^^^^^^^^^Kolmogorov-Smirnov test for normality: Significant after power transformation
Kste

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.075,0.095,-0.789,0.431
Mastery,0.02,0.074,0.268,0.789
Normative,0.052,0.112,0.466,0.642
Normative:Prior_Knowledge,0.251,0.146,1.722,0.087
Prior_Knowledge,0.149,0.136,1.095,0.275


Model adj. R^2: 0.037, R^2: 0.056, N. obs: 209, F-statistic: 3.003


In [43]:
print("stepwise anova: ")

display_anova(ols(base_model_eq, df).fit(), ols(best_stepwise_model_eq , df).fit())

stepwise anova: 
Displaying ANOVA results:
model1:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  GenAIHelpsProgramming ~ Mastery + Normative + Normative:Prior_Knowledge + Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,208.211,0.0,,,
1,213.0,202.639,-0.0,5.572,-inf,


F(-0.0, 213.0) = -inf, p = nan


In [44]:
_, best_blockwise_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_blockwise_model_eq, df).fit())

Equation:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.062,0.093,-0.666,0.506
Mastery,0.128,0.097,1.315,0.19
Normative,-0.095,0.119,-0.797,0.426
Appearance,0.093,0.106,0.874,0.383
Prior_Knowledge,0.117,0.133,0.881,0.38
Mastery:Prior_Knowledge,-0.12,0.14,-0.854,0.394
Normative:Prior_Knowledge*,0.405,0.173,2.342,0.02
Appearance:Prior_Knowledge,-0.042,0.168,-0.25,0.802


Model adj. R^2: 0.047, R^2: 0.078, N. obs: 218, F-statistic: 2.533
Checking VIF for:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowledge + Nor

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.068,0.094,-0.72,0.472
Mastery,0.082,0.103,0.795,0.428
Normative,-0.031,0.129,-0.242,0.809
Appearance,0.075,0.11,0.681,0.497
Prior_Knowledge,0.126,0.134,0.94,0.348
Mastery:Prior_Knowledge,-0.083,0.145,-0.569,0.57
Normative:Prior_Knowledge*,0.404,0.185,2.177,0.031
Appearance:Prior_Knowledge,-0.08,0.175,-0.455,0.649


Model adj. R^2: 0.050, R^2: 0.081, N. obs: 214, F-statistic: 2.585
Equation:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.061,0.104,-0.591,0.555
Mastery,0.067,0.077,0.868,0.386
Normative,0.085,0.095,0.901,0.369
Appearance,0.099,0.087,1.144,0.254
Prior_Knowledge,0.133,0.136,0.979,0.329
Mastery:Appearance,0.081,0.085,0.95,0.343
Normative:Appearance,-0.018,0.076,-0.242,0.809
Mastery:Normative,0.005,0.087,0.053,0.958


Model adj. R^2: 0.019, R^2: 0.051, N. obs: 218, F-statistic: 1.608
Checking VIF for:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predictor:  Prior_

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.071,0.109,-0.646,0.519
Mastery,0.08,0.084,0.951,0.343
Normative,0.041,0.102,0.396,0.693
Appearance,0.047,0.092,0.512,0.609
Prior_Knowledge,0.135,0.139,0.969,0.334
Mastery:Appearance,0.142,0.103,1.378,0.17
Normative:Appearance,-0.037,0.094,-0.393,0.695
Mastery:Normative,0.078,0.11,0.714,0.476


Model adj. R^2: 0.019, R^2: 0.053, N. obs: 203, F-statistic: 1.569
Equation:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.048,0.096,-0.499,0.618
Mastery,0.101,0.081,1.257,0.21
Normative,0.063,0.091,0.69,0.491
Appearance,0.109,0.085,1.281,0.201
Prior_Knowledge,0.084,0.157,0.532,0.595
Mastery:Normative:Appearance,-0.03,0.069,-0.437,0.662
Mastery:Normative:Prior_Knowledge,0.173,0.137,1.267,0.206
Mastery:Appearance:Prior_Knowledge,0.047,0.143,0.328,0.743
Normative:Appearance:Prior_Knowledge,-0.011,0.106,-0.106,0.915


Model adj. R^2: 0.032, R^2: 0.068, N. obs: 218, F-statistic: 1.902
Checking VIF for:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Ma

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.002,0.101,0.021,0.984
Mastery,0.153,0.092,1.66,0.098
Normative,0.009,0.102,0.09,0.928
Appearance,0.09,0.096,0.938,0.35
Prior_Knowledge,0.027,0.169,0.16,0.873
Mastery:Normative:Appearance,-0.143,0.108,-1.321,0.188
Mastery:Normative:Prior_Knowledge*,0.33,0.163,2.028,0.044
Mastery:Appearance:Prior_Knowledge,-0.006,0.192,-0.031,0.975
Normative:Appearance:Prior_Knowledge,-0.032,0.152,-0.209,0.835


Model adj. R^2: 0.030, R^2: 0.070, N. obs: 197, F-statistic: 1.768
The best blockwise model is below: 
Equation:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.058,0.094,-0.616,0.539
Mastery,0.055,0.07,0.78,0.436
Normative,0.116,0.086,1.35,0.179
Appearance,0.087,0.083,1.055,0.293
Prior_Knowledge,0.118,0.134,0.878,0.381


Model adj. R^2: 0.027, R^2: 0.045, N. obs: 218, F-statistic: 2.503
Checking VIF for:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=in

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.048,0.094,-0.506,0.613
Mastery,0.039,0.072,0.54,0.59
Normative,0.14,0.089,1.581,0.115
Appearance,0.066,0.085,0.783,0.435
Prior_Knowledge,0.108,0.135,0.804,0.422


Model adj. R^2: 0.026, R^2: 0.044, N. obs: 217, F-statistic: 2.445
Displaying ANOVA results:
model1:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,208.211,0.0,,,
1,213.0,208.211,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


In [45]:
#anova between best blockwise and best stepwise 

display_anova(ols(best_stepwise_model_eq, df).fit(), ols(best_blockwise_model_eq, df).fit())

Displaying ANOVA results:
model1:  GenAIHelpsProgramming ~ Mastery + Normative + Normative:Prior_Knowledge + Prior_Knowledge
model2:  GenAIHelpsProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,202.639,0.0,,,
1,213.0,208.211,-0.0,-5.572,inf,


F(-0.0, 213.0) = inf, p = nan


### Independent Coding Ability & Fundamentals 
All had decent adj r^2 of 0.13-0.16.

Mastery, normative, prior knowledge persistently significant 


#### Confidence in Fundamental Understanding of Programming
How confident or unconfident are you that you have a fundamental understanding of programming concepts?

Base Model adj. R^2: 0.160, R^2: 0.173, N. obs: 251, F-statistic: 12.897

Sig: Mastery, normative, prior knowledge

None of stepwise/blockwise improved the model significantly. 

In [46]:
base_model_eq = 'ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)


Equation:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.251,0.089,-2.82,0.005
Mastery,0.126,0.067,1.894,0.06
Normative*,0.186,0.081,2.282,0.023
Appearance,0.026,0.078,0.328,0.743
Prior_Knowledge***,0.511,0.127,4.018,0.0


Model adj. R^2: 0.129, R^2: 0.145, N. obs: 218, F-statistic: 9.011
Checking VIF for:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.273,0.085,-3.214,0.002
Mastery,0.106,0.064,1.665,0.097
Normative**,0.247,0.078,3.174,0.002
Appearance,0.049,0.074,0.66,0.51
Prior_Knowledge***,0.556,0.121,4.608,0.0


Model adj. R^2: 0.185, R^2: 0.200, N. obs: 214, F-statistic: 13.100
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.25,0.09,-2.787,0.006
Mastery,0.124,0.068,1.824,0.07
Normative*,0.189,0.084,2.244,0.026
Appearance,0.023,0.081,0.288,0.773
Prior_Knowledge***,0.51,0.128,3.992,0.0


Model adj. R^2: 0.128, R^2: 0.144, N. obs: 217, F-statistic: 8.921


In [47]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'ConfidentFundamental', check_anova_before_adding=True, display_best_model=True)

Adding Normative significantly improves the model significantly. P val: 0.0024545101677004386
Best adj R^2 before:  0.03488563109226417  Best adj R^2 after:  0.07099203501178342
models being compared: ConfidentFundamental ~ Mastery and ConfidentFundamental ~ Mastery + Normative
Adding Prior_Knowledge significantly improves the model significantly. P val: 7.936409951345132e-05
Best adj R^2 before:  0.07099203501178342  Best adj R^2 after:  0.1323075512284656
models being compared: ConfidentFundamental ~ Mastery + Normative and ConfidentFundamental ~ Mastery + Normative + Prior_Knowledge
Equation:  ConfidentFundamental ~ Mastery + Normative + Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.251,0.089,-2.824,0.005
Mastery,0.123,0.066,1.872,0.063
Normative**,0.202,0.066,3.063,0.002
Prior_Knowledge***,0.511,0.127,4.024,0.0


Model adj. R^2: 0.132, R^2: 0.144, N. obs: 218, F-statistic: 12.030
Checking VIF for:  ConfidentFundamental ~ Mastery + Normative + Prior_Knowledge
Regression eq:  Mastery ~ Normative + Prior_Knowledge R^2:  0.08052652964829654
Predictor:  Mastery VIF:  1.0875789593118925
Regression eq:  Normative ~ Mastery + Prior_Knowledge R^2:  0.07735959399469983
Predictor:  Normative VIF:  1.0838458769973438
Regression eq:  Prior_Knowledge ~ Mastery + Normative R^2:  0.0062533583232448064
Predictor:  Prior_Knowledge VIF:  1.0062927088867375

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int64), 'outliers_abs_gt_2_5': array([  8, 112, 178, 199]), 'outliers_abs_gt_2': []}
^^^^^^^^^^^^^^^^Outliers:  [22, 274, 441, 500]
High Cook's D values:  []
Cooks D values:  []
Cooks D p

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.273,0.085,-3.221,0.001
Mastery,0.101,0.063,1.599,0.111
Normative***,0.277,0.064,4.336,0.0
Prior_Knowledge***,0.556,0.121,4.613,0.0


Model adj. R^2: 0.187, R^2: 0.199, N. obs: 214, F-statistic: 17.368
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.294,0.088,-3.345,0.001
Mastery,0.083,0.068,1.218,0.225
Normative***,0.254,0.07,3.627,0.0
Prior_Knowledge***,0.56,0.126,4.451,0.0


Model adj. R^2: 0.161, R^2: 0.173, N. obs: 212, F-statistic: 14.544


In [48]:
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.247,0.089,-2.785,0.006
Mastery**,0.245,0.092,2.644,0.009
Normative,0.068,0.113,0.595,0.552
Appearance,0.036,0.101,0.355,0.723
Prior_Knowledge***,0.512,0.127,4.037,0.0
Mastery:Prior_Knowledge,-0.236,0.134,-1.763,0.079
Normative:Prior_Knowledge,0.227,0.165,1.378,0.17
Appearance:Prior_Knowledge,-0.05,0.16,-0.312,0.755


Model adj. R^2: 0.134, R^2: 0.161, N. obs: 218, F-statistic: 5.778
Checking VIF for:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowledge + Norm

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.269,0.089,-3.016,0.003
Mastery*,0.211,0.098,2.163,0.032
Normative,0.111,0.122,0.912,0.363
Appearance,0.051,0.105,0.482,0.63
Prior_Knowledge***,0.529,0.127,4.151,0.0
Mastery:Prior_Knowledge,-0.214,0.138,-1.554,0.122
Normative:Prior_Knowledge,0.216,0.176,1.227,0.221
Appearance:Prior_Knowledge,-0.091,0.166,-0.544,0.587


Model adj. R^2: 0.141, R^2: 0.169, N. obs: 214, F-statistic: 5.987
Equation:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.292,0.098,-2.973,0.003
Mastery,0.097,0.073,1.337,0.183
Normative*,0.221,0.089,2.477,0.014
Appearance,0.0,0.082,0.002,0.999
Prior_Knowledge***,0.494,0.129,3.834,0.0
Mastery:Appearance,-0.022,0.081,-0.278,0.781
Normative:Appearance,0.091,0.072,1.266,0.207
Mastery:Normative,-0.007,0.082,-0.083,0.934


Model adj. R^2: 0.123, R^2: 0.151, N. obs: 218, F-statistic: 5.345
Checking VIF for:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predictor:  Prior_K

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.28,0.102,-2.749,0.007
Mastery,0.095,0.079,1.21,0.228
Normative,0.187,0.095,1.957,0.052
Appearance,-0.048,0.085,-0.557,0.578
Prior_Knowledge***,0.439,0.13,3.381,0.001
Mastery:Appearance,0.061,0.096,0.635,0.526
Normative:Appearance,0.094,0.088,1.078,0.282
Mastery:Normative,0.046,0.102,0.455,0.65


Model adj. R^2: 0.106, R^2: 0.137, N. obs: 203, F-statistic: 4.430
Equation:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.256,0.092,-2.794,0.006
Mastery,0.121,0.077,1.584,0.115
Normative*,0.175,0.086,2.032,0.043
Appearance,0.018,0.081,0.229,0.819
Prior_Knowledge**,0.448,0.149,3.004,0.003
Mastery:Normative:Appearance,0.014,0.065,0.221,0.825
Mastery:Normative:Prior_Knowledge,0.039,0.13,0.303,0.762
Mastery:Appearance:Prior_Knowledge,0.114,0.136,0.836,0.404
Normative:Appearance:Prior_Knowledge,0.088,0.101,0.874,0.383


Model adj. R^2: 0.127, R^2: 0.159, N. obs: 218, F-statistic: 4.943
Checking VIF for:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mas

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.25,0.088,-2.844,0.005
Mastery,0.129,0.073,1.759,0.08
Normative*,0.209,0.083,2.501,0.013
Appearance,0.047,0.077,0.612,0.541
Prior_Knowledge**,0.477,0.143,3.336,0.001
Mastery:Normative:Appearance,0.001,0.064,0.021,0.983
Mastery:Normative:Prior_Knowledge,0.023,0.125,0.183,0.855
Mastery:Appearance:Prior_Knowledge,0.125,0.13,0.965,0.335
Normative:Appearance:Prior_Knowledge,0.077,0.097,0.799,0.425


Model adj. R^2: 0.167, R^2: 0.198, N. obs: 215, F-statistic: 6.347
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.231,0.095,-2.434,0.016
Mastery,0.151,0.086,1.75,0.082
Normative,0.131,0.095,1.378,0.17
Appearance,-0.013,0.091,-0.14,0.889
Prior_Knowledge*,0.382,0.158,2.414,0.017
Mastery:Normative:Appearance,-0.061,0.102,-0.603,0.547
Mastery:Normative:Prior_Knowledge,0.114,0.153,0.742,0.459
Mastery:Appearance:Prior_Knowledge,0.168,0.18,0.931,0.353
Normative:Appearance:Prior_Knowledge,0.089,0.143,0.624,0.533


Model adj. R^2: 0.102, R^2: 0.139, N. obs: 197, F-statistic: 3.787
The best blockwise model is below: 
Equation:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.251,0.089,-2.82,0.005
Mastery,0.126,0.067,1.894,0.06
Normative*,0.186,0.081,2.282,0.023
Appearance,0.026,0.078,0.328,0.743
Prior_Knowledge***,0.511,0.127,4.018,0.0


Model adj. R^2: 0.129, R^2: 0.145, N. obs: 218, F-statistic: 9.011
Checking VIF for:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.273,0.085,-3.214,0.002
Mastery,0.106,0.064,1.665,0.097
Normative**,0.247,0.078,3.174,0.002
Appearance,0.049,0.074,0.66,0.51
Prior_Knowledge***,0.556,0.121,4.608,0.0


Model adj. R^2: 0.185, R^2: 0.200, N. obs: 214, F-statistic: 13.100
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.25,0.09,-2.787,0.006
Mastery,0.124,0.068,1.824,0.07
Normative*,0.189,0.084,2.244,0.026
Appearance,0.023,0.081,0.288,0.773
Prior_Knowledge***,0.51,0.128,3.992,0.0


Model adj. R^2: 0.128, R^2: 0.144, N. obs: 217, F-statistic: 8.921
Displaying ANOVA results:
model1:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  ConfidentFundamental ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,186.448,0.0,,,
1,213.0,186.448,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


#### Confident Without Copilot 
How confident or unconfident are you that you can: [Do the tasks in CSE8A without Copilot.]

Base Model adj. R^2: 0.153, R^2: 0.167, N. obs: 251, F-statistic: 12.315

Sig: Mastery, normative, prior knowledge

None of stepwise/blockwise improved the model significantly. 

In [49]:
base_model_eq = 'ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)


Equation:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.29,0.088,-3.282,0.001
Mastery**,0.175,0.066,2.649,0.009
Normative,0.121,0.081,1.5,0.135
Appearance,0.015,0.078,0.193,0.847
Prior_Knowledge***,0.59,0.126,4.677,0.0


Model adj. R^2: 0.142, R^2: 0.158, N. obs: 218, F-statistic: 9.986
Checking VIF for:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.278,0.088,-3.145,0.002
Mastery*,0.157,0.067,2.332,0.021
Normative,0.148,0.083,1.782,0.076
Appearance,-0.008,0.08,-0.107,0.915
Prior_Knowledge***,0.579,0.126,4.59,0.0


Model adj. R^2: 0.138, R^2: 0.154, N. obs: 217, F-statistic: 9.624


In [50]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'ConfidentWithoutCopilot', check_anova_before_adding=True, display_best_model=True)

Adding Normative significantly improves the model significantly. P val: 0.04392400751744185
Best adj R^2 before:  0.04921576192341859  Best adj R^2 after:  0.06270099440109422
models being compared: ConfidentWithoutCopilot ~ Mastery and ConfidentWithoutCopilot ~ Mastery + Normative
Adding Prior_Knowledge significantly improves the model significantly. P val: 4.9483266085044815e-06
Best adj R^2 before:  0.06270099440109422  Best adj R^2 after:  0.1459627994460958
models being compared: ConfidentWithoutCopilot ~ Mastery + Normative and ConfidentWithoutCopilot ~ Mastery + Normative + Prior_Knowledge
Adding Mastery:Appearance:Prior_Knowledge significantly improves the model significantly. P val: 0.026186557317337
Best adj R^2 before:  0.1459627994460958  Best adj R^2 after:  0.16168451860556887
models being compared: ConfidentWithoutCopilot ~ Mastery + Normative + Prior_Knowledge and ConfidentWithoutCopilot ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance:Prior_Knowledge
Equati

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.29,0.087,-3.316,0.001
Mastery**,0.184,0.066,2.81,0.005
Normative,0.099,0.081,1.222,0.223
Prior_Knowledge***,0.591,0.125,4.726,0.0
Mastery:Appearance:Prior_Knowledge*,0.21,0.094,2.241,0.026
Appearance,0.021,0.077,0.266,0.79


Model adj. R^2: 0.158, R^2: 0.177, N. obs: 218, F-statistic: 9.145
Checking VIF for:  ConfidentWithoutCopilot ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Appearance
Regression eq:  Mastery ~ Normative + Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Appearance R^2:  0.09720012592257754
Predictor:  Mastery VIF:  1.1076651965884543
Regression eq:  Normative ~ Mastery + Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Appearance R^2:  0.40492607952028215
Predictor:  Normative VIF:  1.6804634946761767
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Mastery:Appearance:Prior_Knowledge + Appearance R^2:  0.0063333756126064955
Predictor:  Prior_Knowledge VIF:  1.0063737429205808
Regression eq:  Mastery:Appearance:Prior_Knowledge ~ Mastery + Normative + Prior_Knowledge + Appearance R^2:  0.018888216856064344
Predictor:  Mastery:Appearance:Prior_Knowledge VIF:  1.0192518499732393
Regression eq:  Appearance ~ Mastery + Normative + Prior_Kno

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.41,0.079,-5.2,0.0
Mastery***,0.22,0.059,3.741,0.0
Normative,0.023,0.073,0.312,0.756
Prior_Knowledge***,0.735,0.113,6.522,0.0
Mastery:Appearance:Prior_Knowledge**,0.258,0.084,3.077,0.002
Appearance,0.038,0.069,0.549,0.584


Model adj. R^2: 0.256, R^2: 0.274, N. obs: 207, F-statistic: 15.165
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.279,0.088,-3.175,0.002
Mastery**,0.203,0.071,2.874,0.004
Normative,0.096,0.087,1.098,0.273
Prior_Knowledge***,0.534,0.13,4.12,0.0
Mastery:Appearance:Prior_Knowledge,0.198,0.13,1.531,0.127
Appearance,0.004,0.083,0.042,0.966


Model adj. R^2: 0.143, R^2: 0.164, N. obs: 205, F-statistic: 7.830


In [51]:
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.284,0.089,-3.204,0.002
Mastery**,0.249,0.092,2.7,0.008
Normative,0.124,0.113,1.1,0.273
Appearance,0.02,0.101,0.194,0.847
Prior_Knowledge***,0.591,0.127,4.673,0.0
Mastery:Prior_Knowledge,-0.161,0.133,-1.206,0.229
Normative:Prior_Knowledge,-0.007,0.165,-0.044,0.965
Appearance:Prior_Knowledge,-0.019,0.16,-0.12,0.905


Model adj. R^2: 0.137, R^2: 0.165, N. obs: 218, F-statistic: 5.915
Checking VIF for:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowledge + N

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.273,0.09,-3.04,0.003
Mastery*,0.218,0.098,2.223,0.027
Normative,0.169,0.123,1.379,0.17
Appearance,-0.015,0.105,-0.146,0.884
Prior_Knowledge***,0.568,0.128,4.436,0.0
Mastery:Prior_Knowledge,-0.146,0.139,-1.05,0.295
Normative:Prior_Knowledge,-0.041,0.177,-0.229,0.819
Appearance:Prior_Knowledge,0.011,0.167,0.068,0.946


Model adj. R^2: 0.125, R^2: 0.154, N. obs: 214, F-statistic: 5.366
Equation:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.333,0.097,-3.413,0.001
Mastery*,0.178,0.072,2.467,0.014
Normative,0.112,0.089,1.262,0.208
Appearance,0.002,0.082,0.02,0.984
Prior_Knowledge***,0.594,0.128,4.649,0.0
Mastery:Appearance,0.016,0.08,0.205,0.838
Normative:Appearance,0.042,0.072,0.587,0.558
Mastery:Normative,0.056,0.082,0.684,0.495


Model adj. R^2: 0.137, R^2: 0.164, N. obs: 218, F-statistic: 5.901
Checking VIF for:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predictor:  Prio

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.28,0.103,-2.728,0.007
Mastery,0.121,0.079,1.525,0.129
Normative,0.139,0.096,1.447,0.15
Appearance,-0.042,0.086,-0.483,0.629
Prior_Knowledge***,0.516,0.131,3.95,0.0
Mastery:Appearance,0.11,0.097,1.135,0.258
Normative:Appearance,0.086,0.088,0.975,0.331
Mastery:Normative,-0.011,0.103,-0.111,0.912


Model adj. R^2: 0.119, R^2: 0.150, N. obs: 203, F-statistic: 4.914
Equation:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.259,0.09,-2.875,0.004
Mastery**,0.228,0.075,3.037,0.003
Normative,0.113,0.085,1.328,0.186
Appearance,0.034,0.079,0.433,0.665
Prior_Knowledge***,0.508,0.146,3.466,0.001
Mastery:Normative:Appearance,-0.096,0.064,-1.504,0.134
Mastery:Normative:Prior_Knowledge,-0.018,0.128,-0.139,0.89
Mastery:Appearance:Prior_Knowledge,0.194,0.133,1.452,0.148
Normative:Appearance:Prior_Knowledge,0.101,0.099,1.013,0.312


Model adj. R^2: 0.157, R^2: 0.188, N. obs: 218, F-statistic: 6.059
Checking VIF for:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + 

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.298,0.087,-3.439,0.001
Mastery***,0.272,0.072,3.76,0.0
Normative,0.094,0.081,1.158,0.248
Appearance,0.055,0.076,0.727,0.468
Prior_Knowledge***,0.576,0.141,4.093,0.0
Mastery:Normative:Appearance,-0.108,0.061,-1.758,0.08
Mastery:Normative:Prior_Knowledge,-0.002,0.122,-0.018,0.986
Mastery:Appearance:Prior_Knowledge,0.192,0.127,1.51,0.133
Normative:Appearance:Prior_Knowledge,0.084,0.095,0.885,0.377


Model adj. R^2: 0.203, R^2: 0.233, N. obs: 215, F-statistic: 7.827
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.243,0.096,-2.533,0.012
Mastery*,0.204,0.088,2.335,0.021
Normative,0.113,0.097,1.166,0.245
Appearance,0.001,0.092,0.009,0.993
Prior_Knowledge*,0.415,0.16,2.589,0.01
Mastery:Normative:Appearance,-0.071,0.103,-0.686,0.493
Mastery:Normative:Prior_Knowledge,0.062,0.155,0.4,0.69
Mastery:Appearance:Prior_Knowledge,0.157,0.182,0.859,0.391
Normative:Appearance:Prior_Knowledge,0.13,0.145,0.898,0.37


Model adj. R^2: 0.117, R^2: 0.153, N. obs: 197, F-statistic: 4.251
The best blockwise model is below: 
Equation:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.29,0.088,-3.282,0.001
Mastery**,0.175,0.066,2.649,0.009
Normative,0.121,0.081,1.5,0.135
Appearance,0.015,0.078,0.193,0.847
Prior_Knowledge***,0.59,0.126,4.677,0.0


Model adj. R^2: 0.142, R^2: 0.158, N. obs: 218, F-statistic: 9.986
Checking VIF for:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.278,0.088,-3.145,0.002
Mastery*,0.157,0.067,2.332,0.021
Normative,0.148,0.083,1.782,0.076
Appearance,-0.008,0.08,-0.107,0.915
Prior_Knowledge***,0.579,0.126,4.59,0.0


Model adj. R^2: 0.138, R^2: 0.154, N. obs: 217, F-statistic: 9.624
Displaying ANOVA results:
model1:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  ConfidentWithoutCopilot ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,183.574,0.0,,,
1,213.0,183.574,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


#### Confident You're Learning To Code Yourself 
How confident or unconfident are you that you are learning how to write programs yourself, when using GenAI tools?

Base Model adj. R^2: 0.132, R^2: 0.146, N. obs: 251, F-statistic: 10.512

Very highly sig: Mastery,
Highly sig: Normative**, 
Sig: Prior Knowledge

None of stepwise/blockwise improved the model significantly. 


In [52]:
base_model_eq = 'ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)



Equation:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.113,0.088,-1.28,0.202
Mastery***,0.265,0.066,3.997,0.0
Normative**,0.219,0.081,2.702,0.007
Appearance,-0.072,0.078,-0.922,0.358
Prior_Knowledge,0.231,0.126,1.824,0.07


Model adj. R^2: 0.138, R^2: 0.154, N. obs: 218, F-statistic: 9.712
Checking VIF for:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([]

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.06,0.085,-0.713,0.476
Mastery***,0.295,0.063,4.688,0.0
Normative**,0.248,0.077,3.215,0.002
Appearance,-0.055,0.074,-0.736,0.463
Prior_Knowledge,0.2,0.121,1.653,0.1


Model adj. R^2: 0.185, R^2: 0.200, N. obs: 215, F-statistic: 13.106
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.111,0.089,-1.247,0.214
Mastery***,0.261,0.068,3.858,0.0
Normative**,0.224,0.084,2.678,0.008
Appearance,-0.076,0.08,-0.953,0.342
Prior_Knowledge,0.229,0.127,1.8,0.073


Model adj. R^2: 0.138, R^2: 0.154, N. obs: 217, F-statistic: 9.615


In [53]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'ConfidentIndependentProgramming', check_anova_before_adding=True, display_best_model=True)

Adding Normative significantly improves the model significantly. P val: 0.007368301237156466
Best adj R^2 before:  0.10406045704076305  Best adj R^2 after:  0.12952751273256669
models being compared: ConfidentIndependentProgramming ~ Mastery and ConfidentIndependentProgramming ~ Mastery + Normative
Equation:  ConfidentIndependentProgramming ~ Mastery + Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.0,0.063,0.0,1.0
Mastery***,0.28,0.066,4.241,0.0
Normative**,0.178,0.066,2.705,0.007


Model adj. R^2: 0.130, R^2: 0.138, N. obs: 218, F-statistic: 17.145
Checking VIF for:  ConfidentIndependentProgramming ~ Mastery + Normative
Regression eq:  Mastery ~ Normative R^2:  0.0767072731496169
Predictor:  Mastery VIF:  1.083080122824413
Regression eq:  Normative ~ Mastery R^2:  0.07670727314961667
Predictor:  Normative VIF:  1.0830801228244127

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int64), 'outliers_abs_gt_2_5': array([  8,  11, 199]), 'outliers_abs_gt_2': []}
^^^^^^^^^^^^^^^^Outliers:  [22, 31, 500]
High Cook's D values:  []
Cooks D values:  []
Cooks D p-values:  []
^^^^^^^^^^^^^^^^High leverage points:  [225, 245, 274, 334, 352, 356, 377, 438, 441, 478, 517]
Re-running model without outliers


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.038,0.06,0.637,0.525
Mastery***,0.308,0.063,4.923,0.0
Normative***,0.219,0.063,3.484,0.001


Model adj. R^2: 0.180, R^2: 0.187, N. obs: 215, F-statistic: 24.425
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.021,0.065,-0.327,0.744
Mastery***,0.251,0.072,3.51,0.001
Normative**,0.231,0.076,3.046,0.003


Model adj. R^2: 0.138, R^2: 0.147, N. obs: 207, F-statistic: 17.519


In [54]:
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.11,0.088,-1.251,0.212
Mastery***,0.398,0.092,4.349,0.0
Normative,0.071,0.112,0.634,0.527
Appearance,-0.066,0.1,-0.655,0.513
Prior_Knowledge,0.231,0.126,1.844,0.067
Mastery:Prior_Knowledge*,-0.262,0.132,-1.98,0.049
Normative:Prior_Knowledge,0.281,0.163,1.722,0.086
Appearance:Prior_Knowledge,-0.045,0.158,-0.284,0.776


Model adj. R^2: 0.150, R^2: 0.178, N. obs: 218, F-statistic: 6.486
Checking VIF for:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowl

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.032,0.082,-0.395,0.693
Mastery***,0.463,0.085,5.439,0.0
Normative,0.09,0.105,0.863,0.389
Appearance,-0.033,0.093,-0.35,0.726
Prior_Knowledge,0.198,0.117,1.698,0.091
Mastery:Prior_Knowledge*,-0.31,0.123,-2.534,0.012
Normative:Prior_Knowledge*,0.303,0.152,2.003,0.047
Appearance:Prior_Knowledge,-0.078,0.146,-0.537,0.592


Model adj. R^2: 0.223, R^2: 0.249, N. obs: 213, F-statistic: 9.694
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.125,0.089,-1.41,0.16
Mastery***,0.378,0.097,3.886,0.0
Normative,0.098,0.122,0.804,0.423
Appearance,-0.054,0.104,-0.515,0.607
Prior_Knowledge,0.234,0.127,1.846,0.066
Mastery:Prior_Knowledge,-0.255,0.137,-1.859,0.064
Normative:Prior_Knowledge,0.253,0.175,1.444,0.15
Appearance:Prior_Knowledge,-0.05,0.166,-0.303,0.763


Model adj. R^2: 0.142, R^2: 0.170, N. obs: 214, F-statistic: 6.039
Equation:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.156,0.098,-1.598,0.112
Mastery**,0.241,0.072,3.334,0.001
Normative**,0.261,0.089,2.933,0.004
Appearance,-0.104,0.082,-1.273,0.205
Prior_Knowledge,0.21,0.128,1.641,0.102
Mastery:Appearance,-0.078,0.08,-0.974,0.331
Normative:Appearance,0.088,0.072,1.227,0.221
Mastery:Normative,0.025,0.082,0.308,0.758


Model adj. R^2: 0.134, R^2: 0.162, N. obs: 218, F-statistic: 5.806
Checking VIF for:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predicto

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.124,0.092,-1.337,0.183
Mastery***,0.272,0.068,3.98,0.0
Normative***,0.287,0.084,3.41,0.001
Appearance,-0.093,0.077,-1.201,0.231
Prior_Knowledge,0.176,0.122,1.448,0.149
Mastery:Appearance,-0.071,0.076,-0.934,0.351
Normative:Appearance,0.113,0.069,1.643,0.102
Mastery:Normative,0.053,0.078,0.686,0.494


Model adj. R^2: 0.186, R^2: 0.212, N. obs: 215, F-statistic: 7.968
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.178,0.103,-1.73,0.085
Mastery***,0.295,0.079,3.713,0.0
Normative*,0.194,0.096,2.02,0.045
Appearance,-0.118,0.086,-1.364,0.174
Prior_Knowledge,0.203,0.131,1.551,0.122
Mastery:Appearance,-0.09,0.097,-0.923,0.357
Normative:Appearance,0.023,0.088,0.262,0.794
Mastery:Normative,0.099,0.103,0.956,0.34


Model adj. R^2: 0.122, R^2: 0.152, N. obs: 203, F-statistic: 5.008
Equation:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.116,0.091,-1.271,0.205
Mastery***,0.277,0.076,3.638,0.0
Normative*,0.193,0.086,2.245,0.026
Appearance,-0.076,0.08,-0.945,0.346
Prior_Knowledge,0.109,0.148,0.737,0.462
Mastery:Normative:Appearance,0.007,0.065,0.11,0.913
Mastery:Normative:Prior_Knowledge,0.211,0.129,1.638,0.103
Mastery:Appearance:Prior_Knowledge,-0.125,0.135,-0.928,0.354
Normative:Appearance:Prior_Knowledge,0.119,0.1,1.189,0.236


Model adj. R^2: 0.138, R^2: 0.170, N. obs: 218, F-statistic: 5.356
Checking VIF for:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Know

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.069,0.087,-0.797,0.426
Mastery***,0.299,0.072,4.138,0.0
Normative**,0.218,0.082,2.668,0.008
Appearance,-0.06,0.076,-0.794,0.428
Prior_Knowledge,0.097,0.141,0.691,0.491
Mastery:Normative:Appearance,0.027,0.062,0.428,0.669
Mastery:Normative:Prior_Knowledge,0.215,0.122,1.761,0.08
Mastery:Appearance:Prior_Knowledge,-0.119,0.128,-0.935,0.351
Normative:Appearance:Prior_Knowledge,0.095,0.095,0.999,0.319


Model adj. R^2: 0.185, R^2: 0.216, N. obs: 215, F-statistic: 7.089
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.116,0.096,-1.203,0.23
Mastery**,0.261,0.088,2.976,0.003
Normative,0.179,0.097,1.854,0.065
Appearance,-0.114,0.092,-1.245,0.215
Prior_Knowledge,0.08,0.16,0.499,0.618
Mastery:Normative:Appearance,-0.0,0.103,-0.003,0.997
Mastery:Normative:Prior_Knowledge,0.26,0.155,1.677,0.095
Mastery:Appearance:Prior_Knowledge,-0.103,0.182,-0.565,0.573
Normative:Appearance:Prior_Knowledge,0.087,0.145,0.602,0.548


Model adj. R^2: 0.112, R^2: 0.148, N. obs: 197, F-statistic: 4.093
The best blockwise model is below: 
Equation:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.113,0.088,-1.28,0.202
Mastery***,0.265,0.066,3.997,0.0
Normative**,0.219,0.081,2.702,0.007
Appearance,-0.072,0.078,-0.922,0.358
Prior_Knowledge,0.231,0.126,1.824,0.07


Model adj. R^2: 0.138, R^2: 0.154, N. obs: 218, F-statistic: 9.712
Checking VIF for:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([]

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.06,0.085,-0.713,0.476
Mastery***,0.295,0.063,4.688,0.0
Normative**,0.248,0.077,3.215,0.002
Appearance,-0.055,0.074,-0.736,0.463
Prior_Knowledge,0.2,0.121,1.653,0.1


Model adj. R^2: 0.185, R^2: 0.200, N. obs: 215, F-statistic: 13.106
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.111,0.089,-1.247,0.214
Mastery***,0.261,0.068,3.858,0.0
Normative**,0.224,0.084,2.678,0.008
Appearance,-0.076,0.08,-0.953,0.342
Prior_Knowledge,0.229,0.127,1.8,0.073


Model adj. R^2: 0.138, R^2: 0.154, N. obs: 217, F-statistic: 9.615
Displaying ANOVA results:
model1:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  ConfidentIndependentProgramming ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,184.373,0.0,,,
1,213.0,184.373,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


#### Confident In Identifying Non-Copilot Problems 
How confident or unconfident are you that you can: [Identify the types of coding problems that I should be able to complete without copilot]

Very Highly Sig: Mastery, Prior Knowledge

Somewhat high R^2

Base model: Model adj. R^2: 0.166, R^2: 0.179, N. obs: 251, F-statistic: 13.407
Mastery, normative, prior knowledge are significant. 

No significant improvement from block or stepwise 


In [55]:
base_model_eq = 'ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)


Equation:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.315,0.087,-3.61,0.0
Mastery*,0.144,0.065,2.203,0.029
Normative,0.133,0.08,1.671,0.096
Appearance,0.068,0.077,0.881,0.379
Prior_Knowledge***,0.641,0.125,5.145,0.0


Model adj. R^2: 0.164, R^2: 0.179, N. obs: 218, F-statistic: 11.646
Checking VIF for:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.357,0.084,-4.248,0.0
Mastery*,0.162,0.063,2.587,0.01
Normative,0.118,0.077,1.534,0.126
Appearance,0.081,0.074,1.099,0.273
Prior_Knowledge***,0.706,0.12,5.886,0.0


Model adj. R^2: 0.202, R^2: 0.217, N. obs: 215, F-statistic: 14.514
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.302,0.087,-3.46,0.001
Mastery,0.124,0.066,1.863,0.064
Normative*,0.163,0.082,1.993,0.048
Appearance,0.041,0.079,0.528,0.598
Prior_Knowledge***,0.629,0.124,5.052,0.0


Model adj. R^2: 0.159, R^2: 0.175, N. obs: 217, F-statistic: 11.247


In [56]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'ConfidentIdentifyCodingProblems', check_anova_before_adding=True, display_best_model=True)

Adding Normative significantly improves the model significantly. P val: 0.0077424569393086595
Best adj R^2 before:  0.03926097177386412  Best adj R^2 after:  0.06618336335188302
models being compared: ConfidentIdentifyCodingProblems ~ Mastery and ConfidentIdentifyCodingProblems ~ Mastery + Normative
Adding Prior_Knowledge significantly improves the model significantly. P val: 6.176215337794052e-07
Best adj R^2 before:  0.06618336335188302  Best adj R^2 after:  0.16492508627464963
models being compared: ConfidentIdentifyCodingProblems ~ Mastery + Normative and ConfidentIdentifyCodingProblems ~ Mastery + Normative + Prior_Knowledge
Equation:  ConfidentIdentifyCodingProblems ~ Mastery + Normative + Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.314,0.087,-3.607,0.0
Mastery*,0.137,0.065,2.115,0.036
Normative**,0.175,0.065,2.705,0.007
Prior_Knowledge***,0.64,0.124,5.14,0.0


Model adj. R^2: 0.165, R^2: 0.176, N. obs: 218, F-statistic: 15.286
Checking VIF for:  ConfidentIdentifyCodingProblems ~ Mastery + Normative + Prior_Knowledge
Regression eq:  Mastery ~ Normative + Prior_Knowledge R^2:  0.08052652964829654
Predictor:  Mastery VIF:  1.0875789593118925
Regression eq:  Normative ~ Mastery + Prior_Knowledge R^2:  0.07735959399469983
Predictor:  Normative VIF:  1.0838458769973438
Regression eq:  Prior_Knowledge ~ Mastery + Normative R^2:  0.0062533583232448064
Predictor:  Prior_Knowledge VIF:  1.0062927088867375

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int64), 'outliers_abs_gt_2_5': array([ 8, 16, 62]), 'outliers_abs_gt_2': []}
^^^^^^^^^^^^^^^^Outliers:  [22, 51, 176]
High Cook's D values:  []
Cooks D values:  []
Cooks D p-va

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.356,0.084,-4.234,0.0
Mastery*,0.153,0.062,2.469,0.014
Normative**,0.167,0.062,2.701,0.007
Prior_Knowledge***,0.704,0.12,5.87,0.0


Model adj. R^2: 0.201, R^2: 0.212, N. obs: 215, F-statistic: 18.932
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.312,0.088,-3.541,0.0
Mastery,0.094,0.068,1.379,0.169
Normative**,0.224,0.07,3.187,0.002
Prior_Knowledge***,0.623,0.126,4.946,0.0


Model adj. R^2: 0.167, R^2: 0.179, N. obs: 212, F-statistic: 15.149


In [57]:
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.306,0.087,-3.518,0.001
Mastery*,0.231,0.091,2.555,0.011
Normative,0.169,0.111,1.517,0.131
Appearance,0.097,0.099,0.977,0.33
Prior_Knowledge***,0.643,0.124,5.174,0.0
Mastery:Prior_Knowledge,-0.202,0.131,-1.541,0.125
Normative:Prior_Knowledge,-0.058,0.162,-0.36,0.719
Appearance:Prior_Knowledge,-0.078,0.157,-0.496,0.621


Model adj. R^2: 0.168, R^2: 0.195, N. obs: 218, F-statistic: 7.276
Checking VIF for:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowl

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.305,0.084,-3.632,0.0
Mastery**,0.263,0.087,3.024,0.003
Normative,0.148,0.107,1.387,0.167
Appearance,0.163,0.096,1.695,0.092
Prior_Knowledge***,0.662,0.12,5.534,0.0
Mastery:Prior_Knowledge,-0.213,0.126,-1.692,0.092
Normative:Prior_Knowledge,-0.031,0.155,-0.2,0.842
Appearance:Prior_Knowledge,-0.143,0.151,-0.95,0.343


Model adj. R^2: 0.209, R^2: 0.235, N. obs: 215, F-statistic: 9.098
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.293,0.088,-3.321,0.001
Mastery*,0.2,0.096,2.073,0.039
Normative,0.215,0.121,1.783,0.076
Appearance,0.058,0.104,0.559,0.577
Prior_Knowledge***,0.62,0.126,4.934,0.0
Mastery:Prior_Knowledge,-0.182,0.136,-1.336,0.183
Normative:Prior_Knowledge,-0.094,0.174,-0.541,0.589
Appearance:Prior_Knowledge,-0.044,0.164,-0.266,0.791


Model adj. R^2: 0.156, R^2: 0.184, N. obs: 214, F-statistic: 6.638
Equation:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.323,0.096,-3.345,0.001
Mastery,0.139,0.071,1.944,0.053
Normative,0.128,0.088,1.459,0.146
Appearance,0.07,0.081,0.864,0.388
Prior_Knowledge***,0.644,0.126,5.092,0.0
Mastery:Appearance,0.05,0.079,0.63,0.53
Normative:Appearance,0.014,0.071,0.201,0.841
Mastery:Normative,-0.018,0.081,-0.224,0.823


Model adj. R^2: 0.154, R^2: 0.182, N. obs: 218, F-statistic: 6.661
Checking VIF for:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predicto

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.384,0.092,-4.174,0.0
Mastery*,0.169,0.067,2.501,0.013
Normative,0.114,0.083,1.369,0.172
Appearance,0.09,0.076,1.183,0.238
Prior_Knowledge***,0.732,0.12,6.094,0.0
Mastery:Appearance,0.048,0.075,0.646,0.519
Normative:Appearance,0.029,0.067,0.435,0.664
Mastery:Normative,0.016,0.077,0.213,0.832


Model adj. R^2: 0.213, R^2: 0.239, N. obs: 214, F-statistic: 9.252
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.236,0.103,-2.3,0.023
Mastery,0.044,0.079,0.559,0.577
Normative*,0.201,0.096,2.09,0.038
Appearance,0.033,0.086,0.387,0.699
Prior_Knowledge***,0.565,0.131,4.329,0.0
Mastery:Appearance,0.182,0.097,1.882,0.061
Normative:Appearance,0.021,0.088,0.236,0.814
Mastery:Normative,-0.142,0.103,-1.378,0.17


Model adj. R^2: 0.139, R^2: 0.169, N. obs: 203, F-statistic: 5.656
Equation:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.283,0.089,-3.179,0.002
Mastery*,0.184,0.075,2.465,0.014
Normative,0.147,0.084,1.753,0.081
Appearance,0.084,0.079,1.065,0.288
Prior_Knowledge***,0.603,0.145,4.155,0.0
Mastery:Normative:Appearance,-0.096,0.063,-1.512,0.132
Mastery:Normative:Prior_Knowledge,-0.132,0.127,-1.046,0.297
Mastery:Appearance:Prior_Knowledge,0.224,0.132,1.697,0.091
Normative:Appearance:Prior_Knowledge,0.069,0.098,0.697,0.486


Model adj. R^2: 0.172, R^2: 0.202, N. obs: 218, F-statistic: 6.622
Checking VIF for:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Know

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.307,0.085,-3.608,0.0
Mastery**,0.21,0.071,2.944,0.004
Normative,0.133,0.08,1.656,0.099
Appearance,0.114,0.075,1.516,0.131
Prior_Knowledge***,0.65,0.139,4.682,0.0
Mastery:Normative:Appearance,-0.086,0.061,-1.416,0.158
Mastery:Normative:Prior_Knowledge,-0.101,0.121,-0.837,0.403
Mastery:Appearance:Prior_Knowledge*,0.252,0.126,1.994,0.047
Normative:Appearance:Prior_Knowledge,0.092,0.095,0.973,0.332


Model adj. R^2: 0.226, R^2: 0.255, N. obs: 215, F-statistic: 8.794
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.234,0.093,-2.509,0.013
Mastery,0.167,0.085,1.958,0.052
Normative,0.149,0.094,1.581,0.115
Appearance,0.108,0.089,1.211,0.227
Prior_Knowledge**,0.491,0.156,3.149,0.002
Mastery:Normative:Appearance,-0.166,0.1,-1.661,0.098
Mastery:Normative:Prior_Knowledge,-0.011,0.151,-0.07,0.944
Mastery:Appearance:Prior_Knowledge,0.188,0.177,1.063,0.289
Normative:Appearance:Prior_Knowledge,0.196,0.141,1.393,0.165


Model adj. R^2: 0.165, R^2: 0.199, N. obs: 197, F-statistic: 5.850
The best blockwise model is below: 
Equation:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.315,0.087,-3.61,0.0
Mastery*,0.144,0.065,2.203,0.029
Normative,0.133,0.08,1.671,0.096
Appearance,0.068,0.077,0.881,0.379
Prior_Knowledge***,0.641,0.125,5.145,0.0


Model adj. R^2: 0.164, R^2: 0.179, N. obs: 218, F-statistic: 11.646
Checking VIF for:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.357,0.084,-4.248,0.0
Mastery*,0.162,0.063,2.587,0.01
Normative,0.118,0.077,1.534,0.126
Appearance,0.081,0.074,1.099,0.273
Prior_Knowledge***,0.706,0.12,5.886,0.0


Model adj. R^2: 0.202, R^2: 0.217, N. obs: 215, F-statistic: 14.514
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.302,0.087,-3.46,0.001
Mastery,0.124,0.066,1.863,0.064
Normative*,0.163,0.082,1.993,0.048
Appearance,0.041,0.079,0.528,0.598
Prior_Knowledge***,0.629,0.124,5.052,0.0


Model adj. R^2: 0.159, R^2: 0.175, N. obs: 217, F-statistic: 11.247
Displaying ANOVA results:
model1:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  ConfidentIdentifyCodingProblems ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,178.878,0.0,,,
1,213.0,178.878,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


### Understanding Copilot Output

decent r^2


#### Confident In Recognizing & Understanding Copilot Output 

How confident are you that you can recognize and understand copilot output? Somewhat high R^2

Base model: Model adj. R^2: 0.137, R^2: 0.150, N. obs: 251, F-statistic: 10.882

Significant: Mastery, normative, prior knowledge are 

No significant improvement from block or stepwise 


In [58]:
base_model_eq = 'ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)

Equation:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.231,0.09,-2.582,0.01
Mastery**,0.195,0.067,2.91,0.004
Normative,0.154,0.082,1.874,0.062
Appearance,-0.094,0.079,-1.189,0.236
Prior_Knowledge***,0.471,0.128,3.68,0.0


Model adj. R^2: 0.117, R^2: 0.133, N. obs: 218, F-statistic: 8.163
Checking VIF for:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([8

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.23,0.088,-2.629,0.009
Mastery**,0.207,0.066,3.158,0.002
Normative*,0.158,0.08,1.972,0.05
Appearance,-0.094,0.077,-1.216,0.225
Prior_Knowledge***,0.497,0.125,3.966,0.0


Model adj. R^2: 0.135, R^2: 0.151, N. obs: 217, F-statistic: 9.396
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.227,0.09,-2.521,0.012
Mastery**,0.189,0.069,2.752,0.006
Normative,0.163,0.085,1.929,0.055
Appearance,-0.102,0.081,-1.262,0.208
Prior_Knowledge***,0.467,0.129,3.635,0.0


Model adj. R^2: 0.115, R^2: 0.132, N. obs: 217, F-statistic: 8.043


In [59]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'ConfidentRecognizeCopilotOutput', check_anova_before_adding=True, display_best_model=True)

Adding Prior_Knowledge significantly improves the model significantly. P val: 0.00025687020626823234
Best adj R^2 before:  0.05758004994908783  Best adj R^2 after:  0.11037338386461137
models being compared: ConfidentRecognizeCopilotOutput ~ Mastery and ConfidentRecognizeCopilotOutput ~ Mastery + Prior_Knowledge
Adding Mastery:Prior_Knowledge significantly improves the model significantly. P val: 0.029082072104099414
Best adj R^2 before:  0.11037338386461137  Best adj R^2 after:  0.1259338511991639
models being compared: ConfidentRecognizeCopilotOutput ~ Mastery + Prior_Knowledge and ConfidentRecognizeCopilotOutput ~ Mastery + Prior_Knowledge + Mastery:Prior_Knowledge
Equation:  ConfidentRecognizeCopilotOutput ~ Mastery + Prior_Knowledge + Mastery:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.225,0.089,-2.521,0.012
Mastery***,0.361,0.087,4.155,0.0
Prior_Knowledge***,0.479,0.127,3.765,0.0
Mastery:Prior_Knowledge*,-0.28,0.128,-2.197,0.029


Model adj. R^2: 0.126, R^2: 0.138, N. obs: 218, F-statistic: 11.422
Checking VIF for:  ConfidentRecognizeCopilotOutput ~ Mastery + Prior_Knowledge + Mastery:Prior_Knowledge
Regression eq:  Mastery ~ Prior_Knowledge + Mastery:Prior_Knowledge R^2:  0.46706751784461775
Predictor:  Mastery VIF:  1.8764103024000687
Regression eq:  Prior_Knowledge ~ Mastery + Mastery:Prior_Knowledge R^2:  0.005595868566575679
Predictor:  Prior_Knowledge VIF:  1.00562735852526
Regression eq:  Mastery:Prior_Knowledge ~ Mastery + Prior_Knowledge R^2:  0.4657586729378038
Predictor:  Mastery:Prior_Knowledge VIF:  1.8718132599344572

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
^^^^^^^^^^^^^^^^Kolmogorov-Smirnov test for normality: Significant after power transformation
KstestResult(statistic=0.13877615815731154, pvalue=0.000199931779566, statistic_location=-0.08919308215957691, statistic_sign=-1)
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.225,0.087,-2.572,0.011
Mastery***,0.361,0.085,4.238,0.0
Prior_Knowledge***,0.504,0.125,4.032,0.0
Mastery:Prior_Knowledge*,-0.253,0.125,-2.017,0.045


Model adj. R^2: 0.139, R^2: 0.151, N. obs: 217, F-statistic: 12.647
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.233,0.092,-2.536,0.012
Mastery***,0.377,0.094,4.009,0.0
Prior_Knowledge***,0.517,0.132,3.926,0.0
Mastery:Prior_Knowledge*,-0.347,0.139,-2.5,0.013


Model adj. R^2: 0.122, R^2: 0.135, N. obs: 210, F-statistic: 10.707


In [60]:
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.224,0.089,-2.512,0.013
Mastery***,0.351,0.093,3.788,0.0
Normative,0.079,0.114,0.698,0.486
Appearance,-0.126,0.101,-1.245,0.215
Prior_Knowledge***,0.473,0.127,3.721,0.0
Mastery:Prior_Knowledge*,-0.314,0.134,-2.342,0.02
Normative:Prior_Knowledge,0.122,0.165,0.736,0.463
Appearance:Prior_Knowledge,0.054,0.16,0.335,0.738


Model adj. R^2: 0.130, R^2: 0.158, N. obs: 218, F-statistic: 5.613
Checking VIF for:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowl

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.217,0.084,-2.586,0.01
Mastery***,0.384,0.087,4.409,0.0
Normative,0.101,0.107,0.944,0.346
Appearance,-0.112,0.096,-1.163,0.246
Prior_Knowledge***,0.514,0.12,4.285,0.0
Mastery:Prior_Knowledge*,-0.322,0.126,-2.559,0.011
Normative:Prior_Knowledge,0.118,0.155,0.759,0.448
Appearance:Prior_Knowledge,0.047,0.151,0.308,0.758


Model adj. R^2: 0.180, R^2: 0.207, N. obs: 214, F-statistic: 7.670
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.237,0.09,-2.629,0.009
Mastery**,0.319,0.099,3.231,0.001
Normative,0.123,0.123,1.002,0.318
Appearance,-0.125,0.106,-1.179,0.24
Prior_Knowledge***,0.487,0.129,3.789,0.0
Mastery:Prior_Knowledge*,-0.285,0.139,-2.049,0.042
Normative:Prior_Knowledge,0.103,0.178,0.578,0.564
Appearance:Prior_Knowledge,0.03,0.168,0.179,0.858


Model adj. R^2: 0.128, R^2: 0.157, N. obs: 214, F-statistic: 5.476
Equation:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.17,0.099,-1.729,0.085
Mastery*,0.186,0.073,2.556,0.011
Normative,0.152,0.09,1.692,0.092
Appearance,-0.064,0.082,-0.774,0.44
Prior_Knowledge***,0.474,0.129,3.667,0.0
Mastery:Appearance,0.074,0.081,0.909,0.364
Normative:Appearance,-0.057,0.072,-0.785,0.433
Mastery:Normative,-0.123,0.083,-1.486,0.139


Model adj. R^2: 0.116, R^2: 0.145, N. obs: 218, F-statistic: 5.077
Checking VIF for:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predicto

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.154,0.094,-1.643,0.102
Mastery**,0.207,0.069,2.983,0.003
Normative,0.16,0.085,1.883,0.061
Appearance,-0.039,0.078,-0.498,0.619
Prior_Knowledge***,0.499,0.124,4.037,0.0
Mastery:Appearance,0.095,0.077,1.229,0.221
Normative:Appearance,-0.046,0.07,-0.663,0.508
Mastery:Normative,-0.121,0.079,-1.542,0.125


Model adj. R^2: 0.151, R^2: 0.179, N. obs: 215, F-statistic: 6.443
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.131,0.104,-1.263,0.208
Mastery*,0.17,0.08,2.119,0.035
Normative,0.156,0.097,1.613,0.108
Appearance,-0.074,0.087,-0.857,0.393
Prior_Knowledge***,0.456,0.132,3.458,0.001
Mastery:Appearance*,0.213,0.098,2.178,0.031
Normative:Appearance,-0.076,0.089,-0.854,0.394
Mastery:Normative,-0.15,0.104,-1.449,0.149


Model adj. R^2: 0.124, R^2: 0.154, N. obs: 203, F-statistic: 5.068
Equation:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.222,0.093,-2.401,0.017
Mastery**,0.217,0.077,2.808,0.005
Normative,0.138,0.087,1.58,0.116
Appearance,-0.083,0.082,-1.016,0.311
Prior_Knowledge**,0.467,0.151,3.095,0.002
Mastery:Normative:Appearance,-0.027,0.066,-0.411,0.682
Mastery:Normative:Prior_Knowledge,0.014,0.132,0.106,0.916
Mastery:Appearance:Prior_Knowledge,0.092,0.137,0.668,0.505
Normative:Appearance:Prior_Knowledge,-0.006,0.102,-0.058,0.954


Model adj. R^2: 0.106, R^2: 0.139, N. obs: 218, F-statistic: 4.205
Checking VIF for:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Know

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.22,0.09,-2.429,0.016
Mastery**,0.236,0.076,3.108,0.002
Normative,0.137,0.085,1.608,0.109
Appearance,-0.079,0.08,-0.994,0.322
Prior_Knowledge***,0.502,0.148,3.401,0.001
Mastery:Normative:Appearance,-0.032,0.064,-0.493,0.623
Mastery:Normative:Prior_Knowledge,0.024,0.129,0.183,0.855
Mastery:Appearance:Prior_Knowledge,0.095,0.134,0.71,0.479
Normative:Appearance:Prior_Knowledge,-0.023,0.1,-0.235,0.815


Model adj. R^2: 0.126, R^2: 0.158, N. obs: 217, F-statistic: 4.885
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.171,0.096,-1.782,0.076
Mastery**,0.25,0.088,2.851,0.005
Normative,0.095,0.097,0.986,0.326
Appearance,-0.048,0.092,-0.526,0.599
Prior_Knowledge*,0.395,0.161,2.458,0.015
Mastery:Normative:Appearance,-0.156,0.103,-1.51,0.133
Mastery:Normative:Prior_Knowledge,0.108,0.155,0.698,0.486
Mastery:Appearance:Prior_Knowledge,0.076,0.182,0.419,0.676
Normative:Appearance:Prior_Knowledge,0.092,0.145,0.635,0.527


Model adj. R^2: 0.110, R^2: 0.147, N. obs: 197, F-statistic: 4.036
The best blockwise model is below: 
Equation:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.231,0.09,-2.582,0.01
Mastery**,0.195,0.067,2.91,0.004
Normative,0.154,0.082,1.874,0.062
Appearance,-0.094,0.079,-1.189,0.236
Prior_Knowledge***,0.471,0.128,3.68,0.0


Model adj. R^2: 0.117, R^2: 0.133, N. obs: 218, F-statistic: 8.163
Checking VIF for:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([8

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.23,0.088,-2.629,0.009
Mastery**,0.207,0.066,3.158,0.002
Normative*,0.158,0.08,1.972,0.05
Appearance,-0.094,0.077,-1.216,0.225
Prior_Knowledge***,0.497,0.125,3.966,0.0


Model adj. R^2: 0.135, R^2: 0.151, N. obs: 217, F-statistic: 9.396
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.227,0.09,-2.521,0.012
Mastery**,0.189,0.069,2.752,0.006
Normative,0.163,0.085,1.929,0.055
Appearance,-0.102,0.081,-1.262,0.208
Prior_Knowledge***,0.467,0.129,3.635,0.0


Model adj. R^2: 0.115, R^2: 0.132, N. obs: 217, F-statistic: 8.043
Displaying ANOVA results:
model1:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  ConfidentRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,189.025,0.0,,,
1,213.0,189.025,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


#### Percentage Recognize copilot output 

How often do you recognize copilot output? 

Base model mastery and prior knowledge is significant. Model adj. R^2: 0.094, R^2: 0.108, N. obs: 251, F-statistic: 7.460

significant: mastery, PK 

no significant improvements from block or stepwise 

In [61]:
base_model_eq = 'PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)





Equation:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.116,0.091,-1.271,0.205
Mastery***,0.259,0.068,3.795,0.0
Normative,0.087,0.083,1.042,0.298
Appearance,-0.024,0.08,-0.294,0.769
Prior_Knowledge,0.236,0.13,1.812,0.071


Model adj. R^2: 0.087, R^2: 0.104, N. obs: 218, F-statistic: 6.151
Checking VIF for:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.087,0.08,-1.086,0.279
Mastery***,0.327,0.06,5.461,0.0
Normative,0.041,0.073,0.557,0.578
Appearance,0.023,0.07,0.327,0.744
Prior_Knowledge*,0.248,0.113,2.189,0.03


Model adj. R^2: 0.162, R^2: 0.178, N. obs: 207, F-statistic: 10.930
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.117,0.092,-1.275,0.204
Mastery***,0.26,0.07,3.736,0.0
Normative,0.084,0.086,0.978,0.329
Appearance,-0.021,0.083,-0.258,0.797
Prior_Knowledge,0.237,0.131,1.812,0.071


Model adj. R^2: 0.086, R^2: 0.103, N. obs: 217, F-statistic: 6.086


In [62]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'PercentageRecognizeCopilotOutput', check_anova_before_adding=True, display_best_model=True)


Equation:  PercentageRecognizeCopilotOutput ~ Mastery


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.0,0.065,0.0,1.0
Mastery***,0.29,0.065,4.451,0.0


Model adj. R^2: 0.080, R^2: 0.084, N. obs: 218, F-statistic: 19.815
Checking VIF for:  PercentageRecognizeCopilotOutput ~ Mastery

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([77]), 'outliers_abs_gt_2_5': array([ 39,  77, 148, 199, 200]), 'outliers_abs_gt_2': array([  1,   8,  10,  39,  77, 121, 135, 148, 199, 200, 202])}
^^^^^^^^^^^^^^^^Outliers:  [6, 22, 26, 116, 214, 287, 334, 365, 500, 502, 508]
High Cook's D values:  []
Cooks D values:  []
Cooks D p-values:  []
^^^^^^^^^^^^^^^^High leverage points:  [0, 23, 177, 356, 444, 492, 495, 508, 544]
Re-running model without outliers


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.034,0.057,0.596,0.552
Mastery***,0.374,0.057,6.532,0.0


Model adj. R^2: 0.168, R^2: 0.172, N. obs: 207, F-statistic: 42.668
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.022,0.067,0.326,0.745
Mastery***,0.249,0.071,3.503,0.001


Model adj. R^2: 0.051, R^2: 0.056, N. obs: 209, F-statistic: 12.272


In [63]:
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.114,0.092,-1.242,0.216
Mastery*,0.235,0.095,2.462,0.015
Normative,0.176,0.117,1.506,0.134
Appearance,-0.04,0.104,-0.382,0.703
Prior_Knowledge,0.236,0.131,1.808,0.072
Mastery:Prior_Knowledge,0.039,0.138,0.285,0.776
Normative:Prior_Knowledge,-0.178,0.17,-1.045,0.297
Appearance:Prior_Knowledge,0.051,0.165,0.309,0.757


Model adj. R^2: 0.079, R^2: 0.109, N. obs: 218, F-statistic: 3.664
Checking VIF for:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Know

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.04,0.08,-0.497,0.62
Mastery***,0.343,0.084,4.084,0.0
Normative,0.158,0.101,1.555,0.122
Appearance,0.033,0.09,0.373,0.709
Prior_Knowledge,0.202,0.113,1.78,0.077
Mastery:Prior_Knowledge,-0.023,0.12,-0.19,0.85
Normative:Prior_Knowledge,-0.152,0.147,-1.033,0.303
Appearance:Prior_Knowledge,-0.036,0.142,-0.255,0.799


Model adj. R^2: 0.173, R^2: 0.201, N. obs: 207, F-statistic: 7.150
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.122,0.093,-1.316,0.19
Mastery*,0.212,0.102,2.089,0.038
Normative,0.207,0.127,1.631,0.104
Appearance,-0.04,0.109,-0.368,0.713
Prior_Knowledge,0.232,0.132,1.752,0.081
Mastery:Prior_Knowledge,0.053,0.143,0.372,0.71
Normative:Prior_Knowledge,-0.235,0.183,-1.286,0.2
Appearance:Prior_Knowledge,0.08,0.173,0.464,0.643


Model adj. R^2: 0.075, R^2: 0.105, N. obs: 214, F-statistic: 3.458
Equation:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.109,0.101,-1.08,0.281
Mastery**,0.246,0.075,3.298,0.001
Normative,0.104,0.092,1.133,0.259
Appearance,-0.025,0.084,-0.301,0.764
Prior_Knowledge,0.228,0.132,1.725,0.086
Mastery:Appearance,-0.01,0.083,-0.115,0.909
Normative:Appearance,0.013,0.074,0.17,0.865
Mastery:Normative,-0.035,0.085,-0.414,0.679


Model adj. R^2: 0.075, R^2: 0.105, N. obs: 218, F-statistic: 3.517
Checking VIF for:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predict

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.032,0.087,-0.371,0.711
Mastery***,0.342,0.066,5.162,0.0
Normative,0.044,0.081,0.541,0.589
Appearance,0.058,0.072,0.806,0.421
Prior_Knowledge*,0.233,0.114,2.049,0.042
Mastery:Appearance,0.063,0.071,0.878,0.381
Normative:Appearance,-0.063,0.066,-0.96,0.338
Mastery:Normative,-0.089,0.075,-1.189,0.236


Model adj. R^2: 0.173, R^2: 0.201, N. obs: 206, F-statistic: 7.131
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.048,0.107,-0.447,0.655
Mastery**,0.244,0.082,2.96,0.003
Normative,0.121,0.1,1.216,0.225
Appearance,-0.012,0.089,-0.137,0.892
Prior_Knowledge,0.166,0.136,1.222,0.223
Mastery:Appearance,0.065,0.101,0.646,0.519
Normative:Appearance,-0.082,0.092,-0.896,0.371
Mastery:Normative,-0.081,0.107,-0.763,0.447


Model adj. R^2: 0.068, R^2: 0.100, N. obs: 203, F-statistic: 3.091
Equation:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.09,0.094,-0.958,0.339
Mastery***,0.296,0.079,3.763,0.0
Normative,0.099,0.089,1.111,0.268
Appearance,-0.014,0.083,-0.168,0.866
Prior_Knowledge,0.144,0.153,0.943,0.347
Mastery:Normative:Appearance,-0.079,0.067,-1.178,0.24
Mastery:Normative:Prior_Knowledge,0.026,0.133,0.198,0.843
Mastery:Appearance:Prior_Knowledge,-0.088,0.139,-0.632,0.528
Normative:Appearance:Prior_Knowledge,0.105,0.104,1.008,0.314


Model adj. R^2: 0.079, R^2: 0.113, N. obs: 218, F-statistic: 3.333
Checking VIF for:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Kno

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.059,0.093,-0.635,0.526
Mastery***,0.315,0.077,4.076,0.0
Normative,0.104,0.087,1.197,0.233
Appearance,-0.012,0.081,-0.144,0.886
Prior_Knowledge,0.112,0.15,0.745,0.457
Mastery:Normative:Appearance,-0.09,0.066,-1.369,0.173
Mastery:Normative:Prior_Knowledge,0.024,0.131,0.186,0.852
Mastery:Appearance:Prior_Knowledge,-0.089,0.137,-0.651,0.516
Normative:Appearance:Prior_Knowledge,0.107,0.102,1.052,0.294


Model adj. R^2: 0.090, R^2: 0.124, N. obs: 217, F-statistic: 3.683
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.081,0.099,-0.817,0.415
Mastery**,0.281,0.091,3.099,0.002
Normative,0.112,0.1,1.116,0.266
Appearance,0.011,0.095,0.121,0.904
Prior_Knowledge,0.19,0.166,1.146,0.253
Mastery:Normative:Appearance,-0.125,0.107,-1.169,0.244
Mastery:Normative:Prior_Knowledge,0.034,0.161,0.21,0.834
Mastery:Appearance:Prior_Knowledge,-0.035,0.189,-0.186,0.852
Normative:Appearance:Prior_Knowledge,-0.035,0.15,-0.236,0.813


Model adj. R^2: 0.058, R^2: 0.097, N. obs: 197, F-statistic: 2.518
The best blockwise model is below: 
Equation:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.116,0.091,-1.271,0.205
Mastery***,0.259,0.068,3.795,0.0
Normative,0.087,0.083,1.042,0.298
Appearance,-0.024,0.08,-0.294,0.769
Prior_Knowledge,0.236,0.13,1.812,0.071


Model adj. R^2: 0.087, R^2: 0.104, N. obs: 218, F-statistic: 6.151
Checking VIF for:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.087,0.08,-1.086,0.279
Mastery***,0.327,0.06,5.461,0.0
Normative,0.041,0.073,0.557,0.578
Appearance,0.023,0.07,0.327,0.744
Prior_Knowledge*,0.248,0.113,2.189,0.03


Model adj. R^2: 0.162, R^2: 0.178, N. obs: 207, F-statistic: 10.930
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.117,0.092,-1.275,0.204
Mastery***,0.26,0.07,3.736,0.0
Normative,0.084,0.086,0.978,0.329
Appearance,-0.021,0.083,-0.258,0.797
Prior_Knowledge,0.237,0.131,1.812,0.071


Model adj. R^2: 0.086, R^2: 0.103, N. obs: 217, F-statistic: 6.086
Displaying ANOVA results:
model1:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  PercentageRecognizeCopilotOutput ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,195.425,0.0,,,
1,213.0,195.425,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


### Reading & Testing Copilot Output

Not very high r2


#### Test Code 
How often do you test the code copilot outputs?

Base model mastery is significant. Model adj. R^2: 0.032, R^2: 0.048, N. obs: 251, F-statistic: 3.076

Sig: Mastery

--- --- --- --- --- 


Threeway block 

Mastery:Normative:Prior_Knowledge*	5.727	2.557	2.239	0.026
Mastery:Appearance:Prior_Knowledge**	-8.885	3.080	-2.885	0.004 

are significant, Mastery is Not significant 

In [64]:
base_model_eq = 'EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)

Equation:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.027,0.094,-0.289,0.773
Mastery,0.124,0.071,1.75,0.082
Normative,0.124,0.087,1.439,0.152
Appearance,-0.12,0.083,-1.442,0.151
Prior_Knowledge,0.056,0.135,0.412,0.681


Model adj. R^2: 0.018, R^2: 0.036, N. obs: 218, F-statistic: 1.981
Checking VIF for:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int64), 'o

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.042,0.095,-0.442,0.659
Mastery*,0.147,0.072,2.041,0.042
Normative,0.09,0.089,1.016,0.311
Appearance,-0.09,0.085,-1.06,0.29
Prior_Knowledge,0.07,0.135,0.516,0.606


Model adj. R^2: 0.018, R^2: 0.036, N. obs: 217, F-statistic: 2.003


In [65]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'EndQ_TestCode', check_anova_before_adding=True, display_best_model=True)

Equation:  EndQ_TestCode ~ Mastery


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.0,0.067,0.0,1.0
Mastery*,0.152,0.067,2.262,0.025


Model adj. R^2: 0.019, R^2: 0.023, N. obs: 218, F-statistic: 5.116
Checking VIF for:  EndQ_TestCode ~ Mastery

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
^^^^^^^^^^^^^^^^Kolmogorov-Smirnov test for normality: Significant after power transformation
KstestResult(statistic=0.09188615733047367, pvalue=0.023618327904223718, statistic_location=1.4197611575795954, statistic_sign=-1)
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int64), 'outliers_abs_gt_2_5': [], 'outliers_abs_gt_2': []}
^^^^^^^^^^^^^^^^Outliers:  []
High Cook's D values:  []
Cooks D values:  []
Cooks D p-values:  []
^^^^^^^^^^^^^^^^High leverage points:  [0, 23, 177, 356, 444, 492, 495, 508, 544]
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.016,0.068,0.24,0.811
Mastery,0.123,0.072,1.702,0.09


Model adj. R^2: 0.009, R^2: 0.014, N. obs: 209, F-statistic: 2.898


In [66]:
# Run the blockwise selection
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.02,0.095,-0.215,0.83
Mastery*,0.205,0.099,2.081,0.039
Normative,0.137,0.121,1.13,0.26
Appearance,-0.084,0.108,-0.774,0.44
Prior_Knowledge,0.057,0.135,0.424,0.672
Mastery:Prior_Knowledge,-0.186,0.142,-1.305,0.193
Normative:Prior_Knowledge,-0.01,0.176,-0.058,0.954
Appearance:Prior_Knowledge,-0.097,0.17,-0.57,0.57


Model adj. R^2: 0.016, R^2: 0.048, N. obs: 218, F-statistic: 1.505
Checking VIF for:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowledge + Normative:P

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.04,0.095,-0.423,0.672
Mastery*,0.253,0.104,2.447,0.015
Normative,0.066,0.129,0.511,0.61
Appearance,-0.025,0.111,-0.227,0.82
Prior_Knowledge,0.083,0.135,0.613,0.54
Mastery:Prior_Knowledge,-0.208,0.146,-1.426,0.155
Normative:Prior_Knowledge,-0.038,0.187,-0.205,0.838
Appearance:Prior_Knowledge,-0.073,0.176,-0.414,0.679


Model adj. R^2: 0.016, R^2: 0.049, N. obs: 214, F-statistic: 1.502
Equation:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.05,0.105,-0.479,0.633
Mastery,0.108,0.077,1.392,0.165
Normative,0.142,0.095,1.492,0.137
Appearance,-0.133,0.087,-1.521,0.13
Prior_Knowledge,0.047,0.137,0.344,0.731
Mastery:Appearance,-0.003,0.086,-0.03,0.976
Normative:Appearance,0.05,0.077,0.651,0.516
Mastery:Normative,-0.007,0.088,-0.082,0.935


Model adj. R^2: 0.006, R^2: 0.038, N. obs: 218, F-statistic: 1.182
Checking VIF for:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predictor:  Prior_Knowledg

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.028,0.109,-0.254,0.8
Mastery,0.152,0.084,1.808,0.072
Normative,0.078,0.102,0.768,0.443
Appearance,-0.094,0.091,-1.034,0.302
Prior_Knowledge,0.056,0.138,0.402,0.688
Mastery:Appearance,0.005,0.103,0.049,0.961
Normative:Appearance,-0.049,0.093,-0.522,0.603
Mastery:Normative,-0.018,0.109,-0.164,0.87


Model adj. R^2: 0.003, R^2: 0.037, N. obs: 203, F-statistic: 1.073
Equation:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.034,0.097,-0.347,0.729
Mastery,0.116,0.081,1.427,0.155
Normative,0.126,0.091,1.382,0.169
Appearance,-0.134,0.085,-1.571,0.118
Prior_Knowledge,-0.058,0.158,-0.365,0.716
Mastery:Normative:Appearance,0.02,0.069,0.288,0.773
Mastery:Normative:Prior_Knowledge,0.178,0.138,1.294,0.197
Mastery:Appearance:Prior_Knowledge*,-0.297,0.144,-2.069,0.04
Normative:Appearance:Prior_Knowledge,0.12,0.107,1.126,0.261


Model adj. R^2: 0.023, R^2: 0.059, N. obs: 218, F-statistic: 1.626
Checking VIF for:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Ap

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.074,0.099,-0.75,0.454
Mastery,0.095,0.09,1.049,0.295
Normative,0.149,0.1,1.493,0.137
Appearance,-0.08,0.095,-0.846,0.399
Prior_Knowledge,0.063,0.166,0.379,0.705
Mastery:Normative:Appearance,0.074,0.106,0.692,0.489
Mastery:Normative:Prior_Knowledge,0.067,0.16,0.421,0.675
Mastery:Appearance:Prior_Knowledge,-0.258,0.188,-1.374,0.171
Normative:Appearance:Prior_Knowledge,0.013,0.149,0.088,0.93


Model adj. R^2: 0.013, R^2: 0.053, N. obs: 197, F-statistic: 1.321
The best blockwise model is below: 
Equation:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.027,0.094,-0.289,0.773
Mastery,0.124,0.071,1.75,0.082
Normative,0.124,0.087,1.439,0.152
Appearance,-0.12,0.083,-1.442,0.151
Prior_Knowledge,0.056,0.135,0.412,0.681


Model adj. R^2: 0.018, R^2: 0.036, N. obs: 218, F-statistic: 1.981
Checking VIF for:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int64), 'o

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.042,0.095,-0.442,0.659
Mastery*,0.147,0.072,2.041,0.042
Normative,0.09,0.089,1.016,0.311
Appearance,-0.09,0.085,-1.06,0.29
Prior_Knowledge,0.07,0.135,0.516,0.606


Model adj. R^2: 0.018, R^2: 0.036, N. obs: 217, F-statistic: 2.003
Displaying ANOVA results:
model1:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  EndQ_TestCode ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,210.182,0.0,,,
1,213.0,210.182,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


#### Read Code 
How often do you read the code copilot outputs?

Base Model adj. R^2: 0.043, R^2: 0.058, N. obs: 251, F-statistic: 3.781

Mastery is significant 

No interesting blockwise/stepwisevariations


In [67]:
base_model_eq = 'EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)

#print average value of EndQ_Read_Code, and average value of all the factors 
print("Average value of EndQ_Read_Code: ", df['EndQ_Read_Code'].mean())
print("Average value of each factor in a table: ")
df[['Mastery', 'Normative', 'Appearance', 'Prior_Knowledge']].mean()



Equation:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.084,0.094,-0.897,0.371
Mastery,0.12,0.07,1.701,0.09
Normative,0.126,0.086,1.462,0.145
Appearance,-0.146,0.083,-1.756,0.081
Prior_Knowledge,0.172,0.134,1.278,0.203


Model adj. R^2: 0.028, R^2: 0.046, N. obs: 218, F-statistic: 2.543
Checking VIF for:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
^^^^^^^^^^^^^^^^Kolmogorov-Smirnov test for normality: Significant after power transformation
KstestResult(statistic=0.08820287645868741, pvalue=0.031641038719673005, statistic_location=1.20743679651

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.07,0.094,-0.747,0.456
Mastery,0.097,0.072,1.363,0.174
Normative,0.159,0.088,1.797,0.074
Appearance*,-0.174,0.085,-2.059,0.041
Prior_Knowledge,0.158,0.134,1.18,0.239


Model adj. R^2: 0.030, R^2: 0.048, N. obs: 217, F-statistic: 2.652
Average value of EndQ_Read_Code:  1.6296851737616978e-17
Average value of each factor in a table: 


Mastery           -0.000
Normative         -0.000
Appearance         0.000
Prior_Knowledge    0.491
dtype: float64

In [68]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'EndQ_Read_Code', check_anova_before_adding=True, display_best_model=True)

Adding Mastery:Normative:Appearance significantly improves the model significantly. P val: 0.024176011518167964


Best adj R^2 before:  0.018301044208043238  Best adj R^2 after:  0.03682691954098116
models being compared: EndQ_Read_Code ~ Mastery and EndQ_Read_Code ~ Mastery + Mastery:Normative:Appearance
Equation:  EndQ_Read_Code ~ Mastery + Mastery:Normative:Appearance + Normative + Appearance


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.03,0.068,0.435,0.664
Mastery*,0.201,0.079,2.536,0.012
Mastery:Normative:Appearance*,-0.132,0.065,-2.015,0.045
Normative,0.121,0.086,1.415,0.159
Appearance,-0.111,0.084,-1.322,0.187


Model adj. R^2: 0.039, R^2: 0.056, N. obs: 218, F-statistic: 3.173
Checking VIF for:  EndQ_Read_Code ~ Mastery + Mastery:Normative:Appearance + Normative + Appearance
Regression eq:  Mastery ~ Mastery:Normative:Appearance + Normative + Appearance R^2:  0.29477782576281053
Predictor:  Mastery VIF:  1.4179928489651645
Regression eq:  Mastery:Normative:Appearance ~ Mastery + Normative + Appearance R^2:  0.27303773577937673
Predictor:  Mastery:Normative:Appearance VIF:  1.3755872198842407
Regression eq:  Normative ~ Mastery + Mastery:Normative:Appearance + Appearance R^2:  0.3961958610901082
Predictor:  Normative VIF:  1.6561661895948583
Regression eq:  Appearance ~ Mastery + Mastery:Normative:Appearance + Normative R^2:  0.37555414673467447
Predictor:  Appearance VIF:  1.6014198745509203

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
^^^^^^^^^^^^^^^^Kolmogorov-Smirnov test for normality: Significant after power transformation
KstestResult(statistic

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.001,0.07,0.021,0.983
Mastery,0.141,0.086,1.632,0.104
Mastery:Normative:Appearance,0.007,0.1,0.073,0.942
Normative,0.126,0.092,1.37,0.172
Appearance,-0.164,0.09,-1.828,0.069


Model adj. R^2: 0.030, R^2: 0.049, N. obs: 204, F-statistic: 2.572


In [69]:
# Run the blockwise selection
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.08,0.095,-0.851,0.396
Mastery,0.144,0.098,1.458,0.146
Normative,0.18,0.121,1.492,0.137
Appearance,-0.177,0.108,-1.645,0.101
Prior_Knowledge,0.173,0.135,1.278,0.203
Mastery:Prior_Knowledge,-0.053,0.142,-0.375,0.708
Normative:Prior_Knowledge,-0.12,0.176,-0.683,0.496
Appearance:Prior_Knowledge,0.08,0.17,0.471,0.638


Model adj. R^2: 0.018, R^2: 0.049, N. obs: 218, F-statistic: 1.557
Checking VIF for:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowledge + Normative:

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.05,0.095,-0.532,0.596
Mastery,0.129,0.104,1.242,0.216
Normative,0.205,0.13,1.584,0.115
Appearance*,-0.233,0.111,-2.09,0.038
Prior_Knowledge,0.133,0.135,0.984,0.326
Mastery:Prior_Knowledge,-0.04,0.146,-0.275,0.784
Normative:Prior_Knowledge,-0.188,0.187,-1.007,0.315
Appearance:Prior_Knowledge,0.177,0.177,1.003,0.317


Model adj. R^2: 0.018, R^2: 0.050, N. obs: 214, F-statistic: 1.548
Equation:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.12,0.104,-1.158,0.248
Mastery,0.14,0.077,1.821,0.07
Normative,0.098,0.094,1.038,0.3
Appearance,-0.152,0.087,-1.757,0.08
Prior_Knowledge,0.184,0.136,1.354,0.177
Mastery:Appearance,0.013,0.085,0.153,0.879
Normative:Appearance,0.008,0.076,0.101,0.92
Mastery:Normative,0.088,0.087,1.009,0.314


Model adj. R^2: 0.022, R^2: 0.054, N. obs: 218, F-statistic: 1.701
Checking VIF for:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predictor:  Prior_Knowled

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.047,0.109,-0.431,0.667
Mastery,0.11,0.084,1.314,0.19
Normative,0.121,0.102,1.192,0.235
Appearance,-0.168,0.091,-1.851,0.066
Prior_Knowledge,0.133,0.138,0.96,0.338
Mastery:Appearance,0.082,0.103,0.802,0.424
Normative:Appearance,-0.03,0.093,-0.317,0.752
Mastery:Normative,-0.035,0.109,-0.32,0.749


Model adj. R^2: 0.018, R^2: 0.052, N. obs: 203, F-statistic: 1.514
Equation:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.042,0.096,-0.432,0.666
Mastery*,0.201,0.08,2.498,0.013
Normative,0.112,0.091,1.233,0.219
Appearance,-0.117,0.085,-1.384,0.168
Prior_Knowledge,0.039,0.157,0.246,0.806
Mastery:Normative:Appearance,-0.13,0.068,-1.905,0.058
Mastery:Normative:Prior_Knowledge,0.148,0.137,1.084,0.28
Mastery:Appearance:Prior_Knowledge,-0.163,0.143,-1.143,0.255
Normative:Appearance:Prior_Knowledge,0.113,0.106,1.064,0.288


Model adj. R^2: 0.036, R^2: 0.071, N. obs: 218, F-statistic: 2.008
Checking VIF for:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:A

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.047,0.1,-0.47,0.639
Mastery,0.102,0.091,1.12,0.264
Normative,0.144,0.1,1.433,0.153
Appearance,-0.166,0.095,-1.74,0.084
Prior_Knowledge,0.104,0.167,0.623,0.534
Mastery:Normative:Appearance,-0.029,0.107,-0.27,0.787
Mastery:Normative:Prior_Knowledge,0.057,0.161,0.356,0.722
Mastery:Appearance:Prior_Knowledge,-0.056,0.189,-0.297,0.767
Normative:Appearance:Prior_Knowledge,0.136,0.15,0.905,0.367


Model adj. R^2: 0.015, R^2: 0.055, N. obs: 197, F-statistic: 1.380
The best blockwise model is below: 
Equation:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.084,0.094,-0.897,0.371
Mastery,0.12,0.07,1.701,0.09
Normative,0.126,0.086,1.462,0.145
Appearance,-0.146,0.083,-1.756,0.081
Prior_Knowledge,0.172,0.134,1.278,0.203


Model adj. R^2: 0.028, R^2: 0.046, N. obs: 218, F-statistic: 2.543
Checking VIF for:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
^^^^^^^^^^^^^^^^Kolmogorov-Smirnov test for normality: Significant after power transformation
KstestResult(statistic=0.08820287645868741, pvalue=0.031641038719673005, statistic_location=1.20743679651

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.07,0.094,-0.747,0.456
Mastery,0.097,0.072,1.363,0.174
Normative,0.159,0.088,1.797,0.074
Appearance*,-0.174,0.085,-2.059,0.041
Prior_Knowledge,0.158,0.134,1.18,0.239


Model adj. R^2: 0.030, R^2: 0.048, N. obs: 217, F-statistic: 2.652
Displaying ANOVA results:
model1:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  EndQ_Read_Code ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,208.065,0.0,,,
1,213.0,208.065,-0.0,-0.0,,


F(-0.0, 213.0) = nan, p = nan


## Exam Scores

### Total Final Exam Score (Copilot & Noncopilot together)

Base Model for Total Final Exam Score (Copilot & Noncopilot together)

With Mid: Model adj. R^2: 0.047, R^2: 0.065, N. obs: 215, F-statistic: 3.637
Without Mid: Model adj. R^2: 0.055, R^2: 0.070, N. obs: 251, F-statistic: 4.662

In [70]:
print("Mean mastery: ", df['Mastery'].mean())
print("Mean normative: ", df['Normative'].mean())
print("Mean appearance: ", df['Appearance'].mean())
print("Mean final exam score: ", df['Final_Exam_Score'].mean())
print("Mean prior knowledge: ", df['Prior_Knowledge'].mean())

Mean mastery:  -8.963268455689338e-17
Mean normative:  -1.2222638803212733e-17
Mean appearance:  2.4445277606425466e-17
Mean final exam score:  -1.6296851737616978e-17
Mean prior knowledge:  0.4908256880733945


With mid:
Mean mastery:  -8.963268455689338e-17
Mean normative:  -1.2222638803212733e-17
Mean appearance:  2.4445277606425466e-17
Mean final exam score:  -1.6296851737616978e-17
Mean prior knowledge:  0.49767441860465117

Without mid: 
Mean mastery:  5.551115123125783e-17
Mean normative:  -1.3877787807814457e-17
Mean appearance:  4.163336342344337e-17
Mean final exam score:  -1.3877787807814457e-17
Mean prior knowledge:  0.50199203187251

In [71]:
base_model_eq = 'Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_model_eq, df)

Equation:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.187,0.093,-2.006,0.046
Mastery,0.022,0.07,0.314,0.754
Normative,0.165,0.085,1.939,0.054
Appearance,-0.033,0.082,-0.405,0.686
Prior_Knowledge**,0.38,0.133,2.859,0.005


Model adj. R^2: 0.046, R^2: 0.064, N. obs: 218, F-statistic: 3.636
Checking VIF for:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int64),

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.181,0.094,-1.936,0.054
Mastery,0.013,0.071,0.186,0.852
Normative*,0.178,0.088,2.025,0.044
Appearance,-0.044,0.084,-0.528,0.598
Prior_Knowledge**,0.375,0.134,2.81,0.005


Model adj. R^2: 0.047, R^2: 0.064, N. obs: 217, F-statistic: 3.644


Best Stepwise Model for Total Final Exam Score (Copilot & Noncopilot together)

In [72]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'Final_Exam_Score', check_anova_before_adding=True, display_best_model=True)

#Dict of best models for each number of predictors. Keys are number of predictors, value is a tuple of the best model string equation and the model's adjusted R^2 value
#achievement_goals_dict_best_models

Adding Normative significantly improves the model significantly. P val: 0.03290282484070259
Best adj R^2 before:  0.0017282687862436585  Best adj R^2 after:  0.01813869934389145
models being compared: Final_Exam_Score ~ Mastery and Final_Exam_Score ~ Mastery + Normative
Adding Prior_Knowledge significantly improves the model significantly. P val: 0.004542110235270403
Best adj R^2 before:  0.01813869934389145  Best adj R^2 after:  0.05006636470500225
models being compared: Final_Exam_Score ~ Mastery + Normative and Final_Exam_Score ~ Mastery + Normative + Prior_Knowledge


Equation:  Final_Exam_Score ~ Mastery + Normative + Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.187,0.093,-2.013,0.045
Mastery,0.025,0.069,0.365,0.715
Normative*,0.145,0.069,2.106,0.036
Prior_Knowledge**,0.381,0.133,2.868,0.005


Model adj. R^2: 0.050, R^2: 0.063, N. obs: 218, F-statistic: 4.812
Checking VIF for:  Final_Exam_Score ~ Mastery + Normative + Prior_Knowledge
Regression eq:  Mastery ~ Normative + Prior_Knowledge R^2:  0.08052652964829654
Predictor:  Mastery VIF:  1.0875789593118925
Regression eq:  Normative ~ Mastery + Prior_Knowledge R^2:  0.07735959399469983
Predictor:  Normative VIF:  1.0838458769973438
Regression eq:  Prior_Knowledge ~ Mastery + Normative R^2:  0.0062533583232448064
Predictor:  Prior_Knowledge VIF:  1.0062927088867375

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dtype=int64), 'outliers_abs_gt_2_5': [], 'outliers_abs_gt_2': []}
^^^^^^^^^^^^^^^^Outliers:  []
High Cook's D values:  []
Cooks D values:  []
Cooks D p-values:  []
^^^^^^^^^^^^^^^^High leverage poin

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.164,0.094,-1.741,0.083
Mastery,0.025,0.073,0.346,0.73
Normative,0.144,0.075,1.916,0.057
Prior_Knowledge*,0.345,0.135,2.56,0.011


Model adj. R^2: 0.042, R^2: 0.056, N. obs: 212, F-statistic: 4.121


### Blockwise testing for Total Final Exam Scores

In [73]:

# Run the blockwise selection
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True, display_all_models=display_nonsignificant_tables)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())

Equation:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.186,0.094,-1.988,0.048
Mastery,-0.0,0.098,-0.002,0.999
Normative,0.206,0.12,1.724,0.086
Appearance,-0.034,0.107,-0.315,0.753
Prior_Knowledge**,0.38,0.134,2.841,0.005
Mastery:Prior_Knowledge,0.04,0.141,0.283,0.777
Normative:Prior_Knowledge,-0.078,0.174,-0.449,0.654
Appearance:Prior_Knowledge,0.008,0.169,0.045,0.964


Model adj. R^2: 0.034, R^2: 0.065, N. obs: 218, F-statistic: 2.094
Checking VIF for:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowledge + Normativ

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.173,0.095,-1.818,0.07
Mastery,-0.006,0.104,-0.057,0.955
Normative,0.216,0.13,1.662,0.098
Appearance,-0.058,0.112,-0.517,0.606
Prior_Knowledge**,0.376,0.136,2.767,0.006
Mastery:Prior_Knowledge,0.055,0.147,0.376,0.707
Normative:Prior_Knowledge,-0.088,0.188,-0.466,0.642
Appearance:Prior_Knowledge,0.027,0.177,0.153,0.878


Model adj. R^2: 0.031, R^2: 0.063, N. obs: 214, F-statistic: 1.976
Equation:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.17,0.103,-1.653,0.1
Mastery,0.016,0.076,0.215,0.83
Normative,0.176,0.094,1.877,0.062
Appearance,-0.03,0.086,-0.354,0.724
Prior_Knowledge**,0.375,0.135,2.779,0.006
Mastery:Appearance,-0.016,0.085,-0.187,0.852
Normative:Appearance,-0.009,0.076,-0.114,0.91
Mastery:Normative,-0.028,0.087,-0.327,0.744


Model adj. R^2: 0.034, R^2: 0.065, N. obs: 218, F-statistic: 2.102
Checking VIF for:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predictor:  Prior_Knowl

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.169,0.111,-1.523,0.129
Mastery,-0.006,0.086,-0.066,0.947
Normative,0.16,0.104,1.544,0.124
Appearance,-0.068,0.093,-0.731,0.466
Prior_Knowledge**,0.391,0.141,2.772,0.006
Mastery:Appearance,0.017,0.105,0.167,0.868
Normative:Appearance,0.03,0.095,0.318,0.751
Mastery:Normative,-0.035,0.111,-0.312,0.756


Model adj. R^2: 0.024, R^2: 0.058, N. obs: 203, F-statistic: 1.724
Equation:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.195,0.094,-2.073,0.039
Mastery,-0.019,0.079,-0.242,0.809
Normative*,0.207,0.089,2.329,0.021
Appearance,-0.036,0.083,-0.432,0.667
Prior_Knowledge***,0.597,0.153,3.889,0.0
Mastery:Normative:Appearance,0.026,0.067,0.394,0.694
Mastery:Normative:Prior_Knowledge**,-0.369,0.134,-2.756,0.006
Mastery:Appearance:Prior_Knowledge*,0.287,0.14,2.053,0.041
Normative:Appearance:Prior_Knowledge,-0.202,0.104,-1.943,0.053


Model adj. R^2: 0.075, R^2: 0.109, N. obs: 218, F-statistic: 3.205
Checking VIF for:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.206,0.099,-2.08,0.039
Mastery,-0.031,0.09,-0.339,0.735
Normative,0.194,0.1,1.943,0.054
Appearance,-0.072,0.095,-0.759,0.449
Prior_Knowledge***,0.575,0.166,3.473,0.001
Mastery:Normative:Appearance,0.118,0.106,1.11,0.268
Mastery:Normative:Prior_Knowledge,-0.253,0.16,-1.58,0.116
Mastery:Appearance:Prior_Knowledge,0.31,0.188,1.647,0.101
Normative:Appearance:Prior_Knowledge,-0.264,0.15,-1.768,0.079


Model adj. R^2: 0.051, R^2: 0.090, N. obs: 197, F-statistic: 2.318
----------------------Adding block '+ Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge' improves the model significantly.
The best blockwise model is below: 
Equation:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.195,0.094,-2.073,0.039
Mastery,-0.019,0.079,-0.242,0.809
Normative*,0.207,0.089,2.329,0.021
Appearance,-0.036,0.083,-0.432,0.667
Prior_Knowledge***,0.597,0.153,3.889,0.0
Mastery:Normative:Appearance,0.026,0.067,0.394,0.694
Mastery:Normative:Prior_Knowledge**,-0.369,0.134,-2.756,0.006
Mastery:Appearance:Prior_Knowledge*,0.287,0.14,2.053,0.041
Normative:Appearance:Prior_Knowledge,-0.202,0.104,-1.943,0.053


Model adj. R^2: 0.075, R^2: 0.109, N. obs: 218, F-statistic: 3.205
Checking VIF for:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.206,0.099,-2.08,0.039
Mastery,-0.031,0.09,-0.339,0.735
Normative,0.194,0.1,1.943,0.054
Appearance,-0.072,0.095,-0.759,0.449
Prior_Knowledge***,0.575,0.166,3.473,0.001
Mastery:Normative:Appearance,0.118,0.106,1.11,0.268
Mastery:Normative:Prior_Knowledge,-0.253,0.16,-1.58,0.116
Mastery:Appearance:Prior_Knowledge,0.31,0.188,1.647,0.101
Normative:Appearance:Prior_Knowledge,-0.264,0.15,-1.768,0.079


Model adj. R^2: 0.051, R^2: 0.090, N. obs: 197, F-statistic: 2.318
Displaying ANOVA results:
model1:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,204.065,0.0,,,
1,209.0,194.178,4.0,9.887,2.66,0.034


F(4.0, 209.0) = 2.66, p = 0.03
NOTE: ANOVA The difference between models is significant.


### Copilot Final Exam Question 

Base Model for Copilot Final Exam Question

In [74]:
"""
base_model_eq = 'Final_copilot_question ~ Mastery+Normative+Appearance+Prior_Knowledge'
base_model = display_model_info(base_model_eq, df)
"""

"\nbase_model_eq = 'Final_copilot_question ~ Mastery+Normative+Appearance+Prior_Knowledge'\nbase_model = display_model_info(base_model_eq, df)\n"

Best Stepwise Model for Copilot Final Exam Question

In [75]:
"""
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars, 'Final_copilot_question', display_best_model=True)
"""

"\n_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars, 'Final_copilot_question', display_best_model=True)\n"

### Blockwise testing for Copilot Question Scores

In [76]:
"""
_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())
"""

'\n_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True)\n\ndisplay_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())\n'

### Non-Copilot Final Exam Question 

Base Model for Non-Copilot Final Exam Question

In [77]:
base_model_eq = 'Final_noncopilot_questions ~ Mastery+Normative+Appearance+Prior_Knowledge'
base_model = display_model_info(base_model_eq, df)


Equation:  Final_noncopilot_questions ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.237,0.092,-2.571,0.011
Mastery,0.003,0.069,0.045,0.964
Normative,0.165,0.084,1.96,0.051
Appearance,-0.028,0.081,-0.35,0.726
Prior_Knowledge***,0.482,0.132,3.663,0.0


Model adj. R^2: 0.067, R^2: 0.085, N. obs: 218, F-statistic: 4.920
Checking VIF for:  Final_noncopilot_questions ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([], dty

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.229,0.092,-2.472,0.014
Mastery,-0.009,0.07,-0.134,0.893
Normative*,0.184,0.087,2.117,0.035
Appearance,-0.045,0.083,-0.538,0.591
Prior_Knowledge***,0.474,0.132,3.596,0.0


Model adj. R^2: 0.068, R^2: 0.085, N. obs: 217, F-statistic: 4.941


Best Stepwise Model for Non-Copilot Final Exam Question

In [78]:
#_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars,'Final_noncopilot_questions', check_anova_before_adding=True, display_best_model=True)

### Blockwise testing for Non-Copilot Scores

In [79]:

_, best_model_eq, best_models = blockwise_selection(df, possible_blocks, base_model_eq,check_anova_before_adding=True, display_best_model=True)

display_anova(ols(base_model_eq, df).fit(), ols(best_model_eq, df).fit())


----------------------Adding block '+ Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge' improves the model significantly.
The best blockwise model is below: 
Equation:  Final_noncopilot_questions ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.245,0.093,-2.641,0.009
Mastery,-0.045,0.078,-0.578,0.564
Normative*,0.218,0.088,2.49,0.014
Appearance,-0.035,0.082,-0.43,0.667
Prior_Knowledge***,0.692,0.151,4.582,0.0
Mastery:Normative:Appearance,0.026,0.066,0.394,0.694
Mastery:Normative:Prior_Knowledge**,-0.415,0.132,-3.146,0.002
Mastery:Appearance:Prior_Knowledge*,0.322,0.138,2.338,0.02
Normative:Appearance:Prior_Knowledge,-0.179,0.102,-1.749,0.082


Model adj. R^2: 0.102, R^2: 0.135, N. obs: 218, F-statistic: 4.094
Checking VIF for:  Final_noncopilot_questions ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.256,0.098,-2.62,0.01
Mastery,-0.071,0.089,-0.801,0.424
Normative*,0.224,0.098,2.271,0.024
Appearance,-0.08,0.093,-0.854,0.394
Prior_Knowledge***,0.658,0.163,4.028,0.0
Mastery:Normative:Appearance,0.134,0.105,1.276,0.203
Mastery:Normative:Prior_Knowledge,-0.311,0.158,-1.968,0.051
Mastery:Appearance:Prior_Knowledge,0.327,0.186,1.762,0.08
Normative:Appearance:Prior_Knowledge,-0.198,0.147,-1.341,0.181


Model adj. R^2: 0.078, R^2: 0.115, N. obs: 197, F-statistic: 3.067
Displaying ANOVA results:
model1:  Final_noncopilot_questions ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  Final_noncopilot_questions ~ Mastery+Normative+Appearance+Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,199.563,0.0,,,
1,209.0,188.467,4.0,11.096,3.076,0.017


F(4.0, 209.0) = 3.08, p = 0.02
NOTE: ANOVA The difference between models is significant.


## Interest

Base Model for Interest

In [80]:
base_interest_eq = 'EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge' 
base_model = display_model_info(base_interest_eq, df)

Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.3,0.084,-3.564,0.0
Mastery***,0.331,0.063,5.257,0.0
Normative,0.051,0.077,0.657,0.512
Appearance,0.017,0.074,0.234,0.815
Prior_Knowledge***,0.611,0.12,5.079,0.0


Model adj. R^2: 0.219, R^2: 0.234, N. obs: 218, F-statistic: 16.241
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge R^2:  0.0935767917653082
Predictor:  Mastery VIF:  1.1032374181454974
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge R^2:  0.39545515153732314
Predictor:  Normative VIF:  1.6541369966892334
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge R^2:  0.34778493857366954
Predictor:  Appearance VIF:  1.5332365950168307
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance R^2:  0.006328395020115396
Predictor:  Prior_Knowledge VIF:  1.0063686986610063

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([8]), 'outliers_abs_g

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.297,0.081,-3.683,0.0
Mastery***,0.371,0.06,6.145,0.0
Normative,0.066,0.073,0.905,0.367
Appearance,0.022,0.071,0.315,0.753
Prior_Knowledge***,0.631,0.115,5.493,0.0


Model adj. R^2: 0.271, R^2: 0.285, N. obs: 215, F-statistic: 20.885
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.293,0.085,-3.466,0.001
Mastery***,0.321,0.064,4.982,0.0
Normative,0.067,0.08,0.837,0.404
Appearance,0.004,0.076,0.046,0.963
Prior_Knowledge***,0.605,0.121,5.012,0.0


Model adj. R^2: 0.212, R^2: 0.226, N. obs: 217, F-statistic: 15.513


Best model for Interest

In [81]:
_, best_model_eq, best_models = stepwise_selection(df, achievement_goals_replication_vars, 'EndInterest', check_anova_before_adding=True, display_best_model=True)

Adding Prior_Knowledge significantly improves the model significantly. P val: 6.87193298646266e-07
Best adj R^2 before:  0.13227963245173513  Best adj R^2 after:  0.22288291774771207
models being compared: EndInterest ~ Mastery and EndInterest ~ Mastery + Prior_Knowledge
Equation:  EndInterest ~ Mastery + Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.301,0.084,-3.59,0.0
Mastery***,0.346,0.06,5.77,0.0
Prior_Knowledge***,0.614,0.12,5.117,0.0


Model adj. R^2: 0.223, R^2: 0.230, N. obs: 218, F-statistic: 32.119
Checking VIF for:  EndInterest ~ Mastery + Prior_Knowledge
Regression eq:  Mastery ~ Prior_Knowledge R^2:  0.005550764284573129
Predictor:  Mastery VIF:  1.0055817472478419
Regression eq:  Prior_Knowledge ~ Mastery R^2:  0.005550764284573018
Predictor:  Prior_Knowledge VIF:  1.0055817472478419

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
Breusch-Pagan test for homoscedasticity: Nothing significant
{'outliers_abs_gt_3': array([8]), 'outliers_abs_gt_2_5': [], 'outliers_abs_gt_2': []}
^^^^^^^^^^^^^^^^Outliers:  [22]
High Cook's D values:  []
Cooks D values:  []
Cooks D p-values:  []
^^^^^^^^^^^^^^^^High leverage points:  [356]
Re-running model without outliers


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.301,0.082,-3.657,0.0
Mastery***,0.359,0.059,6.095,0.0
Prior_Knowledge***,0.639,0.118,5.426,0.0


Model adj. R^2: 0.244, R^2: 0.251, N. obs: 217, F-statistic: 35.787
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.296,0.084,-3.508,0.001
Mastery***,0.34,0.061,5.609,0.0
Prior_Knowledge***,0.609,0.12,5.062,0.0


Model adj. R^2: 0.215, R^2: 0.222, N. obs: 217, F-statistic: 30.548


### Blockwise testing for Interest

Equations: 


eq = base_model_eq + priorexpblock 


eq = base_model_eq + betweenachievementsblock


eq = base_model_eq + threewayinteractionsblock

Result: None are significant

In [82]:
eq = base_interest_eq + priorexpblock
block_model = display_model_info(eq, df)
display_anova(base_model, block_model)

Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+ Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.301,0.085,-3.548,0.0
Mastery***,0.357,0.088,4.05,0.0
Normative,0.001,0.108,0.008,0.993
Appearance,-0.002,0.097,-0.018,0.985
Prior_Knowledge***,0.611,0.121,5.052,0.0
Mastery:Prior_Knowledge,-0.043,0.128,-0.333,0.739
Normative:Prior_Knowledge,0.086,0.157,0.546,0.585
Appearance:Prior_Knowledge,0.039,0.153,0.254,0.8


Model adj. R^2: 0.211, R^2: 0.237, N. obs: 218, F-statistic: 9.305
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+ Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.5331363656849835
Predictor:  Mastery VIF:  2.1419530811544196
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6896133405868825
Predictor:  Normative VIF:  3.221787952777387
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge R^2:  0.6106160087940595
Predictor:  Appearance VIF:  2.5681589962210647
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Prior_Knowledge + Normative:Prio

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.301,0.083,-3.629,0.0
Mastery***,0.357,0.086,4.141,0.0
Normative,0.001,0.106,0.009,0.993
Appearance,-0.002,0.094,-0.019,0.985
Prior_Knowledge***,0.636,0.119,5.362,0.0
Mastery:Prior_Knowledge,-0.017,0.125,-0.137,0.891
Normative:Prior_Knowledge,0.094,0.154,0.614,0.54
Appearance:Prior_Knowledge,0.04,0.149,0.266,0.791


Model adj. R^2: 0.234, R^2: 0.258, N. obs: 217, F-statistic: 10.405
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.304,0.086,-3.547,0.0
Mastery***,0.318,0.094,3.391,0.001
Normative,0.056,0.117,0.478,0.633
Appearance,-0.02,0.101,-0.198,0.844
Prior_Knowledge***,0.6,0.122,4.91,0.0
Mastery:Prior_Knowledge,-0.02,0.132,-0.15,0.881
Normative:Prior_Knowledge,0.037,0.169,0.221,0.825
Appearance:Prior_Knowledge,0.058,0.16,0.361,0.718


Model adj. R^2: 0.196, R^2: 0.222, N. obs: 214, F-statistic: 8.395
Displaying ANOVA results:
model1:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+ Mastery:Prior_Knowledge + Normative:Prior_Knowledge + Appearance:Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,167.052,0.0,,,
1,210.0,166.391,3.0,0.66,0.278,0.841


F(3.0, 210.0) = 0.28, p = 0.84


In [83]:
eq3 = base_interest_eq + betweenachievementsblock
m3 = display_model_info(eq3, df)

display_anova(base_model, m3)

Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+ Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.288,0.093,-3.095,0.002
Mastery***,0.327,0.069,4.755,0.0
Normative,0.067,0.085,0.785,0.433
Appearance,0.014,0.078,0.186,0.853
Prior_Knowledge***,0.604,0.122,4.943,0.0
Mastery:Appearance,-0.05,0.077,-0.658,0.512
Normative:Appearance,-0.005,0.068,-0.067,0.947
Mastery:Normative,-0.007,0.078,-0.091,0.928


Model adj. R^2: 0.211, R^2: 0.237, N. obs: 218, F-statistic: 9.299
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+ Mastery:Appearance + Normative:Appearance + Mastery:Normative
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.233232887786344
Predictor:  Mastery VIF:  1.3041769581287868
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.49410354760488495
Predictor:  Normative VIF:  1.9766890937178987
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.401247572299262
Predictor:  Appearance VIF:  1.670139366014912
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + Mastery:Appearance + Normative:Appearance + Mastery:Normative R^2:  0.02439504457485142
Predictor:  Prior_Knowledge V

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.284,0.091,-3.116,0.002
Mastery***,0.345,0.068,5.099,0.0
Normative,0.064,0.083,0.771,0.442
Appearance,0.017,0.076,0.229,0.819
Prior_Knowledge***,0.631,0.12,5.272,0.0
Mastery:Appearance,-0.048,0.075,-0.646,0.519
Normative:Appearance,-0.017,0.067,-0.256,0.798
Mastery:Normative,0.001,0.077,0.019,0.985


Model adj. R^2: 0.233, R^2: 0.258, N. obs: 217, F-statistic: 10.369
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.256,0.099,-2.591,0.01
Mastery***,0.306,0.076,4.017,0.0
Normative,0.081,0.092,0.881,0.379
Appearance,0.003,0.083,0.036,0.972
Prior_Knowledge***,0.546,0.126,4.35,0.0
Mastery:Appearance,0.038,0.093,0.411,0.681
Normative:Appearance,0.042,0.085,0.494,0.622
Mastery:Normative,-0.069,0.099,-0.703,0.483


Model adj. R^2: 0.194, R^2: 0.222, N. obs: 203, F-statistic: 7.933
Displaying ANOVA results:
model1:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+ Mastery:Appearance + Normative:Appearance + Mastery:Normative


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,167.052,0.0,,,
1,210.0,166.415,3.0,0.637,0.268,0.849


F(3.0, 210.0) = 0.27, p = 0.85


In [84]:
eq4 = base_interest_eq + threewayinteractionsblock
m4 = display_model_info(eq4, df)
display_anova(base_model, m4)

Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+ Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.262,0.087,-3.023,0.003
Mastery***,0.388,0.072,5.36,0.0
Normative,0.062,0.082,0.761,0.447
Appearance,0.041,0.076,0.545,0.587
Prior_Knowledge***,0.578,0.141,4.101,0.0
Mastery:Normative:Appearance,-0.118,0.062,-1.919,0.056
Mastery:Normative:Prior_Knowledge,-0.085,0.123,-0.694,0.488
Mastery:Appearance:Prior_Knowledge,0.09,0.128,0.7,0.485
Normative:Appearance:Prior_Knowledge,0.04,0.095,0.423,0.673


Model adj. R^2: 0.220, R^2: 0.249, N. obs: 218, F-statistic: 8.651
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+ Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.31269865250744777
Predictor:  Mastery VIF:  1.4549658656253925
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge R^2:  0.46004163856945535
Predictor:  Normative VIF:  1.8519946563113476
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appea

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.266,0.076,-3.478,0.001
Mastery***,0.499,0.064,7.785,0.0
Normative,0.073,0.071,1.031,0.304
Appearance,0.061,0.066,0.912,0.363
Prior_Knowledge***,0.695,0.124,5.619,0.0
Mastery:Normative:Appearance*,-0.136,0.054,-2.523,0.012
Mastery:Normative:Prior_Knowledge,-0.052,0.108,-0.479,0.633
Mastery:Appearance:Prior_Knowledge,0.086,0.111,0.776,0.439
Normative:Appearance:Prior_Knowledge,-0.017,0.083,-0.2,0.842


Model adj. R^2: 0.375, R^2: 0.399, N. obs: 207, F-statistic: 16.438
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept**,-0.254,0.092,-2.765,0.006
Mastery***,0.349,0.084,4.169,0.0
Normative,0.059,0.092,0.635,0.526
Appearance,0.02,0.088,0.228,0.82
Prior_Knowledge***,0.566,0.153,3.691,0.0
Mastery:Normative:Appearance,-0.138,0.099,-1.404,0.162
Mastery:Normative:Prior_Knowledge,-0.175,0.148,-1.181,0.239
Mastery:Appearance:Prior_Knowledge,0.192,0.174,1.102,0.272
Normative:Appearance:Prior_Knowledge,0.188,0.138,1.36,0.176


Model adj. R^2: 0.210, R^2: 0.243, N. obs: 197, F-statistic: 7.530
Displaying ANOVA results:
model1:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge
model2:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+ Mastery:Normative:Appearance + Mastery:Normative:Prior_Knowledge + Mastery:Appearance:Prior_Knowledge + Normative:Appearance:Prior_Knowledge


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,213.0,167.052,0.0,,,
1,209.0,163.768,4.0,3.283,1.048,0.384


F(4.0, 209.0) = 1.05, p = 0.38


# 2nd paper: Self Efficacy & Other Attributes

Independent variables attributes we're considering: 
- Mastery + Normative + Appearance + Prior_Knowledge + ProgIntelligenceBelief + CSMajor + SelfEfficacy + CanDoThisTask

Dependent variables we're considering: 
- attitudes, performance, and self-reported use of GenAI 

Note: 
Self efficacy was measured pre, and sense of belonging was measured post. 

In [85]:
all_factors = ['Mastery', 'Normative', 'Appearance', 'Prior_Knowledge', 'CSMajor', 'SelfEfficacy', 'EndProgIntelligenceBelief', 'CanDoThisTask', 'MidBelonging', 'EndBelonging', 'PreProgIntelligenceBelief', 'MidProgIntelligenceBelief']

In [86]:
base_factors = ['Mastery', 'Normative', 'Appearance', 'Prior_Knowledge', 'CSMajor']

### Default/Non temporal blocks? 

Mastery + Normative + Appearance + Prior_Knowledge + ProgIntelligenceBelief + CSMajor

### Pre-Survey Blocks 

Programming Intelligence, Self Efficacy


### End-Survey Blocks 

Programming Intelligence
CanDoThisTask
Belonging 


### Interactions we're interested in 

## Interest

#### Base  

In [87]:
base_all_factors_interest_eq = 'EndInterest ~ ' + '+'.join(base_factors)
base_model = display_model_info(base_all_factors_interest_eq, df)

Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.547,0.089,-6.137,0.0
Mastery***,0.242,0.061,3.993,0.0
Normative,0.062,0.072,0.858,0.392
Appearance,-0.035,0.07,-0.497,0.62
Prior_Knowledge***,0.465,0.115,4.052,0.0
CSMajor***,0.701,0.121,5.816,0.0


Model adj. R^2: 0.324, R^2: 0.339, N. obs: 218, F-statistic: 21.761
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + CSMajor R^2:  0.15151841251770903
Predictor:  Mastery VIF:  1.1785759582212165
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + CSMajor R^2:  0.39587028080118336
Predictor:  Normative VIF:  1.6552736411083662
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + CSMajor R^2:  0.3585097875688924
Predictor:  Appearance VIF:  1.5588702378017878
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + CSMajor R^2:  0.05386982895525594
Predictor:  Prior_Knowledge VIF:  1.0569370162836804
Regression eq:  CSMajor ~ Mastery + Normative + Appearance + Prior_Knowledge R^2:  0.1354449308354171
Predictor:  CSMajor VIF:  1.1566643186376746

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogo

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.589,0.08,-7.348,0.0
Mastery***,0.319,0.054,5.943,0.0
Normative,0.067,0.063,1.07,0.286
Appearance,-0.078,0.061,-1.262,0.208
Prior_Knowledge***,0.534,0.101,5.294,0.0
CSMajor***,0.727,0.107,6.791,0.0


Model adj. R^2: 0.462, R^2: 0.475, N. obs: 206, F-statistic: 36.256
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.54,0.089,-6.04,0.0
Mastery***,0.231,0.062,3.734,0.0
Normative,0.078,0.074,1.057,0.292
Appearance,-0.049,0.071,-0.689,0.492
Prior_Knowledge***,0.458,0.115,3.982,0.0
CSMajor***,0.702,0.121,5.821,0.0


Model adj. R^2: 0.318, R^2: 0.333, N. obs: 217, F-statistic: 21.114


In [88]:
pre_factors_block = '+ PreProgIntelligenceBelief' #+ SelfEfficacy'
mid_factors_block = '+ MidProgIntelligenceBelief' #+ Belonging' 
end_factors_block = '+ EndProgIntelligenceBelief' #+ CanDoThisTask + Belonging'


#possible_blocks = [pre_factors_block, end_factors_block]

### Pre-Factors Block

In [89]:
pre_eq = base_all_factors_interest_eq + pre_factors_block

pre_model = display_model_info(pre_eq, df)


Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+ PreProgIntelligenceBelief


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.527,0.088,-5.986,0.0
Mastery*,0.163,0.066,2.453,0.015
Normative,0.056,0.071,0.787,0.432
Appearance,-0.001,0.07,-0.009,0.993
Prior_Knowledge***,0.449,0.113,3.962,0.0
CSMajor***,0.676,0.119,5.674,0.0
PreProgIntelligenceBelief**,-0.176,0.064,-2.756,0.006


Model adj. R^2: 0.344, R^2: 0.362, N. obs: 218, F-statistic: 19.963
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+ PreProgIntelligenceBelief
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + CSMajor + PreProgIntelligenceBelief R^2:  0.311978376243531
Predictor:  Mastery VIF:  1.4534426905657232
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + CSMajor + PreProgIntelligenceBelief R^2:  0.3964284433493688
Predictor:  Normative VIF:  1.6568043821502274
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + CSMajor + PreProgIntelligenceBelief R^2:  0.37857395231986657
Predictor:  Appearance VIF:  1.6092019375968127
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + CSMajor + PreProgIntelligenceBelief R^2:  0.056552144908009194
Predictor:  Prior_Knowledge VIF:  1.0599419931931426
Regression eq:  CSMajor ~ Mastery + Normative + Appearance + Prior_Knowledge + PreProgIntelligenceBelief

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.492,0.084,-5.879,0.0
Mastery**,0.201,0.063,3.206,0.002
Normative,0.062,0.067,0.928,0.355
Appearance,0.021,0.066,0.312,0.756
Prior_Knowledge***,0.491,0.107,4.581,0.0
CSMajor***,0.641,0.113,5.665,0.0
PreProgIntelligenceBelief**,-0.183,0.06,-3.025,0.003


Model adj. R^2: 0.399, R^2: 0.415, N. obs: 214, F-statistic: 24.522
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.528,0.088,-5.988,0.0
Mastery**,0.18,0.069,2.62,0.009
Normative,0.077,0.073,1.053,0.294
Appearance,-0.025,0.072,-0.344,0.731
Prior_Knowledge***,0.465,0.114,4.078,0.0
CSMajor***,0.655,0.12,5.474,0.0
PreProgIntelligenceBelief*,-0.15,0.066,-2.26,0.025


Model adj. R^2: 0.342, R^2: 0.360, N. obs: 215, F-statistic: 19.534


In [90]:
display_anova(base_model, pre_model)

Displaying ANOVA results:
model1:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor
model2:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+ PreProgIntelligenceBelief


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,212.0,144.063,0.0,,,
1,211.0,139.059,1.0,5.004,7.593,0.006


F(1.0, 211.0) = 7.59, p = 0.01
NOTE: ANOVA The difference between models is significant.


In [91]:
pre_eq = base_all_factors_interest_eq + mid_factors_block

mid_model = display_model_info(pre_eq, df)


Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+ MidProgIntelligenceBelief


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.384,0.085,-4.532,0.0
Mastery*,0.143,0.057,2.502,0.013
Normative,0.093,0.066,1.424,0.156
Appearance,-0.069,0.064,-1.088,0.278
Prior_Knowledge**,0.299,0.107,2.785,0.006
CSMajor***,0.522,0.113,4.618,0.0
MidProgIntelligenceBelief***,-0.386,0.058,-6.687,0.0


Model adj. R^2: 0.439, R^2: 0.455, N. obs: 218, F-statistic: 29.325
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+ MidProgIntelligenceBelief
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + CSMajor + MidProgIntelligenceBelief R^2:  0.20854020014702424
Predictor:  Mastery VIF:  1.2634880510491668
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + CSMajor + MidProgIntelligenceBelief R^2:  0.39903631790108174
Predictor:  Normative VIF:  1.6639940645122056
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + CSMajor + MidProgIntelligenceBelief R^2:  0.3627675157106338
Predictor:  Appearance VIF:  1.5692859743570475
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + CSMajor + MidProgIntelligenceBelief R^2:  0.10435847737243042
Predictor:  Prior_Knowledge VIF:  1.116518132239192
Regression eq:  CSMajor ~ Mastery + Normative + Appearance + Prior_Knowledge + MidProgIntelligenceBelief

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.392,0.074,-5.304,0.0
Mastery*,0.127,0.05,2.555,0.011
Normative,0.11,0.057,1.91,0.058
Appearance,-0.057,0.056,-1.022,0.308
Prior_Knowledge*,0.23,0.094,2.456,0.015
CSMajor***,0.612,0.097,6.288,0.0
MidProgIntelligenceBelief***,-0.48,0.051,-9.361,0.0


Model adj. R^2: 0.590, R^2: 0.602, N. obs: 206, F-statistic: 50.101
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.384,0.085,-4.505,0.0
Mastery*,0.137,0.059,2.338,0.02
Normative,0.108,0.068,1.604,0.11
Appearance,-0.091,0.065,-1.397,0.164
Prior_Knowledge**,0.286,0.108,2.653,0.009
CSMajor***,0.538,0.114,4.725,0.0
MidProgIntelligenceBelief***,-0.376,0.059,-6.403,0.0


Model adj. R^2: 0.434, R^2: 0.449, N. obs: 216, F-statistic: 28.421


In [92]:
display_anova(base_model, mid_model)

Displaying ANOVA results:
model1:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor
model2:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+ MidProgIntelligenceBelief


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,212.0,144.063,0.0,,,
1,211.0,118.873,1.0,25.19,44.713,0.0


F(1.0, 211.0) = 44.71, p = 0.00
NOTE: ANOVA The difference between models is significant.


### End-Factors Block

In [93]:
end_eq = base_all_factors_interest_eq + end_factors_block

end_model = display_model_info(end_eq, df)



Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+ EndProgIntelligenceBelief


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.356,0.081,-4.4,0.0
Mastery**,0.145,0.054,2.677,0.008
Normative,0.095,0.063,1.517,0.131
Appearance,-0.047,0.061,-0.775,0.439
Prior_Knowledge*,0.241,0.103,2.332,0.021
CSMajor***,0.523,0.107,4.882,0.0
EndProgIntelligenceBelief***,-0.45,0.054,-8.316,0.0


Model adj. R^2: 0.488, R^2: 0.502, N. obs: 218, F-statistic: 35.489
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+ EndProgIntelligenceBelief
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + CSMajor + EndProgIntelligenceBelief R^2:  0.19146629274555127
Predictor:  Mastery VIF:  1.23680681587873
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + CSMajor + EndProgIntelligenceBelief R^2:  0.3983485705904827
Predictor:  Normative VIF:  1.6620919541094359
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + CSMajor + EndProgIntelligenceBelief R^2:  0.35889837071473163
Predictor:  Appearance VIF:  1.559815096890097
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + CSMajor + EndProgIntelligenceBelief R^2:  0.11804908399915959
Predictor:  Prior_Knowledge VIF:  1.1338499477209536
Regression eq:  CSMajor ~ Mastery + Normative + Appearance + Prior_Knowledge + EndProgIntelligenceBelief R

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.351,0.081,-4.316,0.0
Mastery*,0.14,0.056,2.498,0.013
Normative,0.116,0.064,1.792,0.075
Appearance,-0.068,0.062,-1.09,0.277
Prior_Knowledge*,0.249,0.105,2.384,0.018
CSMajor***,0.517,0.109,4.754,0.0
EndProgIntelligenceBelief***,-0.434,0.058,-7.439,0.0


Model adj. R^2: 0.477, R^2: 0.492, N. obs: 214, F-statistic: 33.421


In [94]:
display_anova(base_model, end_model)

Displaying ANOVA results:
model1:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor
model2:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+ EndProgIntelligenceBelief


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,212.0,144.063,0.0,,,
1,211.0,108.503,1.0,35.56,69.152,0.0


F(1.0, 211.0) = 69.15, p = 0.00
NOTE: ANOVA The difference between models is significant.


In [95]:
#model with all factors 

all_factors_interest_eq = 'EndInterest ~ ' + '+'.join(all_factors)

all_factors_model = display_model_info(all_factors_interest_eq, df)

Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+SelfEfficacy+EndProgIntelligenceBelief+CanDoThisTask+MidBelonging+EndBelonging+PreProgIntelligenceBelief+MidProgIntelligenceBelief


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.158,0.079,-2.002,0.047
Mastery,0.094,0.054,1.734,0.084
Normative,0.056,0.058,0.976,0.33
Appearance,-0.038,0.056,-0.674,0.501
Prior_Knowledge,-0.028,0.102,-0.276,0.783
CSMajor***,0.379,0.1,3.79,0.0
SelfEfficacy,-0.047,0.054,-0.882,0.379
EndProgIntelligenceBelief**,-0.21,0.072,-2.908,0.004
CanDoThisTask***,0.189,0.054,3.489,0.001
MidBelonging,0.088,0.078,1.132,0.259


Model adj. R^2: 0.590, R^2: 0.613, N. obs: 218, F-statistic: 27.044
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+SelfEfficacy+EndProgIntelligenceBelief+CanDoThisTask+MidBelonging+EndBelonging+PreProgIntelligenceBelief+MidProgIntelligenceBelief
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + EndProgIntelligenceBelief + CanDoThisTask + MidBelonging + EndBelonging + PreProgIntelligenceBelief + MidProgIntelligenceBelief R^2:  0.3545181725686517
Predictor:  Mastery VIF:  1.549230292012144
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + EndProgIntelligenceBelief + CanDoThisTask + MidBelonging + EndBelonging + PreProgIntelligenceBelief + MidProgIntelligenceBelief R^2:  0.4320484361690713
Predictor:  Normative VIF:  1.760713525031663
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + CSMajor + SelfEfficacy + EndProgIntelligenceBelief + CanDoThis

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.139,0.078,-1.783,0.076
Mastery*,0.108,0.053,2.025,0.044
Normative,0.063,0.057,1.109,0.269
Appearance,-0.04,0.055,-0.729,0.467
Prior_Knowledge,-0.017,0.1,-0.172,0.864
CSMajor***,0.344,0.099,3.486,0.001
SelfEfficacy,-0.05,0.052,-0.954,0.341
EndProgIntelligenceBelief**,-0.218,0.071,-3.085,0.002
CanDoThisTask***,0.185,0.053,3.482,0.001
MidBelonging,0.088,0.076,1.153,0.25


Model adj. R^2: 0.606, R^2: 0.628, N. obs: 217, F-statistic: 28.697
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept*,-0.19,0.08,-2.379,0.018
Mastery,0.081,0.055,1.47,0.143
Normative,0.091,0.059,1.532,0.127
Appearance,-0.068,0.058,-1.163,0.246
Prior_Knowledge,-0.0,0.103,-0.005,0.996
CSMajor***,0.403,0.1,4.036,0.0
SelfEfficacy,-0.037,0.054,-0.697,0.486
EndProgIntelligenceBelief*,-0.163,0.075,-2.182,0.03
CanDoThisTask***,0.185,0.054,3.392,0.001
MidBelonging,0.114,0.083,1.368,0.173


Model adj. R^2: 0.597, R^2: 0.620, N. obs: 213, F-statistic: 27.214


In [96]:
#anova
display_anova(base_model, all_factors_model)

Displaying ANOVA results:
model1:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor
model2:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+SelfEfficacy+EndProgIntelligenceBelief+CanDoThisTask+MidBelonging+EndBelonging+PreProgIntelligenceBelief+MidProgIntelligenceBelief


Unnamed: 0,df_resid,ssr,df_diff,ss_diff,F,Pr(>F)
0,212.0,144.063,0.0,,,
1,205.0,84.396,7.0,59.667,20.705,0.0


F(7.0, 205.0) = 20.70, p = 0.00
NOTE: ANOVA The difference between models is significant.


In [97]:
#binarize final exam score to be 1 if above median, 0 if below. store as new variable binarized_final_exam_score
df['binarized_final_exam_score'] = df['Final_Exam_Score'].apply(lambda x: 1 if x > df['Final_Exam_Score'].median() else 0)

  df['binarized_final_exam_score'] = df['Final_Exam_Score'].apply(lambda x: 1 if x > df['Final_Exam_Score'].median() else 0)


In [98]:
all_factors_interest_eq = 'EndInterest ~ ' + '+'.join(all_factors) + ' + EndProgIntelligenceBelief*binarized_final_exam_score'
display_model_info(all_factors_interest_eq, df)

Equation:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+SelfEfficacy+EndProgIntelligenceBelief+CanDoThisTask+MidBelonging+EndBelonging+PreProgIntelligenceBelief+MidProgIntelligenceBelief + EndProgIntelligenceBelief*binarized_final_exam_score


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.142,0.098,-1.456,0.147
Mastery,0.091,0.054,1.677,0.095
Normative,0.057,0.058,0.985,0.326
Appearance,-0.044,0.057,-0.766,0.445
Prior_Knowledge,-0.031,0.102,-0.307,0.759
CSMajor***,0.378,0.101,3.755,0.0
SelfEfficacy,-0.046,0.054,-0.847,0.398
EndProgIntelligenceBelief*,-0.172,0.086,-2.016,0.045
CanDoThisTask***,0.195,0.058,3.361,0.001
MidBelonging,0.083,0.078,1.066,0.288


Model adj. R^2: 0.588, R^2: 0.615, N. obs: 218, F-statistic: 23.119
Checking VIF for:  EndInterest ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+SelfEfficacy+EndProgIntelligenceBelief+CanDoThisTask+MidBelonging+EndBelonging+PreProgIntelligenceBelief+MidProgIntelligenceBelief + EndProgIntelligenceBelief*binarized_final_exam_score
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + EndProgIntelligenceBelief + CanDoThisTask + MidBelonging + EndBelonging + PreProgIntelligenceBelief + MidProgIntelligenceBelief + binarized_final_exam_score + EndProgIntelligenceBelief:binarized_final_exam_score R^2:  0.35669396392298514
Predictor:  Mastery VIF:  1.5544701027494832
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + EndProgIntelligenceBelief + CanDoThisTask + MidBelonging + EndBelonging + PreProgIntelligenceBelief + MidProgIntelligenceBelief + binarized_final_exam_score + EndProgIntelligenceBelief:bina

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.097,0.097,-1.003,0.317
Mastery*,0.106,0.053,1.983,0.049
Normative,0.067,0.057,1.171,0.243
Appearance,-0.047,0.056,-0.843,0.4
Prior_Knowledge,-0.02,0.1,-0.204,0.839
CSMajor***,0.338,0.099,3.396,0.001
SelfEfficacy,-0.049,0.053,-0.926,0.355
EndProgIntelligenceBelief*,-0.189,0.084,-2.257,0.025
CanDoThisTask***,0.2,0.057,3.516,0.001
MidBelonging,0.084,0.077,1.094,0.275


Model adj. R^2: 0.605, R^2: 0.630, N. obs: 217, F-statistic: 24.593
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.193,0.1,-1.942,0.054
Mastery,0.087,0.055,1.584,0.115
Normative,0.075,0.058,1.302,0.194
Appearance,-0.041,0.057,-0.719,0.473
Prior_Knowledge,-0.007,0.102,-0.073,0.942
CSMajor***,0.39,0.101,3.862,0.0
SelfEfficacy,-0.039,0.054,-0.728,0.467
EndProgIntelligenceBelief,-0.086,0.089,-0.967,0.335
CanDoThisTask***,0.194,0.058,3.362,0.001
MidBelonging,0.1,0.082,1.218,0.225


Model adj. R^2: 0.592, R^2: 0.619, N. obs: 213, F-statistic: 22.939


<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x280773a40>

#### Stepwise With Base Factors 

In [99]:
_, best_model_eq, best_models = stepwise_selection(df, base_factors, 'EndInterest', check_anova_before_adding=True, display_best_model=True)

Adding Prior_Knowledge significantly improves the model significantly. P val: 6.87193298646266e-07
Best adj R^2 before:  0.13227963245173513  Best adj R^2 after:  0.22288291774771207
models being compared: EndInterest ~ Mastery and EndInterest ~ Mastery + Prior_Knowledge
Adding CSMajor significantly improves the model significantly. P val: 1.6335954608693867e-08
Best adj R^2 before:  0.22288291774771207  Best adj R^2 after:  0.32756764500783353
models being compared: EndInterest ~ Mastery + Prior_Knowledge and EndInterest ~ Mastery + Prior_Knowledge + CSMajor
Equation:  EndInterest ~ Mastery + Prior_Knowledge + CSMajor


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.547,0.089,-6.175,0.0
Mastery***,0.257,0.058,4.443,0.0
Prior_Knowledge***,0.468,0.114,4.094,0.0
CSMajor***,0.699,0.119,5.871,0.0


Model adj. R^2: 0.328, R^2: 0.337, N. obs: 218, F-statistic: 36.236
Checking VIF for:  EndInterest ~ Mastery + Prior_Knowledge + CSMajor
Regression eq:  Mastery ~ Prior_Knowledge + CSMajor R^2:  0.07421554444361966
Predictor:  Mastery VIF:  1.0801650362545971
Regression eq:  Prior_Knowledge ~ Mastery + CSMajor R^2:  0.05245591590962673
Predictor:  Prior_Knowledge VIF:  1.0553598685173404
Regression eq:  CSMajor ~ Mastery + Prior_Knowledge R^2:  0.1177305518531313
Predictor:  CSMajor VIF:  1.1334405856401513

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kolmogorov-Smirnov test for normality with 'norm', alternative='less': Nothing significant
^^^^^^^^^^^^^^^^Breusch-Pagan test for homoscedasticity: Significant
(8.11483814732499, 0.04369702104961901, 2.7579770261010714, 0.04326630427081773)
{'outliers_abs_gt_3': array([], dtype=int64), 'outliers_abs_gt_2_5': array([  8,  53, 215]), 'outliers_abs_gt_2': array([  8,  39,  47,  53,  67,  76, 143, 17

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.581,0.079,-7.311,0.0
Mastery***,0.334,0.051,6.491,0.0
Prior_Knowledge***,0.537,0.101,5.325,0.0
CSMajor***,0.71,0.105,6.734,0.0


Model adj. R^2: 0.463, R^2: 0.471, N. obs: 206, F-statistic: 59.900
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept***,-0.553,0.088,-6.275,0.0
Mastery***,0.261,0.06,4.378,0.0
Prior_Knowledge***,0.487,0.115,4.249,0.0
CSMajor***,0.688,0.121,5.68,0.0


Model adj. R^2: 0.336, R^2: 0.345, N. obs: 215, F-statistic: 37.036


### Prior Experience Interactions

All priorexp interaction factors 

In [100]:
#list of interactions between prior experience and all other factors, with * so that it includes base as well 
priorexp_interactions = [f'Mastery*Prior_Knowledge', f'Normative*Prior_Knowledge', f'Appearance*Prior_Knowledge', f'CSMajor*Prior_Knowledge', f'SelfEfficacy*Prior_Knowledge', f'EndProgIntelligenceBelief*Prior_Knowledge', f'CanDoThisTask*Prior_Knowledge', f'EndBelonging*Prior_Knowledge']

#regression
priorexp_interactions_interest_eq = 'EndInterest ~ ' + '+'.join(priorexp_interactions)
model = display_model_info(priorexp_interactions_interest_eq, df)


Equation:  EndInterest ~ Mastery*Prior_Knowledge+Normative*Prior_Knowledge+Appearance*Prior_Knowledge+CSMajor*Prior_Knowledge+SelfEfficacy*Prior_Knowledge+EndProgIntelligenceBelief*Prior_Knowledge+CanDoThisTask*Prior_Knowledge+EndBelonging*Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.166,0.091,-1.818,0.071
Mastery,0.083,0.07,1.18,0.239
Prior_Knowledge,0.058,0.133,0.439,0.661
Mastery:Prior_Knowledge,0.062,0.102,0.612,0.541
Normative,-0.022,0.081,-0.269,0.788
Normative:Prior_Knowledge,0.155,0.115,1.339,0.182
Appearance,-0.055,0.071,-0.786,0.433
Appearance:Prior_Knowledge,0.011,0.11,0.096,0.924
CSMajor**,0.471,0.142,3.319,0.001
CSMajor:Prior_Knowledge,-0.153,0.194,-0.789,0.431


Model adj. R^2: 0.600, R^2: 0.632, N. obs: 218, F-statistic: 20.176
Checking VIF for:  EndInterest ~ Mastery*Prior_Knowledge+Normative*Prior_Knowledge+Appearance*Prior_Knowledge+CSMajor*Prior_Knowledge+SelfEfficacy*Prior_Knowledge+EndProgIntelligenceBelief*Prior_Knowledge+CanDoThisTask*Prior_Knowledge+EndBelonging*Prior_Knowledge
Regression eq:  Mastery ~ Prior_Knowledge + Mastery:Prior_Knowledge + Normative + Normative:Prior_Knowledge + Appearance + Appearance:Prior_Knowledge + CSMajor + CSMajor:Prior_Knowledge + SelfEfficacy + SelfEfficacy:Prior_Knowledge + EndProgIntelligenceBelief + EndProgIntelligenceBelief:Prior_Knowledge + CanDoThisTask + CanDoThisTask:Prior_Knowledge + EndBelonging + EndBelonging:Prior_Knowledge R^2:  0.6277826145169707
Predictor:  Mastery VIF:  2.686602074490133
Regression eq:  Prior_Knowledge ~ Mastery + Mastery:Prior_Knowledge + Normative + Normative:Prior_Knowledge + Appearance + Appearance:Prior_Knowledge + CSMajor + CSMajor:Prior_Knowledge + SelfEfficacy 

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.166,0.089,-1.868,0.063
Mastery,0.083,0.068,1.212,0.227
Prior_Knowledge,0.107,0.13,0.819,0.414
Mastery:Prior_Knowledge,0.087,0.1,0.87,0.385
Normative,-0.022,0.079,-0.276,0.782
Normative:Prior_Knowledge,0.172,0.113,1.531,0.127
Appearance,-0.055,0.069,-0.808,0.42
Appearance:Prior_Knowledge,0.004,0.107,0.04,0.969
CSMajor***,0.471,0.138,3.409,0.001
CSMajor:Prior_Knowledge,-0.231,0.19,-1.212,0.227


Model adj. R^2: 0.621, R^2: 0.651, N. obs: 217, F-statistic: 21.806
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.164,0.092,-1.783,0.076
Mastery,0.068,0.074,0.922,0.358
Prior_Knowledge,0.074,0.138,0.534,0.594
Mastery:Prior_Knowledge,0.072,0.11,0.648,0.518
Normative,-0.002,0.086,-0.023,0.982
Normative:Prior_Knowledge,0.129,0.12,1.077,0.283
Appearance,-0.07,0.074,-0.949,0.344
Appearance:Prior_Knowledge,0.024,0.116,0.205,0.837
CSMajor**,0.473,0.143,3.321,0.001
CSMajor:Prior_Knowledge,-0.14,0.199,-0.702,0.484


Model adj. R^2: 0.589, R^2: 0.622, N. obs: 212, F-statistic: 18.767


Stepwise priorexp interactions 

In [101]:
_, best_model_eq, best_models = stepwise_selection(df, priorexp_interactions, 'EndInterest', check_anova_before_adding=True, display_best_model=True)

Adding CSMajor*Prior_Knowledge significantly improves the model significantly. P val: 8.60332778123311e-08
Best adj R^2 before:  0.21939490865154543  Best adj R^2 after:  0.3241423995614986
models being compared: EndInterest ~ Mastery*Prior_Knowledge and EndInterest ~ Mastery*Prior_Knowledge + CSMajor*Prior_Knowledge
Adding EndProgIntelligenceBelief*Prior_Knowledge significantly improves the model significantly. P val: 1.451342937378155e-13
Best adj R^2 before:  0.3241423995614986  Best adj R^2 after:  0.4851236881071037
models being compared: EndInterest ~ Mastery*Prior_Knowledge + CSMajor*Prior_Knowledge and EndInterest ~ Mastery*Prior_Knowledge + CSMajor*Prior_Knowledge + EndProgIntelligenceBelief*Prior_Knowledge
Adding CanDoThisTask*Prior_Knowledge significantly improves the model significantly. P val: 2.5762212617141133e-05
Best adj R^2 before:  0.4851236881071037  Best adj R^2 after:  0.5303939218469707
models being compared: EndInterest ~ Mastery*Prior_Knowledge + CSMajor*Prior_

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.15,0.089,-1.681,0.094
Mastery,0.063,0.065,0.969,0.334
Prior_Knowledge,0.043,0.131,0.327,0.744
Mastery:Prior_Knowledge,0.111,0.093,1.197,0.233
CSMajor**,0.442,0.139,3.178,0.002
CSMajor:Prior_Knowledge,-0.122,0.191,-0.637,0.525
EndProgIntelligenceBelief*,-0.196,0.077,-2.531,0.012
EndProgIntelligenceBelief:Prior_Knowledge,-0.076,0.114,-0.672,0.502
CanDoThisTask***,0.312,0.071,4.418,0.0
CanDoThisTask:Prior_Knowledge**,-0.289,0.106,-2.717,0.007


Model adj. R^2: 0.603, R^2: 0.623, N. obs: 218, F-statistic: 30.934
Checking VIF for:  EndInterest ~ Mastery*Prior_Knowledge + CSMajor*Prior_Knowledge + EndProgIntelligenceBelief*Prior_Knowledge + CanDoThisTask*Prior_Knowledge + EndBelonging*Prior_Knowledge
Regression eq:  Mastery ~ Prior_Knowledge + Mastery:Prior_Knowledge + CSMajor + CSMajor:Prior_Knowledge + EndProgIntelligenceBelief + EndProgIntelligenceBelief:Prior_Knowledge + CanDoThisTask + CanDoThisTask:Prior_Knowledge + EndBelonging + EndBelonging:Prior_Knowledge R^2:  0.5608088935142994
Predictor:  Mastery VIF:  2.276913136975278
Regression eq:  Prior_Knowledge ~ Mastery + Mastery:Prior_Knowledge + CSMajor + CSMajor:Prior_Knowledge + EndProgIntelligenceBelief + EndProgIntelligenceBelief:Prior_Knowledge + CanDoThisTask + CanDoThisTask:Prior_Knowledge + EndBelonging + EndBelonging:Prior_Knowledge R^2:  0.5748380299743396
Predictor:  Prior_Knowledge VIF:  2.3520447982204185
Regression eq:  Mastery:Prior_Knowledge ~ Mastery + Pri

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.15,0.087,-1.722,0.086
Mastery,0.063,0.063,0.993,0.322
Prior_Knowledge,0.089,0.129,0.688,0.492
Mastery:Prior_Knowledge,0.134,0.091,1.469,0.143
CSMajor**,0.442,0.136,3.256,0.001
CSMajor:Prior_Knowledge,-0.198,0.188,-1.053,0.294
EndProgIntelligenceBelief*,-0.196,0.076,-2.593,0.01
EndProgIntelligenceBelief:Prior_Knowledge,-0.075,0.111,-0.679,0.498
CanDoThisTask***,0.312,0.069,4.527,0.0
CanDoThisTask:Prior_Knowledge**,-0.302,0.104,-2.905,0.004


Model adj. R^2: 0.621, R^2: 0.640, N. obs: 217, F-statistic: 33.192
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.129,0.089,-1.446,0.15
Mastery,0.083,0.068,1.218,0.225
Prior_Knowledge,0.061,0.134,0.453,0.651
Mastery:Prior_Knowledge,0.091,0.101,0.905,0.367
CSMajor*,0.352,0.145,2.423,0.016
CSMajor:Prior_Knowledge,0.024,0.197,0.119,0.905
EndProgIntelligenceBelief*,-0.184,0.081,-2.276,0.024
EndProgIntelligenceBelief:Prior_Knowledge,-0.164,0.13,-1.261,0.209
CanDoThisTask***,0.334,0.071,4.71,0.0
CanDoThisTask:Prior_Knowledge***,-0.399,0.112,-3.571,0.0


Model adj. R^2: 0.617, R^2: 0.637, N. obs: 208, F-statistic: 31.253


## Exam Scores

Base 

In [102]:
#run all base factors except CanDoThisTask

base_factors_nocando = ['Mastery', 'Normative', 'Appearance', 'Prior_Knowledge', 'CSMajor', 'SelfEfficacy', 'EndProgIntelligenceBelief', 'EndProgIntelligenceBelief', 'PreProgIntelligenceBelief', 'EndBelonging', 'MidBelonging']

base_factors_nocando_interest_eq = 'Final_Exam_Score ~ ' + '+'.join(base_factors_nocando)
model = display_model_info(base_factors_nocando_interest_eq, df)

Equation:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+SelfEfficacy+EndProgIntelligenceBelief+EndProgIntelligenceBelief+PreProgIntelligenceBelief+EndBelonging+MidBelonging


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.032,0.115,0.275,0.783
Mastery,-0.009,0.079,-0.12,0.905
Normative*,0.184,0.083,2.21,0.028
Appearance,-0.071,0.081,-0.874,0.383
Prior_Knowledge,0.116,0.147,0.789,0.431
CSMajor,-0.195,0.146,-1.338,0.182
SelfEfficacy,0.014,0.078,0.176,0.86
EndProgIntelligenceBelief***,-0.291,0.083,-3.507,0.001
PreProgIntelligenceBelief,0.135,0.082,1.635,0.104
EndBelonging,0.104,0.113,0.921,0.358


Model adj. R^2: 0.127, R^2: 0.167, N. obs: 218, F-statistic: 4.154
Checking VIF for:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor+SelfEfficacy+EndProgIntelligenceBelief+EndProgIntelligenceBelief+PreProgIntelligenceBelief+EndBelonging+MidBelonging
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + EndProgIntelligenceBelief + PreProgIntelligenceBelief + EndBelonging + MidBelonging R^2:  0.3490296504150241
Predictor:  Mastery VIF:  1.5361682765390878
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + EndProgIntelligenceBelief + PreProgIntelligenceBelief + EndBelonging + MidBelonging R^2:  0.4205861075852282
Predictor:  Normative VIF:  1.725881987110093
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + CSMajor + SelfEfficacy + EndProgIntelligenceBelief + PreProgIntelligenceBelief + EndBelonging + MidBelonging R^2:  0.3859337506104594
Predictor:  Appearance VI

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,0.052,0.116,0.451,0.652
Mastery,-0.043,0.081,-0.534,0.594
Normative*,0.183,0.085,2.147,0.033
Appearance,-0.103,0.083,-1.237,0.218
Prior_Knowledge,0.092,0.148,0.62,0.536
CSMajor,-0.176,0.147,-1.192,0.235
SelfEfficacy,0.042,0.079,0.539,0.591
EndProgIntelligenceBelief**,-0.25,0.087,-2.856,0.005
PreProgIntelligenceBelief,0.149,0.082,1.813,0.071
EndBelonging,0.194,0.121,1.605,0.11


Model adj. R^2: 0.131, R^2: 0.172, N. obs: 213, F-statistic: 4.196


In [103]:
base_all_factors_exam_eq = 'Final_Exam_Score ~ ' + '+'.join(base_factors)
base_model = display_model_info(base_all_factors_exam_eq, df)

Equation:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.184,0.106,-1.733,0.085
Mastery,0.023,0.072,0.317,0.751
Normative,0.165,0.085,1.933,0.055
Appearance,-0.033,0.083,-0.394,0.694
Prior_Knowledge**,0.382,0.137,2.796,0.006
CSMajor,-0.008,0.144,-0.057,0.954


Model adj. R^2: 0.042, R^2: 0.064, N. obs: 218, F-statistic: 2.896
Checking VIF for:  Final_Exam_Score ~ Mastery+Normative+Appearance+Prior_Knowledge+CSMajor
Regression eq:  Mastery ~ Normative + Appearance + Prior_Knowledge + CSMajor R^2:  0.15151841251770903
Predictor:  Mastery VIF:  1.1785759582212165
Regression eq:  Normative ~ Mastery + Appearance + Prior_Knowledge + CSMajor R^2:  0.39587028080118336
Predictor:  Normative VIF:  1.6552736411083662
Regression eq:  Appearance ~ Mastery + Normative + Prior_Knowledge + CSMajor R^2:  0.3585097875688924
Predictor:  Appearance VIF:  1.5588702378017878
Regression eq:  Prior_Knowledge ~ Mastery + Normative + Appearance + CSMajor R^2:  0.05386982895525594
Predictor:  Prior_Knowledge VIF:  1.0569370162836804
Regression eq:  CSMajor ~ Mastery + Normative + Appearance + Prior_Knowledge R^2:  0.1354449308354171
Predictor:  CSMajor VIF:  1.1566643186376746

No variables with high VIF
Durbin-Watson test for autocorrelation: Nothing significant
Kol

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.179,0.107,-1.676,0.095
Mastery,0.014,0.074,0.193,0.847
Normative*,0.178,0.088,2.018,0.045
Appearance,-0.044,0.085,-0.515,0.607
Prior_Knowledge**,0.377,0.137,2.747,0.007
CSMajor,-0.008,0.144,-0.053,0.958


Model adj. R^2: 0.042, R^2: 0.064, N. obs: 217, F-statistic: 2.902


### Prior Knowledge Interactions

In [104]:
priorexp_interactions_interest_eq = 'Final_Exam_Score ~ ' + '+'.join(priorexp_interactions)
model = display_model_info(priorexp_interactions_interest_eq, df)


Equation:  Final_Exam_Score ~ Mastery*Prior_Knowledge+Normative*Prior_Knowledge+Appearance*Prior_Knowledge+CSMajor*Prior_Knowledge+SelfEfficacy*Prior_Knowledge+EndProgIntelligenceBelief*Prior_Knowledge+CanDoThisTask*Prior_Knowledge+EndBelonging*Prior_Knowledge


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.046,0.126,-0.362,0.718
Mastery,-0.127,0.097,-1.303,0.194
Prior_Knowledge,0.228,0.184,1.241,0.216
Mastery:Prior_Knowledge,0.052,0.141,0.371,0.711
Normative,0.184,0.112,1.653,0.1
Normative:Prior_Knowledge,-0.111,0.16,-0.693,0.489
Appearance,-0.034,0.097,-0.344,0.731
Appearance:Prior_Knowledge,0.012,0.152,0.076,0.94
CSMajor,0.072,0.196,0.369,0.712
CSMajor:Prior_Knowledge,-0.467,0.268,-1.739,0.084


Model adj. R^2: 0.237, R^2: 0.296, N. obs: 218, F-statistic: 4.957
Checking VIF for:  Final_Exam_Score ~ Mastery*Prior_Knowledge+Normative*Prior_Knowledge+Appearance*Prior_Knowledge+CSMajor*Prior_Knowledge+SelfEfficacy*Prior_Knowledge+EndProgIntelligenceBelief*Prior_Knowledge+CanDoThisTask*Prior_Knowledge+EndBelonging*Prior_Knowledge
Regression eq:  Mastery ~ Prior_Knowledge + Mastery:Prior_Knowledge + Normative + Normative:Prior_Knowledge + Appearance + Appearance:Prior_Knowledge + CSMajor + CSMajor:Prior_Knowledge + SelfEfficacy + SelfEfficacy:Prior_Knowledge + EndProgIntelligenceBelief + EndProgIntelligenceBelief:Prior_Knowledge + CanDoThisTask + CanDoThisTask:Prior_Knowledge + EndBelonging + EndBelonging:Prior_Knowledge R^2:  0.6277826145169707
Predictor:  Mastery VIF:  2.686602074490133
Regression eq:  Prior_Knowledge ~ Mastery + Mastery:Prior_Knowledge + Normative + Normative:Prior_Knowledge + Appearance + Appearance:Prior_Knowledge + CSMajor + CSMajor:Prior_Knowledge + SelfEffic

Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.115,0.121,-0.952,0.342
Mastery,-0.073,0.095,-0.774,0.44
Prior_Knowledge,0.298,0.176,1.696,0.091
Mastery:Prior_Knowledge,-0.001,0.136,-0.008,0.993
Normative,0.174,0.106,1.642,0.102
Normative:Prior_Knowledge,-0.1,0.152,-0.661,0.509
Appearance,0.027,0.095,0.28,0.78
Appearance:Prior_Knowledge,-0.049,0.146,-0.332,0.74
CSMajor,0.169,0.187,0.902,0.368
CSMajor:Prior_Knowledge*,-0.564,0.256,-2.202,0.029


Model adj. R^2: 0.291, R^2: 0.348, N. obs: 215, F-statistic: 6.173
Re-running model without high leverage points


Unnamed: 0,Coefficient,Standard Error,t-value,p-value
Intercept,-0.043,0.126,-0.338,0.736
Mastery,-0.146,0.102,-1.433,0.154
Prior_Knowledge,0.245,0.19,1.286,0.2
Mastery:Prior_Knowledge,0.021,0.152,0.141,0.888
Normative,0.209,0.118,1.773,0.078
Normative:Prior_Knowledge,-0.142,0.165,-0.861,0.39
Appearance,-0.052,0.102,-0.509,0.611
Appearance:Prior_Knowledge,0.008,0.16,0.053,0.958
CSMajor,0.076,0.197,0.386,0.7
CSMajor:Prior_Knowledge,-0.456,0.274,-1.663,0.098


Model adj. R^2: 0.207, R^2: 0.271, N. obs: 212, F-statistic: 4.239


### Trying Majors and Nonmajors Separately

In [105]:
#create a df of only non-CS majors
non_cs_majors = df[df['CSMajor'] == 0]

#create a df of only CS majors
cs_majors = df[df['CSMajor'] == 1]

#length of each 
len(non_cs_majors), len(cs_majors)

#print lengths
print(len(non_cs_majors), len(cs_majors))



119 99


In [106]:
print("percent non majors: ", len(non_cs_majors)/len(df))
print("percent majors: ", len(cs_majors)/len(df))

percent non majors:  0.5458715596330275
percent majors:  0.4541284403669725


In [107]:
#for non-CS majors only 
eq = 'Interest ~ Mastery + Normative + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + ProgIntelligenceBelief + CanDoThisTask'
non_cs_majors
display_model_info(eq, non_cs_majors)

#note: results are same when we remove CSMajor from the equation, which makes sense. 


Equation:  Interest ~ Mastery + Normative + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + ProgIntelligenceBelief + CanDoThisTask


PatsyError: Error evaluating factor: NameError: name 'ProgIntelligenceBelief' is not defined
    Interest ~ Mastery + Normative + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + ProgIntelligenceBelief + CanDoThisTask
                                                                                             ^^^^^^^^^^^^^^^^^^^^^^

In [None]:
eq = 'Interest ~ Mastery + Normative + Appearance + Prior_Knowledge + ProgIntelligenceBelief'
display_model_info(eq, non_cs_majors)

In [None]:
#for CS majors only 
eq = 'Interest ~ Mastery + Normative + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + ProgIntelligenceBelief'
display_model_info(eq, cs_majors)
#This is strange. Why is CS major predictive at all if they're all CS majors? 

#note: results are same when we remove CSMajor from the equation, which makes sense. 



In [None]:
#for CS majors only 
eq = 'Interest ~ Mastery + Normative + Appearance + Prior_Knowledge  + SelfEfficacy + ProgIntelligenceBelief'
display_model_info(eq, cs_majors)
#This is strange. Why is CS major predictive at all if they're all CS majors? 

#note: results are same when we remove CSMajor from the equation, which makes sense. 



In [None]:
eq = 'Interest ~ Mastery + Normative + Appearance + Prior_Knowledge + ProgIntelligenceBelief'
display_model_info(eq, cs_majors)

In [None]:
eq1 = 'Interest ~ Mastery + Normative + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + ProgIntelligenceBelief + ConfidentIndependentProgramming + GenAIHelpsProgramming + GenAIHelpsProblemSolving + CopilotPerceptions'
display_model_info(eq, df)


In [None]:
eq = 'Interest ~ Mastery + Normative + Appearance + Prior_Knowledge + CSMajor + SelfEfficacy + ProgIntelligenceBelief + ReadCopilotOutput*SelfEfficacy + TestCopilotOutput*SelfEfficacy'
display_model_info(eq, df)


In [None]:
eq = 'Interest ~ ReadCopilotOutput + TestCopilotOutput + ReadCopilotOutput:TestCopilotOutput +CopilotPerceptions+ Prior_Knowledge + CSMajor + SelfEfficacy'
display_model_info(eq, df)


## Predicting Other Things

In [None]:
eq = 'ReadCopilotOutput ~  Prior_Knowledge + CSMajor + SelfEfficacy + Mastery + Normative + Appearance + Interest'
display_model_info(eq, df)



In [None]:
eq = 'TestCopilotOutput ~ Prior_Knowledge + CSMajor + SelfEfficacy + Mastery + Normative + Appearance + Interest'
display_model_info(eq, df)



In [None]:
eq = 'CopilotPerceptions ~ Prior_Knowledge + CSMajor + SelfEfficacy + Mastery + Normative + Appearance + Interest'
display_model_info(eq, df)



In [None]:
indepedent_programming = 'ConfidentIndependentProgramming ~ Prior_Knowledge + CSMajor + SelfEfficacy + Mastery + Normative + Appearance + Interest'
display_model_info(eq, df)



# Other possible things we might care about 
### Pre survey 
- belief that math ability is important: 'I think my ability to succeed in CSE 8A will be related to my mathematical skills or experience.' Strongly disagree to Strongly agree (1-6)

### End survey 
- How comfortable or uncomfortable are you in using GenAI tools to program?
    1: Not at all comfortable
    2: Not comfortable
    3: Neutral
    4: Comfortable
    5: Strongly comfortable
- How certain or uncertain are you that you understand the code you write using GenAI tools?
    1: Not at all certain
    2: Not certain
    3: Neutral
    4: Certain
    5: Strongly certain
- If I had to guess, professional programmers: 
    - rarely use Copilot or similar tools
    - use Copilot or similar tools sometimes
    - use Copilot or similar tools routinely
    - use Copilot or similar tools  everyday 
- Progintelligencebelief end of quarter vs pre quarter