# ELA NWEA Spring Prediction Model

In [None]:
import numpy as np
import pandas as pd

## Data Preparation

First, the Fall and Winter NWEA results are loaded and combined with an outer join so that every student who took at least one of the two tests is kept.

In [None]:
data = pd.read_excel (r"C:\Users\derek.castleman\Desktop\Testing Data\NWEA History\NWEA 2016-2022\NWEA 2021-22\Fall 2021-2022\AssessmentResults 9-13-21.xlsx") # Insert Fall file pathway between the parenthesis with a csv file
data

In [None]:
data = data.loc[data['Subject'] == 'Language Arts'] #Selects the Language Arts test
data

In [None]:
fall = data[['TermName', 'StudentID', 'TestStartTime', 'TestDurationMinutes', 'TestRITScore', 
             'TestPercentile', 'PercentCorrect', 'RapidGuessingPercentage', 'Grade']] #Selects the needed data
fall

In [None]:
data = pd.read_csv (r"C:\Users\derek.castleman\Desktop\Testing Data\NWEA History\NWEA 2016-2022\NWEA 2021-22\Winter 2021-2022\AssessmentResults.csv") #Insert pathway of the Winter NWEA test
data

In [None]:
data = data.loc[data['Subject'] == 'Language Arts'] #Selects Language Arts test
data

In [None]:
winter = data[['TermName','StudentID', 'TestStartTime', 'TestDurationMinutes', 
               'TestRITScore', 'TestPercentile', 'PercentCorrect', 'RapidGuessingPercentage', 'Grade']] #Needed features
winter

In [None]:
NWEA_Data = pd.merge(fall, winter, how='outer', 
                            left_on=['StudentID'], right_on=['StudentID']) #Joins the Fall and Winter data
NWEA_Data

## Fixing Missing Tests

For students that missed one of the NWEA tests their score for the missing exam will be filled in using the growth chart that is provided by NWEA for what should be expected for their scores based on the percentile that they were found to be in. Therefore, the percentile level they are at for the one test that they took will be then used to fill in the missing score.

__*The Excel file that contains the expected scores for each grade level and percentile must be loaded in the cell that asks for it.*__

The missing scores will be filled in one grade level at a time since the scores for each percentile vary by the grade in which the student is. The column will be changed to the name Fall Test.

### Fall Scores

In [None]:
full_data = NWEA_Data[NWEA_Data['TestRITScore_x'].isna()] #Finding students missing the Fall test.
full_data

In [None]:
fall_data = pd.read_excel (r"C:\Users\derek.castleman\Desktop\Testing Data\ML NWEA SBAC\Fall ELA NWEA.xlsx") #Upload excel file pathway for ELA NWEA Fall scores
fall_data

Now each grade level will be separated, have the missing scores filled in, then combined at the end.

In [None]:
k = full_data.loc[full_data['Grade'] == 'K']
k

In [None]:
k = k.merge(fall_data[[0, 'TestPercentile_y']],on='TestPercentile_y',how="left")

k['TestRITScore_x'] = k['TestRITScore_x'].fillna(k[0])

k.drop([0], inplace=True, axis=1)
k.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)

In [None]:
k

In [None]:
first= full_data.loc[full_data['Grade'] == '1']
first= first.merge(fall_data[[1, 'TestPercentile_y']],on='TestPercentile_y',how="left")

first['TestRITScore_x'] =first['TestRITScore_x'].fillna(first[1])

first.drop([1], inplace=True, axis=1)
first.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
first

In [None]:
second = full_data.loc[full_data['Grade'] == '2']
second = second.merge(fall_data[[2, 'TestPercentile_y']],on='TestPercentile_y',how="left")

second['TestRITScore_x'] =second['TestRITScore_x'].fillna(second[2])

second.drop([2], inplace=True, axis=1)
second.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
second

In [None]:
third = full_data.loc[full_data['Grade'] == '3']
third = third.merge(fall_data[[3, 'TestPercentile_y']],on='TestPercentile_y',how="left")

third['TestRITScore_x'] =third['TestRITScore_x'].fillna(third[3])

third.drop([3], inplace=True, axis=1)
third.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
third

In [None]:
fourth = full_data.loc[full_data['Grade'] == '4']
fourth = fourth.merge(fall_data[[4, 'TestPercentile_y']],on='TestPercentile_y',how="left")

fourth['TestRITScore_x'] =fourth['TestRITScore_x'].fillna(fourth[4])

fourth.drop([4], inplace=True, axis=1)
fourth.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
fourth

In [None]:
fifth = full_data.loc[full_data['Grade'] == '5']
fifth = fifth.merge(fall_data[[5, 'TestPercentile_y']],on='TestPercentile_y',how="left")

fifth['TestRITScore_x'] =fifth['TestRITScore_x'].fillna(fifth[5])

fifth.drop([5], inplace=True, axis=1)
fifth.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
fifth

In [None]:
sixth = full_data.loc[full_data['Grade'] == '6']
sixth = sixth.merge(fall_data[[6, 'TestPercentile_y']],on='TestPercentile_y',how="left")

sixth['TestRITScore_x'] =sixth['TestRITScore_x'].fillna(sixth[6])

sixth.drop([6], inplace=True, axis=1)
sixth.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
sixth

In [None]:
seventh = full_data.loc[full_data['Grade'] == '7']
seventh = seventh.merge(fall_data[[7, 'TestPercentile_y']],on='TestPercentile_y',how="left")

seventh['TestRITScore_x'] =seventh['TestRITScore_x'].fillna(seventh[7])

seventh.drop([7], inplace=True, axis=1)
seventh.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
seventh

In [None]:
eighth = full_data.loc[full_data['Grade'] == '8']
eighth = eighth.merge(fall_data[[8, 'TestPercentile_y']],on='TestPercentile_y',how="left")

eighth['TestRITScore_x'] =eighth['TestRITScore_x'].fillna(eighth[8])

eighth.drop([8], inplace=True, axis=1)
eighth.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
eighth

In [None]:
ninth = full_data.loc[full_data['Grade'] == '9']
ninth = ninth.merge(fall_data[[9, 'TestPercentile_y']],on='TestPercentile_y',how="left")

ninth['TestRITScore_x'] =ninth['TestRITScore_x'].fillna(ninth[9])

ninth.drop([9], inplace=True, axis=1)
ninth.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
ninth

In [None]:
tenth = full_data.loc[full_data['Grade'] == '10']
tenth = tenth.merge(fall_data[[10, 'TestPercentile_y']],on='TestPercentile_y',how="left")

tenth['TestRITScore_x'] =tenth['TestRITScore_x'].fillna(tenth[10])

tenth.drop([10], inplace=True, axis=1)
tenth.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
tenth

In [None]:
eleventh = full_data.loc[full_data['Grade'] == '11']
eleventh = eleventh.merge(fall_data[[11, 'TestPercentile_y']],on='TestPercentile_y',how="left")

eleventh['TestRITScore_x'] =eleventh['TestRITScore_x'].fillna(eleventh[11])

eleventh.drop([11], inplace=True, axis=1)
eleventh.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
eleventh

In [None]:
twelfth = full_data.loc[full_data['Grade'] == '12']
twelfth = twelfth.merge(fall_data[[12, 'TestPercentile_y']],on='TestPercentile_y',how="left")

twelfth['TestRITScore_x'] =twelfth['TestRITScore_x'].fillna(twelfth[12])

twelfth.drop([12], inplace=True, axis=1)
twelfth.rename(columns={'TestRITScore_x':'Fall Test'},inplace=True)
twelfth

In [None]:
frames = [k, first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth, eleventh, twelfth]
full_data_fall = pd.concat(frames) # Combing all the grade levels

In [None]:
# Renaming column to Winter Test so it matches as others are changed
full_data_fall.rename(columns = {'TestRITScore_y':'Winter Test'}, inplace = True)

In [None]:
full_data_fall

### Winter Scores

The same steps are now repeated for students who missed the Winter test.

__*Remember to put the excel file for Winter NWEA chart in the proper place*__

In [None]:
full_data = NWEA_Data[NWEA_Data['TestRITScore_y'].isna()]
full_data

In [None]:
winter_data = pd.read_excel (r"") #Insert pathway for Winter NWEA test scores here
winter_data

In [None]:
k = full_data.loc[full_data['Grade'] == 'K']
k

In [None]:
k = k.merge(winter_data[[0, 'TestPercentile_x']],on='TestPercentile_x',how="left")

k['TestRITScore_y'] = k['TestRITScore_y'].fillna(k[0])

k.drop([0], inplace=True, axis=1)
k.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)


In [None]:
k

In [None]:
first= full_data.loc[full_data['Grade'] == '1']
first= first.merge(winter_data[[1, 'TestPercentile_x']],on='TestPercentile_x',how="left")

first['TestRITScore_y'] =first['TestRITScore_y'].fillna(first[1])

first.drop([1], inplace=True, axis=1)
first.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
first

In [None]:
second = full_data.loc[full_data['Grade'] == '2']
second = second.merge(winter_data[[2, 'TestPercentile_x']],on='TestPercentile_x',how="left")

second['TestRITScore_y'] =second['TestRITScore_y'].fillna(second[2])

second.drop([2], inplace=True, axis=1)
second.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
second

In [None]:
third = full_data.loc[full_data['Grade'] == '3']
third = third.merge(winter_data[[3, 'TestPercentile_x']],on='TestPercentile_x',how="left")

third['TestRITScore_y'] =third['TestRITScore_y'].fillna(third[3])

third.drop([3], inplace=True, axis=1)
third.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
third

In [None]:
fourth = full_data.loc[full_data['Grade'] == '4']
fourth = fourth.merge(winter_data[[4, 'TestPercentile_x']],on='TestPercentile_x',how="left")

fourth['TestRITScore_y'] =fourth['TestRITScore_y'].fillna(fourth[4])

fourth.drop([4], inplace=True, axis=1)
fourth.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
fourth

In [None]:
fifth = full_data.loc[full_data['Grade'] == '5']
fifth = fifth.merge(winter_data[[5, 'TestPercentile_x']],on='TestPercentile_x',how="left")

fifth['TestRITScore_y'] =fifth['TestRITScore_y'].fillna(fifth[5])

fifth.drop([5], inplace=True, axis=1)
fifth.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
fifth

In [None]:
sixth = full_data.loc[full_data['Grade'] == '6']
sixth = sixth.merge(winter_data[[6, 'TestPercentile_x']],on='TestPercentile_x',how="left")

sixth['TestRITScore_y'] =sixth['TestRITScore_y'].fillna(sixth[6])

sixth.drop([6], inplace=True, axis=1)
sixth.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
sixth

In [None]:
seventh = full_data.loc[full_data['Grade'] == '7']
seventh = seventh.merge(winter_data[[7, 'TestPercentile_x']],on='TestPercentile_x',how="left")

seventh['TestRITScore_y'] =seventh['TestRITScore_y'].fillna(seventh[7])

seventh.drop([7], inplace=True, axis=1)
seventh.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
seventh

In [None]:
eighth = full_data.loc[full_data['Grade'] == '8']
eighth = eighth.merge(winter_data[[8, 'TestPercentile_x']],on='TestPercentile_x',how="left")

eighth['TestRITScore_y'] =eighth['TestRITScore_y'].fillna(eighth[8])

eighth.drop([8], inplace=True, axis=1)
eighth.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
eighth

In [None]:
ninth = full_data.loc[full_data['Grade'] == '9']
ninth = ninth.merge(winter_data[[9, 'TestPercentile_x']],on='TestPercentile_x',how="left")

ninth['TestRITScore_y'] =ninth['TestRITScore_y'].fillna(ninth[9])

ninth.drop([9], inplace=True, axis=1)
ninth.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
ninth

In [None]:
tenth = full_data.loc[full_data['Grade'] == '10']
tenth = tenth.merge(winter_data[[10, 'TestPercentile_x']],on='TestPercentile_x',how="left")

tenth['TestRITScore_y'] =tenth['TestRITScore_y'].fillna(tenth[10])

tenth.drop([10], inplace=True, axis=1)
tenth.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
tenth

In [None]:
eleventh = full_data.loc[full_data['Grade'] == '11']
eleventh = eleventh.merge(winter_data[[11, 'TestPercentile_x']],on='TestPercentile_x',how="left")

eleventh['TestRITScore_y'] =eleventh['TestRITScore_y'].fillna(eleventh[11])

eleventh.drop([11], inplace=True, axis=1)
eleventh.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
eleventh

In [None]:
twelfth = full_data.loc[full_data['Grade'] == '12']
twelfth = twelfth.merge(winter_data[[12, 'TestPercentile_x']],on='TestPercentile_x',how="left")

twelfth['TestRITScore_y'] =twelfth['TestRITScore_y'].fillna(twelfth[12])

twelfth.drop([12], inplace=True, axis=1)
twelfth.rename(columns={'TestRITScore_y':'Winter Test'},inplace=True)
twelfth

In [None]:
frames = [k, first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth, eleventh, twelfth]
full_data_winter = pd.concat(frames)

In [None]:
full_data_winter.rename(columns = {'TestRITScore_x':'Fall Test'}, inplace = True)

In [None]:
full_data_winter

### Students with both tests

Students who took both tests are selected and their score columns renamed so that they can be combined with the two dataframes built above (students missing the Fall test and students missing the Winter test), producing one final dataframe with a Fall and a Winter test score for every student.

In [None]:
# Selecting students that have all tests
full_data_students = NWEA_Data.loc[NWEA_Data['TestRITScore_y'].notnull()]
full_data_students = full_data_students.loc[full_2022['TestRITScore_x'].notnull()]
full_data_students

In [None]:
# Renaming columns
full_data_students.rename(columns = {'TestRITScore_y':'Winter Test'}, inplace = True)
full_data_students.rename(columns = {'TestRITScore_x':'Fall Test'}, inplace = True)
full_data_students

In [None]:
#Putting all of the data together
frames = [full_data_students, full_data_winter, full_data_fall]
data_fixed = pd.concat(frames)
data_fixed = data_fixed.drop_duplicates(subset='StudentID', keep="first") #Dropping if duplicates exist

In [None]:
data_fixed.info() #Checking the data for what is there and how many values missing

## Student Information

Combining the NWEA data with student information (ID, State ID, Gender and Language Fluency) taken from Aries, so that the NWEA data can then be joined with the ICA data.

In [None]:
students = pd.read_excel (r"C:\Users\derek.castleman\Desktop\Testing Data\ML NWEA SBAC\FInal Data\2021-2022 Student data.xlsx")
students

In [None]:
#An inner join on NWEA data with student data since State ID is necessary for ICA join
data_with_students = pd.merge(data_fixed, students, how='inner', left_on=['StudentID'], right_on=['Student ID'])
data_with_students

In [None]:
#Dropping any duplicated students
data_fixed = data_with_students.drop_duplicates(subset='StudentID', keep="first")
data_fixed

In [None]:
data_fixed.info()

## ICA Data

The ICA data for the students will first be selected by narrowing the dataframe down by Subject then Assessment Subtype.

The features that are wanted from the dataset will then be selected out.

The ICA dataframe is then inner-joined with the NWEA dataframe. An inner join is used because, unlike a missed Fall or Winter test, there is no way to fill in a missing ICA score.

Basically, if a student has missed the ICA then there is not going to be a prediction for their Spring NWEA score from the model.

In [None]:
ica = pd.read_excel (r"C:\Users\derek.castleman\Desktop\Testing Data\ML NWEA SBAC\FInal Data\ELAICA2022.xlsx")
ica

In [None]:
ica =ica.loc[ica['Subject'] == 'ELA'] #Selects the Language Arts test
ica =ica.loc[ica['AssessmentSubType'] == 'ICA'] #Selects the ICA

In [None]:
#Selects the features that are needed from the data
ica = ica[['StudentIdentifier', 'AssessmentSubType', 'Subject', 'GradeLevelWhenAssessed', 'ScaleScoreAchievementLevel', 
             'ScaleScore', 'ScaleScoreStandardError', 'EnglishLanguageAcquisitionStatus']] #Selects the needed data

In [None]:
ica

In [None]:
#Inner join between the ICA data and the NWEA data
full_testing_data = pd.merge(data_fixed, ica, how='inner', 
                            left_on=['State Student ID'], right_on=['StudentIdentifier'])
full_testing_data

In [None]:
#Checking the data after the join.
full_testing_data.info()

## Missing Values and Data Transformation

Missing values from students who had their Fall or Winter test filled in have to be dealt with. Rapid Guessing and the percent correct will be the value that they obtained on the test they actually took since the score for the test is based on this as well.

Ordinal encoding (language fluency) and one-hot encoding (gender) will be used to turn these categorical features into numbers for use in the ML model.

In [None]:
# Plain assignment, no copy: testing_data is bound to the same DataFrame object as full_testing_data
testing_data = full_testing_data

### Language

In [None]:
#Languge fluency is selected
language = testing_data['LangFlu'].to_numpy()
language = language.reshape(-1, 1)

In [None]:
scale_categories = ['L', 'R', 'I', 'E', 'F'] #Selecting the order for the categories for ordinalencoder
from sklearn.preprocessing import OrdinalEncoder
ordinalencoder = OrdinalEncoder(categories=[scale_categories])

scale_fixed = ordinalencoder.fit_transform(language) #The data is transformed to numbers

In [None]:
scale_fixed

In [None]:
#Changed into a dataframe
scale_fixed_df = pd.DataFrame(scale_fixed, columns = ['Learner'])
scale_fixed_df

In [None]:
#Rejoined back with the original dataframe
fixed_data = testing_data.join(scale_fixed_df)
fixed_data

### Gender

In [None]:
gender = fixed_data['Gender'].to_numpy() #Selecting Gender
gender = gender.reshape(-1, 1) #Reshaping it for OneHotEncoder
gender

In [None]:
#Changing Gender to numbers
from sklearn.preprocessing import OneHotEncoder

onehotencoder = OneHotEncoder(drop = 'first', sparse = False) #Dropping the first column since it is redundant

student_gender = onehotencoder.fit_transform(gender)
student_gender

In [None]:
gender_df = pd.DataFrame(student_gender, columns = ['gender'])
gender_df

In [None]:
#Adding gender as a column to the dataframe
fixed_data['gender'] = gender_df
fixed_data

In [None]:
#Setting Rapid Guessing to 
fixed_data['RapidGuessingPercentage_x'].fillna(fixed_data['RapidGuessingPercentage_y'], inplace = True)
fixed_data['RapidGuessingPercentage_y'].fillna(fixed_data['RapidGuessingPercentage_x'], inplace = True)

In [None]:
fixed_data.info()

In [None]:
import base64
from IPython.display import HTML

def create_download_link( df, title = "Testing_Data", filename = "Testing_Data"):
    """Return an HTML link that downloads `df` as CSV.

    The dataframe is written with `df.to_csv()`, base64-encoded and embedded
    in the link's `data:text/csv` href, so no file is written to disk.

    Parameters: df - dataframe to export; title - text shown for the link;
    filename - value of the link's `download` attribute.
    Returns: an `IPython.display.HTML` object wrapping the anchor tag.
    """
    encoded_csv = base64.b64encode(df.to_csv().encode()).decode()
    link_template = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(link_template.format(payload=encoded_csv, title=title, filename=filename))

create_download_link(fixed_data)

## Machine Learning

In [None]:
training_data = pd.read_csv (r"C:\Users\derek.castleman\Desktop\Testing Data\ML NWEA SBAC\FInal Data\Full_ELA_Training_Data.csv")
training_data

In [None]:
training_data['RapidGuessingPercentage_x'].fillna(training_data['RapidGuessingPercentage_y'], inplace = True)
training_data['RapidGuessingPercentage_y'].fillna(training_data['RapidGuessingPercentage_x'], inplace = True)

In [None]:
training_data.info()

In [None]:
values = {'RapidGuessingPercentage_x': 0, 'RapidGuessingPercentage_y': 0}
training_data = training_data.fillna(value=values)

In [None]:
training_data.info()

In [None]:
import base64
from IPython.display import HTML

def create_download_link( df, title = "Training_Data_ELA_Finale", filename = "Training_Data_ELA_Finale"):
    """Return an HTML link that downloads `df` as CSV.

    The dataframe is written with `df.to_csv()`, base64-encoded and embedded
    in the link's `data:text/csv` href, so no file is written to disk.

    Parameters: df - dataframe to export; title - text shown for the link;
    filename - value of the link's `download` attribute.
    Returns: an `IPython.display.HTML` object wrapping the anchor tag.
    """
    encoded_csv = base64.b64encode(df.to_csv().encode()).decode()
    link_template = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(link_template.format(payload=encoded_csv, title=title, filename=filename))

create_download_link(training_data)

In [None]:
features = training_data[['Fall Test', 'Winter Test', 'Grade', "ScaleScore", 'gender', 'RapidGuessingPercentage_x',
                      'RapidGuessingPercentage_y', 'PercentCorrect_y', 'PercentCorrect_x']]
features

In [None]:
features.info()

In [None]:
target_values = training_data[['TestRITScore']]
target_values

In [None]:
from sklearn.preprocessing import StandardScaler

model_scaler = StandardScaler()
scaled_data = model_scaler.fit_transform(features)

In [None]:
from sklearn.model_selection import cross_val_score
def display_scores(mode, X_train, y_train):
    """Print 5-fold cross-validated RMSE scores for an estimator.

    Parameters
    ----------
    mode : estimator
        The scikit-learn estimator to evaluate. The parameter keeps its
        original spelling (`mode`) so existing calls continue to work; it is
        now actually used instead of whatever global `model` happens to be.
    X_train : feature matrix passed to `cross_val_score`.
    y_train : target values passed to `cross_val_score`.

    Returns
    -------
    None. The per-fold RMSE values, their mean and their standard deviation
    are printed.
    """
    fold_scores = cross_val_score(mode, X_train, y_train,
                         scoring="neg_mean_squared_error", cv=5)
    # The scorer returns negated MSE, so flip the sign before taking the root
    scores = np.sqrt(-fold_scores)
    print("Scores:", scores)
    print("Mean:", scores.mean())
    print("Standard deviation:", scores.std())

In [None]:
def display_scores_r2(mode, X_train, y_train):
    """Print 5-fold cross-validated R^2 scores for an estimator.

    Parameters
    ----------
    mode : estimator
        The scikit-learn estimator to evaluate. The parameter keeps its
        original spelling (`mode`) so existing calls continue to work; it is
        now actually used instead of whatever global `model` happens to be.
    X_train : feature matrix passed to `cross_val_score`.
    y_train : target values passed to `cross_val_score`.

    Returns
    -------
    None. The per-fold R^2 values, their mean and their standard deviation
    are printed.
    """
    scores = cross_val_score(mode, X_train, y_train,
                         scoring="r2", cv=5)
    print("Scores:", scores)
    print("Mean:", scores.mean())
    print("Standard deviation:", scores.std())

In [None]:
X_train = features
y_train = target_values

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
display_scores(model, X_train, y_train)

In [None]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
display_scores_r2(model, X_train, y_train)

In [None]:
x = y_train.to_numpy() #Have to change training data into an array and the shape of it for use in model.
y_train = x.ravel()

In [None]:
from sklearn.ensemble import RandomForestRegressor
model = RandomForestRegressor()
display_scores(model, X_train, y_train)

In [None]:
display_scores_r2(model, X_train, y_train)

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
model = GradientBoostingRegressor()
display_scores(model, X_train, y_train)

In [None]:
display_scores_r2(model, X_train, y_train)

In [None]:
from sklearn.linear_model import RidgeCV
model = RidgeCV()
display_scores(model, X_train, y_train)

In [None]:
display_scores_r2(model, X_train, y_train)

In [None]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import RidgeCV

In [None]:
estimators = [('lr', LinearRegression()),('gbr_reg', GradientBoostingRegressor()), ('rfr', RandomForestRegressor())]

In [None]:
model = StackingRegressor(estimators)
display_scores(model, X_train, y_train)

In [None]:
model.fit(X_train, y_train)
display_scores(model, X_train, y_train)

## Machine Learning Test

In [None]:
fixed_data = pd.read_csv (r"C:\Users\derek.castleman\Desktop\Testing Data\ML NWEA SBAC\FInal Data\Testing_Data_ELA_Finale.csv") 

In [None]:
fixed_data['PercentCorrect_x'].fillna(fixed_data['PercentCorrect_y'], inplace = True)
fixed_data

In [None]:
fixed_data['PercentCorrect_y'].fillna(fixed_data['PercentCorrect_x'], inplace = True)
fixed_data

In [None]:
features = fixed_data[['Fall Test', 'Winter Test', 'Grade', "ScaleScore", 'gender', 'RapidGuessingPercentage_x',
                      'RapidGuessingPercentage_y', 'PercentCorrect_x', 'PercentCorrect_y']]
features.info()

In [None]:
target_values = fixed_data[['TestRITScore']]
target_values

In [None]:
from sklearn.preprocessing import StandardScaler

model_scaler = StandardScaler()
scaled_data = model_scaler.fit_transform(features)

In [None]:
X_test = features
y_test = target_values

In [None]:
from sklearn.metrics import mean_squared_error
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
rmse

In [None]:
y_pred = pd.DataFrame(y_pred)
y_pred

In [None]:
import base64
from IPython.display import HTML

def create_download_link( df, title = "Y_pred", filename = "Y_pred"):
    """Return an HTML link that downloads `df` as CSV.

    The dataframe is written with `df.to_csv()`, base64-encoded and embedded
    in the link's `data:text/csv` href, so no file is written to disk.

    Parameters: df - dataframe to export; title - text shown for the link;
    filename - value of the link's `download` attribute.
    Returns: an `IPython.display.HTML` object wrapping the anchor tag.
    """
    encoded_csv = base64.b64encode(df.to_csv().encode()).decode()
    link_template = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    return HTML(link_template.format(payload=encoded_csv, title=title, filename=filename))

create_download_link(y_pred)

In [None]:
import base64
from IPython.display import HTML

def create_download_link( df, title = "Y_pred", filename = "Y_pred"):
    """Return an HTML link that downloads `df` as CSV.

    The dataframe is written with `df.to_csv()`, base64-encoded and embedded
    in the link's `data:text/csv` href, so no file is written to disk.

    Parameters: df - dataframe to export; title - text shown for the link;
    filename - value of the link's `download` attribute.
    Returns: an `IPython.display.HTML` object wrapping the anchor tag.
    """
    csv = df.to_csv()
    b64 = base64.b64encode(csv.encode())
    payload = b64.decode()
    html = '<a download="{filename}" href="data:text/csv;base64,{payload}" target="_blank">{title}</a>'
    html = html.format(payload=payload,title=title,filename=filename)
    return HTML(html)

# This link exports the true test targets, so label and name it Y_test rather than relying on the
# "Y_pred" defaults (which would make it indistinguishable from the predictions download).
create_download_link(y_test, title="Y_test", filename="Y_test")