In [4]:
import pandas as pd
import altair as alt
# Load the raw table from disk. Presumably one row per student -- confirm
# against the CSV; the code below only relies on the column names listed here.
data = pd.read_csv("study_performance.csv")

# Reshape wide -> long: the three score columns are folded into a pair of
# columns (`subject`, `score`), so every input row yields three output rows.
# The demographic columns are carried along unchanged as identifiers.
# Note that melt() does not keep the original row index.
data_long = data.melt(
    id_vars=['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch', 'test_preparation_course'],
    value_vars=['math_score', 'reading_score', 'writing_score'],
    var_name='subject',
    value_name='score'
)

# Last expression of the cell, so this is what the notebook renders.
# (A bare `data.head()` earlier in the cell displayed nothing and was removed,
# as was the commented-out letter-grade helper that nothing used.)
data_long.head()

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,subject,score
0,female,group B,bachelor's degree,standard,none,math_score,72
1,female,group C,some college,standard,completed,math_score,69
2,female,group B,master's degree,standard,none,math_score,90
3,male,group A,associate's degree,free/reduced,none,math_score,47
4,male,group C,some college,standard,none,math_score,76


In [5]:
# Dropdown widget offering the three score columns. The point selection bound
# to it is keyed on the `subject` field of the long-format frame, and is used
# below both as a chart parameter and as a filter predicate.
subject_input = alt.binding_select(
    name='Select Subject: ',
    options=['math_score', 'reading_score', 'writing_score'],
)
subject_selection = alt.selection_point(
    bind=subject_input,
    fields=['subject'],
    clear='none',
)

# Point chart: score on x, test-prep status on y (and again as colour),
# restricted to the rows matching the dropdown selection.
scatter_plot = (
    alt.Chart(data_long)
    .mark_point()
    .encode(
        x=alt.X('score:Q', title='Score'),
        y=alt.Y('test_preparation_course:N', title='Test Preparation Completion'),
        color=alt.Color(
            'test_preparation_course:N',
            legend=alt.Legend(title="Test Prep Completion"),
        ),
        tooltip=['gender', 'race_ethnicity', 'parental_level_of_education', 'lunch', 'score'],
    )
    .add_params(subject_selection)
    .transform_filter(subject_selection)
    .properties(
        title="Correlation between Test Preparation Completion and Student Scores",
        width=1000,
        height=300,
    )
)

# Kernel density of the scores, estimated separately per test-prep status.
# The subject filter has to run BEFORE the density transform so the estimate
# only sees the rows that pass the dropdown filter.
density_plot = (
    alt.Chart(data_long)
    .transform_filter(subject_selection)
    .transform_density(
        density='score',
        groupby=['test_preparation_course'],
        bandwidth=5,
        as_=['score', 'density'],
    )
    .mark_area(opacity=0.5)
    .encode(
        x='score:Q',
        y='density:Q',
        color='test_preparation_course:N',
    )
    .add_params(subject_selection)
    .properties(
        title="Density of Scores by Test Preparation Completion",
        width=600,
        height=300,
    )
)

# Vertical stack of the two charts; this is the layout written to disk.
combined_chart = scatter_plot & density_plot
combined_chart.save('combined_chart.html')

# Inline display uses a side-by-side layout instead (note: this differs from
# the vertically stacked chart saved to the HTML file above).
alt.hconcat(scatter_plot, density_plot)

In [6]:
# Mean score for every (parental education, subject, race/ethnicity) group.
average_scores = (
    data_long
    .groupby(['parental_level_of_education', 'subject', 'race_ethnicity'])['score']
    .mean()
    .reset_index()
)

# Rows backing the dropdown's 'All' option: the mean over ALL students for each
# (parental education, subject) pair, computed directly from the long data.
# Relabelling a copy of `average_scores` instead would leave one row per
# ethnic group under 'All'; the bar mark would then stack those rows and show
# roughly the sum of the group means rather than a single overall average.
average_scores_all = (
    data_long
    .groupby(['parental_level_of_education', 'subject'])['score']
    .mean()
    .reset_index()
    .assign(race_ethnicity='All')
)

# Columns are aligned by name, so the differing column order is harmless.
average_scores_combined = pd.concat([average_scores, average_scores_all], ignore_index=True)

# Dropdown listing 'All' followed by every race/ethnicity value in the data.
ethnicity_input = alt.binding_select(
    options=['All'] + list(average_scores['race_ethnicity'].unique()),
    name='Race/Ethnicity: '
)
ethnicity_selection = alt.selection_point(fields=['race_ethnicity'], bind=ethnicity_input)

# Bar chart of the average score per parental education level, coloured by
# subject and filtered to the race/ethnicity chosen in the dropdown.
bar_chart = alt.Chart(average_scores_combined).mark_bar().encode(
    # sort=None keeps the categories in data order instead of sorting them.
    x=alt.X('parental_level_of_education:N', title='Parental Level of Education', sort=None),
    # The axis title was previously given twice; only the axis-level one took
    # effect, so that is the one kept here.
    y=alt.Y('score:Q', axis=alt.Axis(title='Average Score per Subject')),
    color=alt.Color('subject:N', legend=alt.Legend(title="Subject")),
    tooltip=['parental_level_of_education', 'subject', 'score', 'race_ethnicity']
).transform_filter(
    ethnicity_selection
).properties(
    title="Average Scores by Parental Level of Education, Subject, and Race/Ethnicity",
    width=600,
    height=400
).add_params(
    ethnicity_selection
)

# Write a standalone HTML copy (SVG renderer), then display the chart inline
# as the cell's last expression.
bar_chart.save('barchart.html', embed_options={'renderer':'svg'})
bar_chart

In [8]:
# Work on a copy so the min-max scaling below never modifies `data`.
parallel_data = data.copy()

# Stable per-row identifier. melt() discards the row index, so it must be
# carried as a real column; the `detail='index:N'` encoding below needs it to
# draw one line per original row instead of one line per colour group.
parallel_data['index'] = parallel_data.index

# Min-max scale each score column to [0, 1] so the three subjects share an axis.
# NOTE(review): a column whose values are all equal would divide by zero here.
for col in ['math_score', 'reading_score', 'writing_score']:
    parallel_data[col] = (parallel_data[col] - parallel_data[col].min()) / (parallel_data[col].max() - parallel_data[col].min())


# Long format: one row per (original row, subject) with the scaled score.
parallel_data = parallel_data.melt(id_vars=['index', 'gender', 'race_ethnicity', 'parental_level_of_education', 'lunch', 'test_preparation_course'],
                                   value_vars=['math_score', 'reading_score', 'writing_score'],
                                   var_name='Subject', value_name='Normalized Score')

# Relabel 'none' as 'not completed' so the values match the dropdown options.
# This replaces the exact value 'none' in every column of the frame.
parallel_data= parallel_data.replace('none', 'not completed')

# Dropdown over test-prep status. The expressions below refer to the parameter
# as `Test_Prep`.
# NOTE(review): this relies on the space in the name "Test Prep" being turned
# into an underscore when the chart is compiled -- confirm.
input_widget = alt.binding_select(options=['All', 'not completed', 'completed'], name='Test Prep')
selection = alt.selection_point(fields=['test_preparation_course'], bind=input_widget, name="Test Prep")

# Shared encodings for both layers: subject on x, scaled score on y, colour by
# parental education, and one series per original row via `detail`.
base = alt.Chart(parallel_data).encode(
    x='Subject:N',
    y='Normalized Score:Q',
    color=alt.Color('parental_level_of_education:N', title= 'Parental Level of Education'),
    detail='index:N',
    # Emphasise rows matching the dropdown. The catch-all option is "All" (it
    # was previously compared against "both", which is not a dropdown option
    # and therefore never matched).
    opacity=alt.condition('datum.test_preparation_course == Test_Prep.test_preparation_course || Test_Prep.test_preparation_course == "All"', alt.value(0.9), alt.value(0.2))
).add_params(
    selection
)


# Line layer, with pan/zoom enabled.
lines = base.mark_line().interactive()

# Point layer: matching rows are fully opaque, all others are hidden
# (opacity 0). It overrides the base opacity and adds the tooltips.
points = base.mark_point().encode(
    opacity=alt.condition('datum.test_preparation_course == Test_Prep.test_preparation_course || Test_Prep.test_preparation_course == "All"', alt.value(1), alt.value(0)),
    tooltip=['gender', 'race_ethnicity', 'lunch', 'test_preparation_course', 'Subject', 'Normalized Score']
).properties(
    width=600,
    height=400
)


# Layer the two marks, write a standalone HTML copy (SVG renderer), and show
# the chart inline.
parallel_plot = lines + points
parallel_plot.save('parallelplot.html', embed_options={'renderer':'svg'})
parallel_plot.display()

Collecting voila
  Obtaining dependency information for voila from https://files.pythonhosted.org/packages/84/ba/f0afb96e99418fbe0930ad6066eddb1e8c6d0b3c368c42723fbd8bb24d98/voila-0.5.6-py3-none-any.whl.metadata
  Downloading voila-0.5.6-py3-none-any.whl.metadata (9.1 kB)
Collecting jupyter-server<3,>=2.0.0 (from voila)
  Obtaining dependency information for jupyter-server<3,>=2.0.0 from https://files.pythonhosted.org/packages/07/46/6bb926b3bf878bf687b952fb6a4c09d014b4575a25960f2cd1a61793763f/jupyter_server-2.14.0-py3-none-any.whl.metadata
  Downloading jupyter_server-2.14.0-py3-none-any.whl.metadata (8.4 kB)
Collecting websockets>=9.0 (from voila)
  Obtaining dependency information for websockets>=9.0 from https://files.pythonhosted.org/packages/d1/40/6b169cd1957476374f51f4486a3e85003149e62a14e6b78a958c2222337a/websockets-12.0-cp311-cp311-win_amd64.whl.metadata
  Downloading websockets-12.0-cp311-cp311-win_amd64.whl.metadata (6.8 kB)
Collecting jupyter-events>=0.9.0 (from jupyter-serv

SyntaxError: invalid syntax (3250007675.py, line 1)