In [1]:
# Prints a fixed string. NOTE(review): presumably a leftover kernel smoke test —
# it is not used by the analysis below; consider removing this cell.
print('Yash')

Yash


In [11]:
# Import necessary libraries
%pip install "nbformat>=4.2.0" -q
# Make sure to install all required packages
%pip install pandas matplotlib seaborn numpy plotly ipywidgets -q

# Force a restart of the kernel to ensure installed packages are properly loaded
import IPython
try:
    import nbformat
    if nbformat.__version__ < '4.2.0':
        print(f"Current nbformat version: {nbformat.__version__}, upgrading...")
        %pip install --upgrade "nbformat>=4.2.0" -q
        IPython.Application.instance().kernel.do_shutdown(True)
except (ImportError, AttributeError):
    %pip install --upgrade "nbformat>=4.2.0" -q
    IPython.Application.instance().kernel.do_shutdown(True)

# Import after installation to ensure they're available
import nbformat
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display

# Styling for matplotlib/seaborn figures, so the graphs look professional.
# `sns.set(...)` is an alias of `sns.set_theme(...)`, which re-applies seaborn's own
# default style and would undo the ggplot style chosen on the line above it.
# `set_context` only rescales fonts and line widths, so the ggplot look is kept.
plt.style.use('ggplot')  # ggplot style is clean and looks good in reports
sns.set_context('notebook', font_scale=1.2)  # bigger text is better for presentations

# Load the two Virtual Learning Environment (VLE) exports: the activity log and
# the per-student final grades. Paths are relative to the working directory.
print("Loading data from the Virtual Learning Environment...")
activity_log = pd.read_csv('vle/vle_log.csv')
grades = pd.read_csv('vle/vle_grades.csv')

# --- Activity log overview -------------------------------------------------
print("Activity Log Data Overview:")
record_total = len(activity_log)
print(f"Number of records: {record_total}")
# min/max are taken on the 'date' column exactly as read from the CSV
# (it is not parsed into datetimes here).
log_dates = activity_log['date']
print(f"Date range: {log_dates.min()} to {log_dates.max()}")
student_total = activity_log['student_id'].nunique()
print(f"Number of unique students: {student_total}")
activity_kinds = activity_log['activity'].unique()
print(f"Types of activities: {activity_kinds}")

# How often each activity type occurs (most frequent first).
activity_counts = activity_log['activity'].value_counts()
print("\nActivity Counts:")
print(activity_counts)

# --- Grades overview -------------------------------------------------------
print("\nGrades Data Overview:")
graded_total = len(grades)
print(f"Number of students with grades: {graded_total}")
print("Grade distribution:")
# Students per grade, ordered by grade label (label order, not grade rank).
grade_tally = grades['final_grade'].value_counts()
grade_counts = grade_tally.sort_index()
print(grade_counts)

# Plotly is used for the charts because its figures are interactive: hovering
# over a mark shows the exact value behind it.

# Bar chart: number of students per final grade, one colour per grade.
grade_labels = grade_counts.index
students_per_grade = grade_counts.values
fig = px.bar(
    x=grade_labels,
    y=students_per_grade,
    color=grade_labels,
    title='Distribution of Student Grades',
    labels={'x': 'Grade', 'y': 'Number of Students'},
    color_discrete_sequence=px.colors.sequential.Viridis,
)
fig.update_layout(
    showlegend=False,  # the x axis already names each grade
    title_font_size=20,
    xaxis_title_font_size=16,
    yaxis_title_font_size=16,
)

# Write each bar's count just above the bar.
for grade_label, student_count in zip(grade_labels, students_per_grade):
    fig.add_annotation(
        x=grade_label,
        y=student_count,
        text=str(student_count),
        yshift=10,
        showarrow=False,
    )

fig.show()

# Pie chart of the same grade counts: a pie makes each grade's share of the
# cohort easy to read, which is what matters for the grade analysis.
pie_labels = grade_counts.index
fig_pie = px.pie(
    names=pie_labels,
    values=grade_counts.values,
    color=pie_labels,
    title='Proportion of Student Grades',
    color_discrete_sequence=px.colors.sequential.Viridis,
)
# Both update_* calls return the figure, so they can be chained.
fig_pie.update_layout(title_font_size=20).update_traces(
    textinfo='percent+label',   # show the percentage and the grade name...
    textposition='inside',      # ...inside each slice
)
fig_pie.show()

# Number of logged activities per student (Series indexed by student_id).
activities_per_student = activity_log.groupby('student_id').size()
print("\nActivities per student:")
print(f"Min: {activities_per_student.min()}")
print(f"Max: {activities_per_student.max()}")
print(f"Mean: {activities_per_student.mean():.2f}")
print(f"Median: {activities_per_student.median()}")

# Interactive histogram of activities per student. The distribution is wide, so
# plotly's built-in zoom/pan is useful for inspecting parts of it.
# One constant feeds both the histogram and the KDE scaling below so the two
# cannot drift apart.
HIST_BINS = 50
fig_hist = px.histogram(
    x=activities_per_student.values,
    nbins=HIST_BINS,
    labels={'x': 'Number of Activities', 'y': 'Number of Students'},
    title='Distribution of Activity Count per Student',
    color_discrete_sequence=['#636EFA']
)
fig_hist.update_layout(
    title_font_size=20,
    xaxis_title_font_size=16,
    yaxis_title_font_size=16
)

# KDE overlay. seaborn is used only to compute the density curve: it is drawn on
# a dedicated scratch axes (so the first line on that axes is guaranteed to be
# this curve, not one left over from an earlier plot) and the scratch figure is
# closed afterwards so no stray matplotlib chart is rendered under the cell.
scratch_fig, scratch_ax = plt.subplots()
sns.kdeplot(activities_per_student, ax=scratch_ax)
kde_lines = scratch_ax.get_lines()
kde_curve = kde_lines[0].get_data() if kde_lines else None
plt.close(scratch_fig)

# seaborn draws no curve when it cannot estimate a density; skip the overlay
# in that case instead of failing on an empty line list.
if kde_curve is not None:
    density_x, density_y = kde_curve
    # density * (number of students) * (bin width) = expected students per bin,
    # which puts the curve on the same scale as the count histogram.
    # NOTE(review): plotly may choose fewer/wider bins than `nbins` requests, so
    # this bin width — and therefore the curve's height — is approximate.
    approx_bin_width = (activities_per_student.max() - activities_per_student.min()) / HIST_BINS
    fig_hist.add_trace(
        go.Scatter(
            x=density_x,
            y=density_y * activities_per_student.shape[0] * approx_bin_width,
            mode='lines',
            name='KDE',
            line=dict(color='#EF553B', width=3)
        )
    )
fig_hist.show()

# Per-student table for the activity-vs-grade comparison: one row per student
# with their activity count and final grade. It is used to check whether more
# activity goes together with better grades.
student_activities = activities_per_student.reset_index(name='activity_count')
# NOTE(review): this is an inner join (pandas' default), so a student present in
# only one of the two tables is dropped — confirm that is intended.
student_data = pd.merge(student_activities, grades, on='student_id')

# Grades from lowest to highest, and a fixed colour per grade, so that box order
# and colours do not change with the current selection.
GRADE_ORDER = ['fail', 'pass', 'merit', 'distinction']
GRADE_COLORS = dict(zip(GRADE_ORDER, px.colors.sequential.Viridis))


def plot_by_grade(selected_grades):
    """Build a box plot of per-student activity counts for the chosen grades.

    Reads the module-level `student_data` frame (columns `final_grade` and
    `activity_count`).

    Args:
        selected_grades: Iterable of grade labels to include; rows whose
            `final_grade` is not among them are left out.

    Returns:
        The plotly figure (not yet shown), with one box per grade present.
    """
    filtered_data = student_data[student_data['final_grade'].isin(selected_grades)]

    # Order only the grades that actually have rows: listing absent grades would
    # reserve empty slots for them on the x axis.
    grades_present = set(filtered_data['final_grade'])
    ordered_grades = [grade for grade in GRADE_ORDER if grade in grades_present]

    fig = px.box(
        filtered_data,
        x='final_grade',
        y='activity_count',
        color='final_grade',
        title='Activity Count by Grade',
        labels={'final_grade': 'Grade', 'activity_count': 'Number of Activities'},
        # Without an explicit order, boxes follow first appearance in the data.
        category_orders={'final_grade': ordered_grades},
        # Fixed colour per known grade; any other label falls back to the sequence.
        color_discrete_map=GRADE_COLORS,
        color_discrete_sequence=px.colors.sequential.Viridis,
        # x and color are the same column, so each trace has a single box; the
        # default 'group' mode would still reserve side-by-side slots for every
        # trace and draw thin, off-centre boxes.
        boxmode='overlay',
    )
    fig.update_layout(
        title_font_size=20,
        xaxis_title_font_size=16,
        yaxis_title_font_size=16,
        showlegend=False
    )
    return fig

# Output area that the box plot is rendered into.
out = widgets.Output()

# Grade picker. SelectMultiple is a multi-select list box (not checkboxes); it is
# used instead of a dropdown because several grades must be selectable at once.
grade_options = ['fail', 'pass', 'merit', 'distinction']
grade_widgets = widgets.SelectMultiple(
    description='Select Grades:',
    options=grade_options,
    value=grade_options,  # start with every grade selected
    disabled=False,
)

# Show the prompt, then the picker with the output area beneath it.
print("Select grades to compare their activity distributions:")
display(grade_widgets, out)

# The decorator already routes everything the function prints or displays into
# `out`, so no additional `with out:` block is needed inside. `clear_output=True`
# empties the area before each call, and `wait=True` defers the clearing until
# new content arrives, which avoids flicker between redraws.
@out.capture(clear_output=True, wait=True)
def update_plot(*args):
    """Redraw the activity-by-grade box plot for the current selection.

    Used as the observer callback for `grade_widgets` and also called directly
    with no arguments; the arguments are ignored either way.

    Args:
        *args: Ignored (the change notification passed by `observe`).
    """
    selected = grade_widgets.value
    if not selected:
        print("Please select at least one grade.")
        return
    plot_by_grade(selected).show()

# Redraw whenever the selection changes, and draw once now for the initial selection.
grade_widgets.observe(update_plot, names='value')
update_plot()

# Quantify the relationship seen in the plots: correlate each student's activity
# count with their grade coded as 0 (fail) .. 3 (distinction).
print("\nAnalyzing relationship between activity and performance...")
grade_to_num = {'fail': 0, 'pass': 1, 'merit': 2, 'distinction': 3}
student_data['grade_numeric'] = student_data['final_grade'].map(grade_to_num)

# `.map` turns any grade label missing from the mapping into NaN, and `.corr`
# then leaves those rows out; report that instead of dropping them silently.
unmapped_total = int(student_data['grade_numeric'].isna().sum())
if unmapped_total:
    print(f"Warning: {unmapped_total} student(s) have a grade outside {list(grade_to_num)} and are excluded.")

# NOTE(review): this is Pearson correlation on an equally spaced ordinal coding;
# a rank correlation (`method='spearman'`) may suit ordinal grades better.
correlation = student_data['activity_count'].corr(student_data['grade_numeric'])
print(f"Correlation between activity count and grade level: {correlation:.3f}")

# Interpret the sign as well as the size: a strongly negative (or undefined)
# value must not be reported as merely "weak".
if pd.isna(correlation):
    print("The correlation could not be computed (too few students or no variation in the data).")
elif correlation > 0.5:
    print("This shows a strong positive relationship between activity and grades!")
elif correlation > 0.3:
    print("This shows a moderate positive relationship between activity and grades.")
elif correlation < -0.5:
    print("This shows a strong negative relationship between activity and grades!")
elif correlation < -0.3:
    print("This shows a moderate negative relationship between activity and grades.")
else:
    print("The relationship between activity and grades appears to be weak.")


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
Loading data from the Virtual Learning Environment...
Activity Log Data Overview:
Number of records: 33633
Date range: 2024-09-09 to 2024-11-29
Number of unique students: 300
Types of activities: ['module_visit' 'topic_visit' 'lab_material_download' 'forum_post_reply'
 'video_lecture_view' 'quiz_attempt' 'external_link_click']

Activity Counts:
activity
module

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed