# Longitudinal Trends Analysis

This notebook analyzes temporal trends in Bangladeshi educational data, using a synthetically generated longitudinal dataset (built in Section 1):
- Academic performance trends over time
- Enrollment and dropout patterns
- Seasonal variations in performance
- Grade progression analysis
- Institutional performance trends
- Regional development patterns

**Time Series Analysis Includes:**
- Multi-year performance tracking
- Cohort analysis
- Seasonal decomposition
- Trend identification and forecasting
- Change point detection
- Policy impact assessment

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import warnings
import sys
from pathlib import Path

# Time series analysis libraries
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Add project root to Python path
sys.path.append('../..')
from src.data_processing.data_processor import DataProcessor

# Notebook-wide defaults: never truncate wide DataFrames, silence all warnings
warnings.filterwarnings('ignore')
pd.options.display.max_columns = None

# Plot appearance: seaborn-flavoured matplotlib style first, then the figure-size
# override (it must come after the style sheet, which sets its own size) and palette
plt.style.use('seaborn-v0_8')
plt.rcParams.update({'figure.figsize': (12, 8)})
sns.set_palette('husl')

## 1. Generate Longitudinal Dataset

In [None]:
# Build a synthetic longitudinal dataset: 6 years x 3 assessment periods x 500
# records per period (9,000 rows). Every value is simulated; nothing is read from disk.
# NOTE: all draws come from NumPy's global RNG in the order written below, so
# reordering any np.random call changes the generated data even with the same seed.
np.random.seed(42)

# Define time parameters
start_year = 2018
end_year = 2023
years = list(range(start_year, end_year + 1))
n_students_per_year = 1500

# Accumulates one dict per simulated assessment record
longitudinal_data = []

for year in years:
    # Year-level shifts applied to GPA and to every subject score
    year_effect = (year - start_year) * 0.02  # Slight improvement over time
    covid_effect = -0.3 if year in [2020, 2021] else 0  # COVID-19 impact
    
    for month in [3, 6, 12]:  # Three assessment periods per year
        # Every record of a period shares the same date (the 15th of the month)
        date = datetime(year, month, 15)
        
        # Generate student data for this time period
        for i in range(n_students_per_year // 3):  # Divide by 3 for three periods
            # The ID embeds year, month and a running index, so each record gets a
            # distinct ID: students are NOT tracked across periods (no true panel).
            student_id = f'S{year}{month:02d}{i:04d}'
            
            # Seasonal bump: +0.1 in December, +0.05 in June, none in March.
            # It is added to the GPA only, not to the subject scores below.
            seasonal_effect = 0.1 if month == 12 else (0.05 if month == 6 else 0)
            
            # Base performance with trends (clipped to [0, 5] when stored below)
            base_gpa = np.random.normal(3.2, 0.8) + year_effect + covid_effect + seasonal_effect
            
            student_record = {
                'student_id': student_id,
                'assessment_date': date,
                'year': year,
                'month': month,
                'quarter': (month - 1) // 3 + 1,  # months 3/6/12 -> quarters 1/2/4
                'academic_year': f'{year}-{year+1}',
                'division': np.random.choice(['Dhaka', 'Chittagong', 'Khulna', 'Rajshahi', 'Sylhet', 'Barishal', 'Rangpur', 'Mymensingh']),
                'institution_type': np.random.choice(['Government', 'Private', 'Madrasa'], p=[0.65, 0.25, 0.10]),
                'grade_level': np.random.choice([6, 7, 8, 9, 10, 11, 12]),
                'gender': np.random.choice(['Male', 'Female']),
                'socioeconomic_status': np.random.choice(['Low', 'Medium', 'High'], p=[0.45, 0.35, 0.20]),
                'area_type': np.random.choice(['Urban', 'Rural'], p=[0.35, 0.65]),
                'gpa': max(0, min(5, base_gpa)),
                'attendance_rate': np.random.beta(8, 2),  # Beta(8, 2): values in (0, 1), mean 0.8
                'enrollment_status': np.random.choice(['Enrolled', 'Dropped', 'Transferred'], p=[0.92, 0.05, 0.03]),
                # Subject scores are drawn independently of the GPA above
                'bangla': np.random.normal(3.3, 0.8) + year_effect + covid_effect,
                'english': np.random.normal(3.0, 0.9) + year_effect + covid_effect,
                'mathematics': np.random.normal(2.9, 1.0) + year_effect + covid_effect,
                'science': np.random.normal(3.1, 0.9) + year_effect + covid_effect,
                'social_studies': np.random.normal(3.4, 0.7) + year_effect + covid_effect
            }
            
            # Clip subject scores to the same [0, 5] range as the GPA
            for subject in ['bangla', 'english', 'mathematics', 'science', 'social_studies']:
                student_record[subject] = max(0, min(5, student_record[subject]))
            
            longitudinal_data.append(student_record)

# Convert to DataFrame, ordered chronologically by assessment date
trend_data = pd.DataFrame(longitudinal_data)
trend_data['assessment_date'] = pd.to_datetime(trend_data['assessment_date'])
trend_data = trend_data.sort_values('assessment_date')

print(f"Longitudinal dataset created: {trend_data.shape}")
print(f"Time range: {trend_data['assessment_date'].min()} to {trend_data['assessment_date'].max()}")
trend_data.head()

## 2. Overall Performance Trends

In [None]:
# Summary tables at (year, month) and calendar-year granularity share one spec
summary_spec = {
    'gpa': ['mean', 'std', 'count'],
    'attendance_rate': 'mean'
}
monthly_trends = trend_data.groupby(['year', 'month']).agg(summary_spec).round(3)
yearly_trends = trend_data.groupby('year').agg(summary_spec).round(3)

print("Yearly Performance Trends:")
print(yearly_trends)

# One row per assessment date: mean GPA, mean attendance and record count
time_series = (
    trend_data
    .groupby('assessment_date')
    .agg({'gpa': 'mean', 'attendance_rate': 'mean', 'student_id': 'count'})
    .reset_index()
)
time_series.columns = ['date', 'avg_gpa', 'avg_attendance', 'student_count']

# Linear trend over the ordinal period index (0, 1, 2, ...), not over calendar time
x_numeric = np.arange(len(time_series))
z = np.polyfit(x_numeric, time_series['avg_gpa'], 1)
p = np.poly1d(z)

# Per-year and per-assessment-month averages for the two bar charts
yearly_gpa = trend_data.groupby('year')['gpa'].mean()
seasonal_gpa = trend_data.groupby('month')['gpa'].mean()

# 2x2 dashboard
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Top-left: average GPA per period with the fitted trend line on top
ax1.plot(time_series['date'], time_series['avg_gpa'], marker='o', linewidth=2, markersize=6)
ax1.plot(time_series['date'], p(x_numeric), "r--", alpha=0.8, label=f'Trend: {z[0]:.4f}/period')
ax1.set_title('Average GPA Trend Over Time')
ax1.set_ylabel('Average GPA')
ax1.legend()

# Top-right: average attendance per period
ax2.plot(time_series['date'], time_series['avg_attendance'], marker='s', color='orange', linewidth=2, markersize=6)
ax2.set_title('Average Attendance Rate Trend')
ax2.set_ylabel('Average Attendance Rate')

# Both time-series panels get the same grid and slanted date ticks
for ts_panel in (ax1, ax2):
    ts_panel.grid(True, alpha=0.3)
    ts_panel.tick_params(axis='x', rotation=45)

# Bottom-left: yearly comparison
yearly_gpa.plot(kind='bar', ax=ax3, color='skyblue')
ax3.set_title('Average GPA by Year')
ax3.set_ylabel('Average GPA')
ax3.tick_params(axis='x', rotation=45)

# Bottom-right: one bar per assessment month, relabelled with month names
seasonal_gpa.plot(kind='bar', ax=ax4, color='lightgreen')
ax4.set_title('Average GPA by Assessment Period')
ax4.set_xlabel('Month')
ax4.set_ylabel('Average GPA')
ax4.set_xticks([0, 1, 2])
ax4.set_xticklabels(['March', 'June', 'December'], rotation=0)

plt.tight_layout()
plt.show()

# Significance of the linear GPA trend
gpa_fit = stats.linregress(x_numeric, time_series['avg_gpa'])
slope, intercept, r_value, p_value, std_err = gpa_fit
print(f"\nGPA Trend Analysis:")
print(f"Slope (change per period): {slope:.4f}")
print(f"R-squared: {r_value**2:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Significant trend: {'Yes' if p_value < 0.05 else 'No'}")

## 3. Subject-wise Trend Analysis

In [None]:
# Subjects analysed individually
subjects = ['bangla', 'english', 'mathematics', 'science', 'social_studies']

# Mean score per subject at each assessment date
subject_trends = trend_data.groupby('assessment_date')[subjects].mean().reset_index()

# Regress each subject's mean score on the ordinal period index
x_numeric = np.arange(len(subject_trends))
trend_stats = {}

for subject in subjects:
    slope, intercept, r_value, p_value, std_err = stats.linregress(x_numeric, subject_trends[subject])
    trend_stats[subject] = dict(
        slope=slope,
        r_squared=r_value**2,
        p_value=p_value,
        significant=p_value < 0.05,
    )

print("Subject-wise Trend Analysis:")
for subject, stats_dict in trend_stats.items():
    print(f"\n{subject.title()}:")
    print(f"  Slope: {stats_dict['slope']:.4f}")
    print(f"  R²: {stats_dict['r_squared']:.4f}")
    print(f"  Significant: {stats_dict['significant']}")

# Static view: one panel per subject on a 2x3 grid (the sixth panel stays unused)
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
axes = axes.flatten()
dates = subject_trends['assessment_date']

for i, subject in enumerate(subjects):
    ax = axes[i]
    scores = subject_trends[subject]

    # Observed averages
    ax.plot(dates, scores, marker='o', linewidth=2, markersize=4, label=subject.title())

    # Fitted straight line over the same period index
    z = np.polyfit(x_numeric, scores, 1)
    p = np.poly1d(z)
    ax.plot(dates, p(x_numeric), "r--", alpha=0.8)

    ax.set_title(f'{subject.title()} Performance Trend')
    ax.set_ylabel('Average Score')
    ax.grid(True, alpha=0.3)
    ax.tick_params(axis='x', rotation=45)

    # Annotate with the regression statistics computed above
    slope = trend_stats[subject]['slope']
    r_sq = trend_stats[subject]['r_squared']
    annotation = f'Slope: {slope:.4f}\nR²: {r_sq:.3f}'
    ax.text(
        0.05, 0.95, annotation,
        transform=ax.transAxes,
        verticalalignment='top',
        bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8),
    )

# Five subjects on six axes: blank out the spare one
axes[-1].set_visible(False)

plt.tight_layout()
plt.show()

# Interactive view: all subjects overlaid on a single plotly figure
fig_interactive = go.Figure()

for subject in subjects:
    subject_trace = go.Scatter(
        x=subject_trends['assessment_date'],
        y=subject_trends[subject],
        mode='lines+markers',
        name=subject.title(),
        line=dict(width=2),
        marker=dict(size=6),
    )
    fig_interactive.add_trace(subject_trace)

fig_interactive.update_layout(
    title='Interactive Subject Performance Trends',
    xaxis_title='Assessment Date',
    yaxis_title='Average Score',
    hovermode='x unified'
)

fig_interactive.show()

## 4. Demographic Trends Over Time

In [None]:
def _gpa_by_date_and(column):
    """Mean GPA per assessment date, pivoted to one column per value of `column`."""
    return trend_data.groupby(['assessment_date', column])['gpa'].mean().unstack().reset_index()


# One wide frame per demographic dimension
gender_trends = _gpa_by_date_and('gender')
ses_trends = _gpa_by_date_and('socioeconomic_status')
area_trends = _gpa_by_date_and('area_type')
institution_trends = _gpa_by_date_and('institution_type')

# Explicit colours are used for SES and institution type only; the gender and
# area panels fall back to the default colour cycle.
colors = ['red', 'orange', 'green']
inst_colors = ['blue', 'orange', 'purple']

fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# (axis, frame, categories in plot order, marker, colours or None, title)
panel_specs = [
    (ax1, gender_trends, ['Male', 'Female'], 'o', None, 'GPA Trends by Gender'),
    (ax2, ses_trends, ['Low', 'Medium', 'High'], 's', colors, 'GPA Trends by Socioeconomic Status'),
    (ax3, area_trends, ['Urban', 'Rural'], '^', None, 'GPA Trends by Area Type'),
    (ax4, institution_trends, ['Government', 'Private', 'Madrasa'], 'd', inst_colors, 'GPA Trends by Institution Type'),
]

for panel, frame, categories, marker, palette, title in panel_specs:
    for i, category in enumerate(categories):
        # Skip categories that never occur in the data
        if category not in frame.columns:
            continue
        line_kwargs = {'marker': marker, 'linewidth': 2, 'label': category}
        if palette is not None:
            line_kwargs['color'] = palette[i]
        panel.plot(frame['assessment_date'], frame[category], **line_kwargs)
    panel.set_title(title)
    panel.set_ylabel('Average GPA')
    panel.legend()
    panel.grid(True, alpha=0.3)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()


def _gap_direction(gap):
    """Label a gap series by comparing its last value with its first."""
    if gap.iloc[-1] > gap.iloc[0]:
        return 'Increasing'
    return 'Decreasing'


# Gap between groups at each date; only first vs. last period is compared
print("Gap Analysis Over Time:")

# Gender gap (absolute difference, so its sign carries no direction)
if all(group in gender_trends.columns for group in ('Male', 'Female')):
    gender_gap = (gender_trends['Female'] - gender_trends['Male']).abs()
    print(f"Gender gap (latest): {gender_gap.iloc[-1]:.3f}")
    print(f"Gender gap trend: {_gap_direction(gender_gap)}")

# SES gap (signed: High minus Low)
if all(group in ses_trends.columns for group in ('High', 'Low')):
    ses_gap = ses_trends['High'] - ses_trends['Low']
    print(f"\nSES gap (latest): {ses_gap.iloc[-1]:.3f}")
    print(f"SES gap trend: {_gap_direction(ses_gap)}")

# Urban-Rural gap (signed: Urban minus Rural)
if all(group in area_trends.columns for group in ('Urban', 'Rural')):
    area_gap = area_trends['Urban'] - area_trends['Rural']
    print(f"\nUrban-Rural gap (latest): {area_gap.iloc[-1]:.3f}")
    print(f"Urban-Rural gap trend: {_gap_direction(area_gap)}")

## 5. Regional Trends Analysis

In [None]:
# Division-wise performance trends: mean GPA per assessment date, one column per division
division_trends = trend_data.groupby(['assessment_date', 'division'])['gpa'].mean().unstack().reset_index()

divisions = ['Dhaka', 'Chittagong', 'Khulna', 'Rajshahi', 'Sylhet', 'Barishal', 'Rangpur', 'Mymensingh']

# Calculate division performance changes between the first and the last
# assessment period (intermediate periods are ignored)
division_changes = {}
for division in divisions:
    if division in division_trends.columns:
        initial_perf = division_trends[division].iloc[0]
        final_perf = division_trends[division].iloc[-1]
        change = final_perf - initial_perf
        division_changes[division] = {
            'initial': initial_perf,
            'final': final_perf,
            'change': change,
            'percent_change': (change / initial_perf) * 100
        }

print("Division Performance Changes:")
# The loop variable is deliberately NOT called `stats`: a top-level `for` target
# stays bound after the loop and would replace the imported `scipy.stats` module,
# breaking every later `stats.linregress(...)` call in the notebook.
for division, change_info in division_changes.items():
    print(f"{division}: {change_info['change']:+.3f} ({change_info['percent_change']:+.1f}%)")

# Visualization
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Division trends (all on one plot)
colors = plt.cm.tab10(np.linspace(0, 1, len(divisions)))
for i, division in enumerate(divisions):
    if division in division_trends.columns:
        ax1.plot(division_trends['assessment_date'], division_trends[division], 
                linewidth=2, label=division, color=colors[i], marker='o', markersize=4)

ax1.set_title('GPA Trends by Division')
ax1.set_ylabel('Average GPA')
ax1.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
ax1.grid(True, alpha=0.3)
ax1.tick_params(axis='x', rotation=45)

# Division performance change bar chart (green = improved, red = flat or declined)
changes = [change_info['change'] for change_info in division_changes.values()]
division_names = list(division_changes.keys())
colors_bar = ['green' if x > 0 else 'red' for x in changes]

ax2.bar(division_names, changes, color=colors_bar, alpha=0.7)
ax2.set_title('Performance Change by Division (2018-2023)')
ax2.set_ylabel('GPA Change')
ax2.tick_params(axis='x', rotation=45)
ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)

# Top and bottom performing divisions over time
# Calculate average performance by division across all years
avg_division_perf = trend_data.groupby('division')['gpa'].mean().sort_values(ascending=False)

top_3 = avg_division_perf.head(3)
bottom_3 = avg_division_perf.tail(3)

ax3.bar(range(len(top_3)), top_3.values, color='lightgreen', alpha=0.8)
ax3.set_title('Top 3 Performing Divisions (Overall Average)')
ax3.set_ylabel('Average GPA')
ax3.set_xticks(range(len(top_3)))
ax3.set_xticklabels(top_3.index, rotation=45)

ax4.bar(range(len(bottom_3)), bottom_3.values, color='lightcoral', alpha=0.8)
ax4.set_title('Bottom 3 Performing Divisions (Overall Average)')
ax4.set_ylabel('Average GPA')
ax4.set_xticks(range(len(bottom_3)))
ax4.set_xticklabels(bottom_3.index, rotation=45)

plt.tight_layout()
plt.show()

# Interactive regional analysis
fig_regional = go.Figure()

# Plot the four divisions with the highest overall average GPA so the traces
# match the "Top 4 Divisions" title (previously the first four entries of the
# hard-coded `divisions` list were shown regardless of performance).
top_4_divisions = avg_division_perf.head(4).index
for division in top_4_divisions:
    if division in division_trends.columns:
        fig_regional.add_trace(go.Scatter(
            x=division_trends['assessment_date'],
            y=division_trends[division],
            mode='lines+markers',
            name=division,
            line=dict(width=3),
            marker=dict(size=6)
        ))

fig_regional.update_layout(
    title='Interactive Regional Performance Trends (Top 4 Divisions)',
    xaxis_title='Assessment Date',
    yaxis_title='Average GPA',
    hovermode='x unified'
)

fig_regional.show()

## 6. Seasonal Patterns and Cyclical Trends

In [None]:
# Mean GPA, mean attendance and record count for every (year, assessment month) pair
period_groups = trend_data.groupby(['year', 'month'])
seasonal_analysis = period_groups.agg(
    {'gpa': 'mean', 'attendance_rate': 'mean', 'student_id': 'count'}
).reset_index()

# Add season labels
def get_season(month):
    """Map an assessment month to its period label.

    March (3) -> 'Mid-year', June (6) -> 'Pre-summer'; every other value,
    including December (12), falls through to 'Year-end'.
    """
    for known_month, label in ((3, 'Mid-year'), (6, 'Pre-summer')):
        if month == known_month:
            return label
    return 'Year-end'

# Label every (year, month) row with its assessment-period name
seasonal_analysis['season'] = seasonal_analysis['month'].apply(get_season)

# Mean and spread (across years) of the per-period GPA averages
gpa_by_season = seasonal_analysis.groupby('season')['gpa']
seasonal_effects = gpa_by_season.mean()
seasonal_std = gpa_by_season.std()

print("Seasonal Performance Patterns:")
for season in seasonal_effects.index:
    print(f"{season}: {seasonal_effects[season]:.3f} ± {seasonal_std[season]:.3f}")

# Years as rows, assessment months as columns
growth_analysis = seasonal_analysis.pivot(index='year', columns='month', values='gpa')

# Percentage change from the previous year, per month column (first year is NaN)
yoy_changes = growth_analysis.pct_change() * 100

print(f"\nYear-over-Year GPA Changes (%):")
print(yoy_changes.round(2))

# 2x2 dashboard
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Top-left: period means with +/- one standard deviation error bars
season_order = ['Mid-year', 'Pre-summer', 'Year-end']
seasonal_data = [seasonal_effects[season] for season in season_order]
seasonal_errors = [seasonal_std[season] for season in season_order]

ax1.bar(
    season_order,
    seasonal_data,
    yerr=seasonal_errors,
    capsize=5,
    color=['lightblue', 'orange', 'lightgreen'],
    alpha=0.8,
)
ax1.set_title('Average GPA by Assessment Period')
ax1.set_ylabel('Average GPA')
ax1.tick_params(axis='x', rotation=45)

# Top-right: one line per assessment month across the years
for month in [3, 6, 12]:
    in_month = seasonal_analysis['month'] == month
    month_data = seasonal_analysis[in_month]
    season_name = get_season(month)
    ax2.plot(month_data['year'], month_data['gpa'],
             marker='o', linewidth=2, label=season_name, markersize=6)

ax2.set_title('Seasonal GPA Trends Over Years')
ax2.set_xlabel('Year')
ax2.set_ylabel('Average GPA')
ax2.legend()
ax2.grid(True, alpha=0.3)

# Bottom-left: year x month heat map, colour scale centred on the grand mean
pivot_data = seasonal_analysis.pivot(index='year', columns='month', values='gpa')
grand_mean = pivot_data.mean().mean()
sns.heatmap(
    pivot_data,
    annot=True,
    cmap='RdYlGn',
    center=grand_mean,
    ax=ax3,
    cbar_kws={'label': 'Average GPA'},
)
ax3.set_title('GPA Heatmap: Year vs Assessment Period')
ax3.set_xlabel('Assessment Month')
ax3.set_ylabel('Year')

# Bottom-right: spread of the yearly averages within each assessment month
# (month is cast to str so seaborn treats it as a category)
seasonal_analysis['month_str'] = seasonal_analysis['month'].astype(str)
sns.boxplot(data=seasonal_analysis, x='month_str', y='gpa', ax=ax4)
ax4.set_title('GPA Distribution by Assessment Period')
ax4.set_xlabel('Assessment Month')
ax4.set_ylabel('Average GPA')
ax4.set_xticklabels(['March', 'June', 'December'])

plt.tight_layout()
plt.show()

# One-way ANOVA across the three assessment months; each group holds the
# per-year average GPA for that month
from scipy.stats import f_oneway

march_data = seasonal_analysis.loc[seasonal_analysis['month'] == 3, 'gpa']
june_data = seasonal_analysis.loc[seasonal_analysis['month'] == 6, 'gpa']
december_data = seasonal_analysis.loc[seasonal_analysis['month'] == 12, 'gpa']

f_stat, p_value = f_oneway(march_data, june_data, december_data)
print(f"\nANOVA test for seasonal differences:")
print(f"F-statistic: {f_stat:.4f}")
print(f"P-value: {p_value:.4f}")
print(f"Significant seasonal differences: {'Yes' if p_value < 0.05 else 'No'}")

## 7. Impact Assessment and Change Points

In [None]:
# Average GPA in three eras split by calendar year:
# before 2020, during 2020-2021, and after 2021
record_year = trend_data['year']
pre_covid = trend_data.loc[record_year < 2020, 'gpa'].mean()
covid_period = trend_data.loc[record_year.isin([2020, 2021]), 'gpa'].mean()
post_covid = trend_data.loc[record_year > 2021, 'gpa'].mean()

print("COVID-19 Impact Analysis:")
print(f"Pre-COVID average GPA (2018-2019): {pre_covid:.3f}")
print(f"COVID period average GPA (2020-2021): {covid_period:.3f}")
print(f"Post-COVID average GPA (2022-2023): {post_covid:.3f}")
print(f"COVID impact: {covid_period - pre_covid:+.3f}")
print(f"Recovery: {post_covid - covid_period:+.3f}")

# Change point detection (simplified)
def detect_change_points(data, threshold=0.1):
    """Flag positions where the 3-point rolling mean jumps by more than `threshold`.

    Args:
        data: pandas Series of observations in chronological order.
        threshold: minimum absolute step between consecutive rolling means
            for a position to be reported.

    Returns:
        Index of the labels in `data` whose rolling-mean step exceeds the
        threshold. The first three positions can never be flagged because
        their rolling mean or its difference is NaN.
    """
    smoothed = data.rolling(window=3).mean()
    step_sizes = smoothed.diff().abs()
    exceeds_threshold = step_sizes > threshold
    return step_sizes[exceeds_threshold].index

# Run the change-point detector on the date-indexed average-GPA series
time_series_gpa = time_series.set_index('date')['avg_gpa']
change_points = detect_change_points(time_series_gpa)

print(f"\nDetected change points in performance:")
for cp in change_points:
    print(f"  {cp.strftime('%Y-%m')}: {time_series_gpa[cp]:.3f}")


def _era_gpa_means(frame):
    """Return mean GPA of `frame` before 2020, during 2020-2021 and after 2021."""
    frame_year = frame['year']
    before = frame.loc[frame_year < 2020, 'gpa'].mean()
    during = frame.loc[frame_year.isin([2020, 2021]), 'gpa'].mean()
    after = frame.loc[frame_year > 2021, 'gpa'].mean()
    return before, during, after


# Same pre / during / post comparison, broken down by institution type
institution_impact = {}
for inst_type in ['Government', 'Private', 'Madrasa']:
    inst_data = trend_data[trend_data['institution_type'] == inst_type]
    if inst_data.empty:
        continue
    pre, covid, post = _era_gpa_means(inst_data)
    institution_impact[inst_type] = {
        'pre_covid': pre,
        'covid_period': covid,
        'post_covid': post,
        'covid_impact': covid - pre,
        'recovery': post - covid
    }

print(f"\nCOVID-19 Impact by Institution Type:")
for inst, impact in institution_impact.items():
    print(f"\n{inst}:")
    print(f"  COVID impact: {impact['covid_impact']:+.3f}")
    print(f"  Recovery: {impact['recovery']:+.3f}")

# 2x2 dashboard
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Top-left: overall GPA series with the COVID window shaded and change points marked
ax1.plot(time_series['date'], time_series['avg_gpa'],
         marker='o', linewidth=2, markersize=4, color='blue')

covid_start = datetime(2020, 1, 1)
covid_end = datetime(2021, 12, 31)
ax1.axvspan(covid_start, covid_end, alpha=0.3, color='red', label='COVID-19 Period')

for cp in change_points:
    ax1.axvline(x=cp, color='orange', linestyle='--', alpha=0.7)

ax1.set_title('Performance Trends with Change Points')
ax1.set_ylabel('Average GPA')
ax1.legend()
ax1.grid(True, alpha=0.3)
ax1.tick_params(axis='x', rotation=45)

# Top-right: grouped bars (impact vs. recovery) per institution type
inst_names = list(institution_impact.keys())
covid_impacts = [institution_impact[inst]['covid_impact'] for inst in inst_names]
recovery_values = [institution_impact[inst]['recovery'] for inst in inst_names]

x = np.arange(len(inst_names))
width = 0.35
half_width = width / 2

ax2.bar(x - half_width, covid_impacts, width, label='COVID Impact', color='red', alpha=0.7)
ax2.bar(x + half_width, recovery_values, width, label='Recovery', color='green', alpha=0.7)
ax2.set_title('COVID-19 Impact and Recovery by Institution Type')
ax2.set_ylabel('GPA Change')
ax2.set_xticks(x)
ax2.set_xticklabels(inst_names)
ax2.legend()
ax2.axhline(y=0, color='black', linestyle='-', alpha=0.3)

# Bottom-left: overall average GPA in each of the three eras
periods = ['Pre-COVID\n(2018-2019)', 'COVID Period\n(2020-2021)', 'Post-COVID\n(2022-2023)']
values = [pre_covid, covid_period, post_covid]
colors = ['green', 'red', 'blue']

ax3.bar(periods, values, color=colors, alpha=0.7)
ax3.set_title('Average GPA: Pre, During, and Post COVID-19')
ax3.set_ylabel('Average GPA')
ax3.tick_params(axis='x', rotation=45)

# Bottom-right: post-COVID minus COVID-period average GPA, per gender
demographic_recovery = {}
for demo in ['Male', 'Female']:
    demo_data = trend_data[trend_data['gender'] == demo]
    _, covid_avg, post_avg = _era_gpa_means(demo_data)
    demographic_recovery[demo] = post_avg - covid_avg

recovery_names = list(demographic_recovery.keys())
recovery_vals = list(demographic_recovery.values())

ax4.bar(recovery_names, recovery_vals, color=['lightblue', 'pink'], alpha=0.8)
ax4.set_title('COVID-19 Recovery by Gender')
ax4.set_ylabel('GPA Recovery')
ax4.axhline(y=0, color='black', linestyle='-', alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Trend Analysis Summary and Forecasting

In [None]:
# Simple forecasting using linear regression
def forecast_performance(time_series_data, periods_ahead=6):
    """Extrapolate a series with an ordinary least-squares line.

    The regressor is the ordinal position of each observation (0, 1, 2, ...),
    so observations are treated as evenly spaced.

    Args:
        time_series_data: pandas Series of observed values, in order.
        periods_ahead: number of future positions to predict.

    Returns:
        Tuple of (array of `periods_ahead` predicted values, in-sample R²).
    """
    n_obs = len(time_series_data)
    observed_idx = np.arange(n_obs).reshape(-1, 1)
    observed_vals = time_series_data.values

    regressor = LinearRegression()
    regressor.fit(observed_idx, observed_vals)

    # Positions immediately following the last observation
    horizon_idx = np.arange(n_obs, n_obs + periods_ahead).reshape(-1, 1)
    predicted = regressor.predict(horizon_idx)
    r_squared = regressor.score(observed_idx, observed_vals)

    return predicted, r_squared

# Generate forecasts (straight-line extrapolation of the per-period averages)
gpa_forecast, gpa_r2 = forecast_performance(time_series['avg_gpa'])
attendance_forecast, attendance_r2 = forecast_performance(time_series['avg_attendance'])

# Create future dates: one point every 90 days after the last observation, with
# exactly one date per forecast value so the plotted x/y lengths always agree.
# NOTE(review): the historical assessment dates are not evenly spaced, so these
# dates are an approximation used for plotting only.
last_date = time_series['date'].iloc[-1]
future_dates = [last_date + timedelta(days=90 * i) for i in range(1, len(gpa_forecast) + 1)]

print("TREND ANALYSIS SUMMARY")
print("=" * 50)

# Overall trends
# NOTE: relies on `stats` still being the scipy.stats module at this point.
overall_slope = stats.linregress(np.arange(len(time_series)), time_series['avg_gpa'])[0]
print(f"📈 Overall GPA Trend:")
print(f"   • Slope: {overall_slope:+.4f} per assessment period")
# Three assessment periods per year, hence slope * 3
print(f"   • Annual change: {overall_slope * 3:+.4f} (approx)")
print(f"   • Model R²: {gpa_r2:.3f}")

# Subject trends summary
print(f"📚 Subject Performance Trends:")
for subject, stats_dict in trend_stats.items():
    trend_direction = 'Improving' if stats_dict['slope'] > 0 else 'Declining'
    print(f"   • {subject.title()}: {trend_direction} ({stats_dict['slope']:+.4f}/period)")

# Regional summary (based on first-vs-last period change per division)
print(f"🗺️ Regional Trends:")
improving_divisions = [div for div, data in division_changes.items() if data['change'] > 0]
declining_divisions = [div for div, data in division_changes.items() if data['change'] < 0]
print(f"   • Improving divisions: {', '.join(improving_divisions) if improving_divisions else 'None'}")
print(f"   • Declining divisions: {', '.join(declining_divisions) if declining_divisions else 'None'}")

# Impact assessment
print(f"⚡ Key Impacts Identified:")
print(f"   • COVID-19 impact: {covid_period - pre_covid:+.3f} GPA points")
print(f"   • Recovery progress: {post_covid - covid_period:+.3f} GPA points")
# "Complete" means the post-COVID average is back at or above the pre-COVID average
recovery_status = 'Complete' if post_covid >= pre_covid else 'Partial'
print(f"   • Recovery status: {recovery_status}")

# Forecasts
print(f"🔮 Performance Forecasts (Next 6 Periods):")
current_gpa = time_series['avg_gpa'].iloc[-1]
forecast_change = gpa_forecast[-1] - current_gpa
print(f"   • Current GPA: {current_gpa:.3f}")
print(f"   • Forecast GPA (6 periods): {gpa_forecast[-1]:.3f}")
print(f"   • Expected change: {forecast_change:+.3f}")

# Visualization of forecasts
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# GPA forecast
ax1.plot(time_series['date'], time_series['avg_gpa'], 
         marker='o', linewidth=2, label='Historical', color='blue')
ax1.plot(future_dates, gpa_forecast, 
         marker='s', linewidth=2, linestyle='--', label='Forecast', color='red')
ax1.set_title(f'GPA Forecast (R² = {gpa_r2:.3f})')
ax1.set_ylabel('Average GPA')
ax1.legend()
ax1.grid(True, alpha=0.3)
ax1.tick_params(axis='x', rotation=45)

# Attendance forecast
ax2.plot(time_series['date'], time_series['avg_attendance'], 
         marker='o', linewidth=2, label='Historical', color='green')
ax2.plot(future_dates, attendance_forecast, 
         marker='s', linewidth=2, linestyle='--', label='Forecast', color='orange')
ax2.set_title(f'Attendance Forecast (R² = {attendance_r2:.3f})')
ax2.set_ylabel('Average Attendance Rate')
ax2.legend()
ax2.grid(True, alpha=0.3)
ax2.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Recommendations based on trends
# The leading newline is written as an escape: a single-quoted string literal
# cannot contain a raw line break (the original split literal was a SyntaxError).
print("\n📋 TREND-BASED RECOMMENDATIONS")
print("=" * 50)

if overall_slope > 0:
    print(f"✅ Positive overall trend detected - continue current strategies")
else:
    print(f"⚠️ Negative overall trend detected - intervention needed")

print(f"🎯 Priority Actions:")

if post_covid < pre_covid:
    print(f"   • Focus on COVID-19 recovery programs")
    print(f"   • Implement remedial education initiatives")

if declining_divisions:
    print(f"   • Target interventions in declining divisions: {', '.join(declining_divisions)}")

# Identify subjects needing attention (negative fitted slope); the comprehension
# variable avoids the name `stats` so it cannot be confused with scipy.stats
declining_subjects = [subj for subj, subj_stats in trend_stats.items() if subj_stats['slope'] < 0]
if declining_subjects:
    print(f"   • Strengthen instruction in: {', '.join([s.title() for s in declining_subjects])}")

print(f"📊 Monitoring Priorities:")
print(f"   • Continue tracking seasonal patterns")
print(f"   • Monitor regional disparities")
print(f"   • Assess long-term COVID-19 impacts")
print(f"   • Implement early warning systems for performance drops")