In [34]:
import pandas as pd
import re

def detect_absence_streaks(attendance_df, students_df):
    """Find long absence streaks per student and build parent notifications.

    A streak is a run of consecutive attendance records (ordered by
    ``attendance_date`` within each ``student_id``) whose ``status`` equals
    ``'Absent'``. Only streaks of more than three records are reported.
    Streak length counts records, not calendar days, so gaps between dates
    do not break a streak.

    Args:
        attendance_df: DataFrame with ``student_id``, ``attendance_date``
            and ``status`` columns. It is not modified.
        students_df: DataFrame with ``student_id``, ``student_name`` and
            ``parent_email`` columns.

    Returns:
        DataFrame with columns ``student_id``, ``absence_start_date``,
        ``absence_end_date``, ``total_absent_days``, ``email`` and ``msg``.
        ``email`` and ``msg`` are empty strings when the parent's address
        does not match the accepted pattern. When no streak qualifies, an
        empty DataFrame with these columns is returned.
    """
    output_columns = [
        'student_id',
        'absence_start_date',
        'absence_end_date',
        'total_absent_days',
        'email',
        'msg',
    ]

    # ``assign`` returns a new frame, so the caller's DataFrame keeps its
    # original ``attendance_date`` values.
    records = attendance_df.assign(
        attendance_date=pd.to_datetime(attendance_df['attendance_date'])
    ).sort_values(by=['student_id', 'attendance_date'])

    absence_streaks = []

    def record_streak(student_id, start_date, end_date, streak_count):
        # Keep only streaks strictly longer than three records.
        if streak_count > 3:
            absence_streaks.append({
                'student_id': student_id,
                'absence_start_date': start_date,
                'absence_end_date': end_date,
                'total_absent_days': streak_count,
            })

    for student_id, student_records in records.groupby('student_id'):
        streak_count = 0
        start_date = None
        previous_date = None

        for current_date, status in zip(student_records['attendance_date'],
                                        student_records['status']):
            if status == 'Absent':
                if streak_count == 0:
                    start_date = current_date
                streak_count += 1
            else:
                # The streak ended on the record before this one.
                record_streak(student_id, start_date, previous_date, streak_count)
                streak_count = 0
            previous_date = current_date

        # A streak may run through the student's final record.
        record_streak(student_id, start_date, previous_date, streak_count)

    if not absence_streaks:
        # A column-less frame cannot be merged on 'student_id'; return an
        # empty result with the expected columns instead.
        return pd.DataFrame(columns=output_columns)

    absence_streaks_df = pd.DataFrame(absence_streaks)
    final_df = absence_streaks_df.merge(
        students_df[['student_id', 'student_name', 'parent_email']],
        on='student_id',
        how='left',
    )

    def is_valid_email(email):
        # Local part: letter or underscore first, then letters, digits or
        # underscores; domain: alphanumerics followed by ".com".
        return bool(re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*@[a-zA-Z0-9]+\.com$', str(email)))

    final_df['email'] = final_df['parent_email'].apply(
        lambda email: email if is_valid_email(email) else ''
    )

    def generate_message(row):
        # No message is produced when there is no valid address to send it to.
        if row['email']:
            return (f"Dear Parent, your child {row['student_name']} was absent from "
                    f"{row['absence_start_date'].date()} to {row['absence_end_date'].date()} "
                    f"for {row['total_absent_days']} days. Please ensure their attendance improves.")
        return ''

    final_df['msg'] = final_df.apply(generate_message, axis=1)

    return final_df[output_columns]

# Sample attendance log: each row is [student_id, attendance_date, status].
attendance_records = [
    # Student 1: one day present, then four absent records in a row.
    [1, '2025-03-01', 'Present'],
    [1, '2025-03-02', 'Absent'],
    [1, '2025-03-03', 'Absent'],
    [1, '2025-03-04', 'Absent'],
    [1, '2025-03-05', 'Absent'],
    # Student 2: two days present, then three absent records in a row.
    [2, '2025-03-01', 'Present'],
    [2, '2025-03-02', 'Present'],
    [2, '2025-03-03', 'Absent'],
    [2, '2025-03-04', 'Absent'],
    [2, '2025-03-05', 'Absent'],
]

# Sample roster: each row is [student_id, student_name, parent_email].
student_info = [
    [1, 'Alice', 'alice_parent@gmail.com'],
    [2, 'Bob', 'bob_parent@domaincom'],
]

attendance_df = pd.DataFrame(
    data=attendance_records,
    columns=['student_id', 'attendance_date', 'status'],
)
students_df = pd.DataFrame(
    data=student_info,
    columns=['student_id', 'student_name', 'parent_email'],
)

# Run the streak detection on the sample data and show the result.
output_df = detect_absence_streaks(attendance_df, students_df)
print(output_df)


   student_id absence_start_date absence_end_date  total_absent_days  \
0           1         2025-03-02       2025-03-05                  4   

                    email                                                msg  
0  alice_parent@gmail.com  Dear Parent, your child Alice was absent from ...  
