# In [2]:
import pandas as pd
from dateutil.relativedelta import relativedelta

# Read the source data into a DataFrame
df = pd.read_csv('novice.csv')

# Parse both birthdate columns as datetimes; errors='coerce' turns values
# that cannot be parsed into NaT instead of raising.
for column in ('Big Birthdate', 'Little Birthdate'):
    df[column] = pd.to_datetime(df[column], errors='coerce')

def calculate_age_diff(row):
    """Return the absolute gap, in fractional years, between a row's two birthdates.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            'Big Birthdate' and 'Little Birthdate'.

    Returns:
        The gap as a float: whole years, plus months / 12, plus
        days / 365.25, measured from the earlier date to the later one.
        None when either birthdate is null.
    """
    birthdates = (row['Big Birthdate'], row['Little Birthdate'])

    # Without both birthdates there is no gap to measure.
    if any(pd.isnull(birthdate) for birthdate in birthdates):
        return None

    # Order the dates so the delta is non-negative no matter which of the
    # two people is older.
    earlier_date, later_date = sorted(birthdates)

    # relativedelta yields calendar-aware years / months / days components.
    gap = relativedelta(later_date, earlier_date)

    # Fold the components into a single float number of years.
    month_fraction = gap.months / 12
    day_fraction = gap.days / 365.25
    return gap.years + month_fraction + day_fraction

# Compute each row's age gap (in years) and store it as a new column
df['Age Difference'] = df.apply(calculate_age_diff, axis=1)

# Partition the rows around the 10-year threshold. Rows whose gap is null
# fail both comparisons, so they appear in neither subset.
age_gap = df['Age Difference']
df_less_equal_10 = df.loc[age_gap <= 10]
df_greater_10 = df.loc[age_gap > 10]

# Write each subset to its own CSV file, leaving out the index column
df_less_equal_10.to_csv('age_diff_less_equal_10.csv', index=False)
df_greater_10.to_csv('age_diff_greater_10.csv', index=False)