# AF Database (AFDB) Analysis

This notebook provides an analysis of the MIT-BIH Atrial Fibrillation Database (AFDB), including:
- Number of records and their durations
- Visual comparison of AFib and normal rhythm ECG patterns

In [1]:
# Import required libraries
import numpy as np
import matplotlib.pyplot as plt
import wfdb
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Set plot style
plt.style.use('seaborn-v0_8-darkgrid')
plt.rcParams['figure.figsize'] = (14, 8)

## 1. Dataset Overview

Let's first get all records from the AFDB database and examine their basic properties.

In [2]:
# Get list of all records in AFDB
record_list = wfdb.get_record_list('afdb')
print(f"Total number of records in AFDB: {len(record_list)}")
print(f"\nRecord names: {record_list}")

Total number of records in AFDB: 25

Record names: ['00735', '03665', '04015', '04043', '04048', '04126', '04746', '04908', '04936', '05091', '05121', '05261', '06426', '06453', '06995', '07162', '07859', '07879', '07910', '08215', '08219', '08378', '08405', '08434', '08455']


## 2. Record Durations

Now let's analyze the duration of each record.

In [3]:
# Analyze each record
record_data = []

print("Analyzing all records...\n")
print("="*80)

for idx, record_name in enumerate(record_list, 1):
    try:
        # Read record
        record = wfdb.rdrecord(record_name, pn_dir='afdb')
        
        # Calculate duration
        duration_seconds = len(record.p_signal) / record.fs
        duration_minutes = duration_seconds / 60
        duration_hours = duration_minutes / 60
        
        record_data.append({
            'Record': record_name,
            'Duration (min)': duration_minutes,
            'Duration (hours)': duration_hours,
            'Sampling Rate (Hz)': record.fs,
            'Channels': len(record.sig_name)
        })
        
        print(f"{idx}. {record_name}:")
        print(f"   Duration: {duration_hours:.2f} hours ({duration_minutes:.1f} min)")
        print(f"   Sampling rate: {record.fs} Hz")
        print(f"   Channels: {len(record.sig_name)}")
        print()
        
    except Exception as e:
        print(f"Error processing {record_name}: {e}\n")

print("="*80)

Analyzing all records...

Error processing 00735: sampto must be greater than sampfrom

Error processing 03665: sampto must be greater than sampfrom

3. 04015:
   Duration: 10.23 hours (613.7 min)
   Sampling rate: 250 Hz
   Channels: 2

4. 04043:
   Duration: 10.23 hours (613.7 min)
   Sampling rate: 250 Hz
   Channels: 2

5. 04048:
   Duration: 10.23 hours (613.7 min)
   Sampling rate: 250 Hz
   Channels: 2

6. 04126:
   Duration: 10.23 hours (613.7 min)
   Sampling rate: 250 Hz
   Channels: 2

7. 04746:
   Duration: 10.23 hours (613.7 min)
   Sampling rate: 250 Hz
   Channels: 2

8. 04908:
   Duration: 10.23 hours (613.7 min)
   Sampling rate: 250 Hz
   Channels: 2

9. 04936:
   Duration: 10.23 hours (613.7 min)
   Sampling rate: 250 Hz
   Channels: 2

10. 05091:
   Duration: 10.23 hours (613.7 min)
   Sampling rate: 250 Hz
   Channels: 2

11. 05121:
   Duration: 10.23 hours (613.7 min)
   Sampling rate: 250 Hz
   Channels: 2

12. 05261:
   Duration: 10.23 hours (613.7 min)
   Sampl

In [None]:
# Create DataFrame for better visualization
df = pd.DataFrame(record_data)
print("\nðŸ“Š SUMMARY TABLE")
print("="*80)
print(df.to_string(index=False))
print("="*80)

## 3. Overall Statistics

In [None]:
# Calculate overall statistics
total_duration_hours = df['Duration (hours)'].sum()
avg_duration_hours = df['Duration (hours)'].mean()
min_duration = df['Duration (hours)'].min()
max_duration = df['Duration (hours)'].max()

print("\n" + "="*80)
print("ðŸ“ˆ OVERALL DATASET STATISTICS")
print("="*80)
print(f"\nTotal Records: {len(record_list)}")
print(f"Total Duration: {total_duration_hours:.2f} hours ({total_duration_hours*60:.1f} minutes)")
print(f"Average Duration per Record: {avg_duration_hours:.2f} hours")
print(f"Shortest Record: {min_duration:.2f} hours")
print(f"Longest Record: {max_duration:.2f} hours")
print("="*80)

## 4. Visualization: Record Durations

In [None]:
# Plot record durations
fig, ax = plt.subplots(figsize=(14, 6))

records = df['Record'].values
durations = df['Duration (hours)'].values

bars = ax.bar(range(len(records)), durations, color='steelblue', alpha=0.7, edgecolor='black', linewidth=1.5)
ax.set_xlabel('Record', fontsize=12, weight='bold')
ax.set_ylabel('Duration (hours)', fontsize=12, weight='bold')
ax.set_title('Duration of Each Record in AFDB', fontsize=14, weight='bold', pad=20)
ax.set_xticks(range(len(records)))
ax.set_xticklabels(records, rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3)

# Add value labels on bars
for i, (bar, duration) in enumerate(zip(bars, durations)):
    height = bar.get_height()
    ax.text(bar.get_x() + bar.get_width()/2., height,
            f'{duration:.1f}h',
            ha='center', va='bottom', fontsize=9, weight='bold')

plt.tight_layout()
plt.show()

## 5. ECG Signal Visualization: AFib vs Normal Rhythm

Now let's visualize what AFib looks like compared to normal rhythm in actual ECG signals.

In [None]:
def find_rhythm_segment(record_name, rhythm_type='AFIB', duration_samples=2500):
    """
    Find a segment of specified rhythm type in a record.
    
    Parameters:
    - record_name: name of the record
    - rhythm_type: 'AFIB', 'AFL', or 'N' (normal)
    - duration_samples: number of samples to return
    
    Returns:
    - signal: ECG signal segment
    - start_sample: starting sample index
    - fs: sampling frequency
    """
    # Read record and annotations
    record = wfdb.rdrecord(record_name, pn_dir='afdb')
    annotation = wfdb.rdann(record_name, 'atr', pn_dir='afdb')
    
    # Find the target rhythm marker
    if rhythm_type == 'AFIB':
        target_symbols = ['(AFIB', '(AFL']
    else:
        target_symbols = [f'({rhythm_type}']
    
    # Find indices where target rhythm starts
    rhythm_starts = []
    for i, symbol in enumerate(annotation.symbol):
        if symbol in target_symbols:
            rhythm_starts.append(annotation.sample[i])
    
    if not rhythm_starts:
        return None, None, None
    
    # Use the first occurrence with enough samples after it
    for start in rhythm_starts:
        if start + duration_samples < len(record.p_signal):
            signal = record.p_signal[start:start+duration_samples, 0]  # Use first channel
            return signal, start, record.fs
    
    # If no segment with enough samples, use what's available
    start = rhythm_starts[0]
    signal = record.p_signal[start:start+duration_samples, 0]
    return signal, start, record.fs

In [None]:
# Find a record with both AFib and normal rhythm
# We'll use record '04015' which contains both rhythms
best_record = '04015'

print(f"Using record: {best_record}")
print("Extracting AFib and Normal rhythm segments...")

In [None]:
# Extract 10 seconds of each rhythm type
duration_sec = 10

# Get AFib segment
afib_signal, afib_start, fs = find_rhythm_segment(best_record, 'AFIB', duration_samples=int(250*duration_sec))

# Get Normal segment
normal_signal, normal_start, _ = find_rhythm_segment(best_record, 'N', duration_samples=int(250*duration_sec))

if afib_signal is not None and normal_signal is not None:
    print(f"âœ“ Successfully extracted both rhythm segments")
    print(f"  - AFib segment: {len(afib_signal)} samples starting at sample {afib_start}")
    print(f"  - Normal segment: {len(normal_signal)} samples starting at sample {normal_start}")
    print(f"  - Sampling rate: {fs} Hz")
else:
    print("âš  Could not find both rhythm types in this record")

### 5.1 Comparison Plot: AFib vs Normal Rhythm (10 seconds)

In [None]:
if afib_signal is not None and normal_signal is not None:
    # Create time arrays
    time_afib = np.arange(len(afib_signal)) / fs
    time_normal = np.arange(len(normal_signal)) / fs
    
    # Create comparison plot
    fig, axes = plt.subplots(2, 1, figsize=(16, 10))
    
    # Plot AFib
    axes[0].plot(time_afib, afib_signal, color='#ff6b6b', linewidth=1.5, label='AFib')
    axes[0].set_title(f'Atrial Fibrillation (AFib) - Record: {best_record}', 
                      fontsize=14, weight='bold', pad=15)
    axes[0].set_ylabel('Amplitude (mV)', fontsize=12, weight='bold')
    axes[0].grid(True, alpha=0.3)
    axes[0].legend(loc='upper right', fontsize=11)
    
    # Add annotations for AFib characteristics
    axes[0].text(0.02, 0.95, 'Key Features:\nâ€¢ Irregular R-R intervals\nâ€¢ Absent P waves\nâ€¢ Irregular ventricular response',
                transform=axes[0].transAxes, fontsize=10,
                verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    
    # Plot Normal
    axes[1].plot(time_normal, normal_signal, color='#4ecdc4', linewidth=1.5, label='Normal Sinus Rhythm')
    axes[1].set_title(f'Normal Sinus Rhythm - Record: {best_record}', 
                      fontsize=14, weight='bold', pad=15)
    axes[1].set_xlabel('Time (seconds)', fontsize=12, weight='bold')
    axes[1].set_ylabel('Amplitude (mV)', fontsize=12, weight='bold')
    axes[1].grid(True, alpha=0.3)
    axes[1].legend(loc='upper right', fontsize=11)
    
    # Add annotations for normal rhythm characteristics
    axes[1].text(0.02, 0.95, 'Key Features:\nâ€¢ Regular R-R intervals\nâ€¢ Clear P waves before each QRS\nâ€¢ Regular rhythm',
                transform=axes[1].transAxes, fontsize=10,
                verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='lightgreen', alpha=0.5))
    
    plt.tight_layout()
    plt.show()
    
    print("\nðŸ“Š KEY DIFFERENCES:")
    print("="*80)
    print("\nAtrial Fibrillation (AFib):")
    print("  â€¢ Irregular, chaotic atrial electrical activity")
    print("  â€¢ Absent or unrecognizable P waves")
    print("  â€¢ Irregularly irregular ventricular response (R-R intervals vary)")
    print("  â€¢ Baseline may show fibrillatory waves")
    print("\nNormal Sinus Rhythm:")
    print("  â€¢ Regular rhythm originating from SA node")
    print("  â€¢ Clear P wave before each QRS complex")
    print("  â€¢ Regular R-R intervals")
    print("  â€¢ Normal P-QRS-T sequence")
    print("="*80)
else:
    print("Unable to create comparison plot - segments not available")