# Unsubscriber Address Analysis

This notebook compares each magazine's unsubscribers with the recipient addresses on that magazine's mailing lists, using address data from the database.
It generates CSV files for each magazine showing both matched and unmatched unsubscribers, plus a summary CSV covering all magazines.

In [None]:
import pandas as pd
import mysql.connector
from mysql.connector import Error
import os
from pathlib import Path

# Database connection parameters.
# Each value can be overridden with an environment variable so credentials do
# not have to be written into the notebook; the fallbacks are the original
# local-development defaults.
DB_CONFIG = {
    'host': os.environ.get('ARC_DB_HOST', 'localhost'),
    'database': os.environ.get('ARC_DB_NAME', 'arc'),
    'user': os.environ.get('ARC_DB_USER', 'root'),
    'password': os.environ.get('ARC_DB_PASSWORD', ''),  # empty unless overridden
}

def get_db_connection():
    """Open a MySQL connection using the settings in ``DB_CONFIG``.

    Returns:
        The connection object when it reports itself as connected, otherwise
        ``None``. A connector ``Error`` is printed and also results in ``None``.
    """
    try:
        handle = mysql.connector.connect(**DB_CONFIG)
        connected = handle.is_connected()
    except Error as exc:
        print(f"Error while connecting to MySQL: {exc}")
        return None

    if not connected:
        return None

    print("Successfully connected to MySQL database")
    return handle

# Test connection: run a trivial query to confirm the database is reachable.
conn = get_db_connection()
if conn:
    # try/finally so the cursor and the connection are released even when the
    # query raises; the exception itself still propagates to the notebook.
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("SELECT COUNT(*) FROM magazines")
            result = cursor.fetchone()
            print(f"Total magazines in database: {result[0]}")
        finally:
            cursor.close()
    finally:
        conn.close()
else:
    print("Failed to connect to database")

In [None]:
def get_all_magazines():
    """Fetch the id and name of every magazine, ordered by id.

    Returns:
        A DataFrame with the columns ``id`` and ``name``. When the connection
        cannot be opened or the query fails, an empty DataFrame with those
        same columns is returned (instead of a list), so callers can always
        use DataFrame attributes such as ``.empty`` and ``.iterrows()``.
    """
    empty_result = pd.DataFrame(columns=['id', 'name'])

    conn = get_db_connection()
    if not conn:
        return empty_result

    query = "SELECT id, name FROM magazines ORDER BY id"

    try:
        return pd.read_sql(query, conn)
    except Exception as e:
        print(f"Error fetching magazines: {e}")
        return empty_result
    finally:
        # Single cleanup point: runs on success, on failure and on interruption.
        conn.close()

# Load the magazine list once; the report cells below iterate over it
magazines_df = get_all_magazines()
print("Available magazines:", magazines_df, sep="\n")

In [None]:
def get_matched_unsubscribers(magazine_id):
    """Return unsubscribers whose address appears on the magazine's mailing lists.

    An unsubscriber (a ``subscribers`` row with ``type = 'unsubscriber'``)
    matches a mailing-list recipient when all of the following hold:

    * the recipient's ``houseNumber + ' ' + street`` equals the unsubscriber's
      ``streetAddress`` (trimmed, case-insensitive);
    * zip codes are equal after trimming and ``LPAD(..., 5, '0')``;
    * city and state are equal (trimmed, case-insensitive);
    * the recipient's mailing list belongs to ``magazine_id``.

    Args:
        magazine_id: Id of the magazine; used both for the unsubscriber and
            for the mailing list.

    Returns:
        A DataFrame with one row per distinct combination of unsubscriber and
        matching recipient address, ordered by zip and street address. An
        empty DataFrame is returned when the connection cannot be opened or
        the query fails (the error is printed).
    """
    query = """
    SELECT DISTINCT
        s.id as subscriber_id,
        s.fName,
        s.lName,
        s.company,
        s.type as subscriber_type,
        s.magazineId,
        a.streetAddress as subscriber_street,
        a.streetAddress2 as subscriber_street2,
        a.city as subscriber_city,
        a.state as subscriber_state,
        a.zip as subscriber_zip,
        CONCAT(mlr.houseNumber, ' ', mlr.street) as unsubscriber_address,
        mr.zip as unsubscriber_zip,
        mr.city as unsubscriber_city,
        mr.state as unsubscriber_state
    FROM subscribers s
    JOIN addresses a ON s.addressId = a.id
    JOIN mailing_list_recipients mlr ON (
        LOWER(TRIM(CONCAT(mlr.houseNumber, ' ', mlr.street))) = LOWER(TRIM(a.streetAddress))
    )
    JOIN mailing_routes mr ON mlr.mailingRouteId = mr.id
    JOIN mailing_lists ml ON mr.mailingListId = ml.id
    WHERE s.type = 'unsubscriber'
        AND s.magazineId = %s
        AND ml.magazineId = %s
        AND LPAD(TRIM(a.zip), 5, '0') = LPAD(TRIM(mr.zip), 5, '0')
        AND LOWER(TRIM(a.city)) = LOWER(TRIM(mr.city))
        AND LOWER(TRIM(a.state)) = LOWER(TRIM(mr.state))
    ORDER BY a.zip, a.streetAddress
    """

    conn = get_db_connection()
    if not conn:
        return pd.DataFrame()

    try:
        # The same id fills both placeholders (unsubscriber and mailing list).
        return pd.read_sql(query, conn, params=(magazine_id, magazine_id))
    except Exception as e:
        print(f"Error fetching matched unsubscribers for magazine {magazine_id}: {e}")
        return pd.DataFrame()
    finally:
        # Also runs when a long query is interrupted from the notebook, which
        # an `except Exception` handler alone would not cover.
        conn.close()

# Smoke-test the matched query against magazine 11 and preview the result
matched_df = get_matched_unsubscribers(11)
print(f"Found {len(matched_df)} matched unsubscribers for magazine 11")
if len(matched_df):
    print(matched_df.head())

In [None]:
def get_unmatched_unsubscribers(magazine_id):
    """Return unsubscribers with no matching recipient on the magazine's mailing lists.

    This is the complement of ``get_matched_unsubscribers``: an unsubscriber
    is listed here when no mailing-list recipient of ``magazine_id`` has the
    same street address, zip, city and state (same normalisation as the
    matched query: trimmed, case-insensitive, zip compared after
    ``LPAD(..., 5, '0')``).

    The check is a single ``NOT EXISTS`` subquery so that every condition
    takes part in the decision. A chain of LEFT JOINs filtered with
    ``mlr.id IS NULL`` would only test the street-address join, and would drop
    unsubscribers whose street text happens to match a recipient in another
    city or on another magazine's list.

    Args:
        magazine_id: Id of the magazine; used both for the unsubscriber and
            for the mailing list.

    Returns:
        A DataFrame with one row per unmatched unsubscriber, ordered by zip
        and street address. An empty DataFrame is returned when the connection
        cannot be opened or the query fails (the error is printed).
    """
    query = """
    SELECT DISTINCT
        s.id as subscriber_id,
        s.fName,
        s.lName,
        s.company,
        s.type as subscriber_type,
        s.magazineId,
        a.streetAddress as subscriber_street,
        a.streetAddress2 as subscriber_street2,
        a.city as subscriber_city,
        a.state as subscriber_state,
        a.zip as subscriber_zip
    FROM subscribers s
    JOIN addresses a ON s.addressId = a.id
    WHERE s.type = 'unsubscriber'
        AND s.magazineId = %s
        AND NOT EXISTS (
            SELECT 1
            FROM mailing_list_recipients mlr
            JOIN mailing_routes mr ON mlr.mailingRouteId = mr.id
            JOIN mailing_lists ml ON mr.mailingListId = ml.id
            WHERE ml.magazineId = %s
                AND LOWER(TRIM(CONCAT(mlr.houseNumber, ' ', mlr.street))) = LOWER(TRIM(a.streetAddress))
                AND LPAD(TRIM(a.zip), 5, '0') = LPAD(TRIM(mr.zip), 5, '0')
                AND LOWER(TRIM(a.city)) = LOWER(TRIM(mr.city))
                AND LOWER(TRIM(a.state)) = LOWER(TRIM(mr.state))
        )
    ORDER BY a.zip, a.streetAddress
    """

    conn = get_db_connection()
    if not conn:
        return pd.DataFrame()

    try:
        # The same id fills both placeholders (unsubscriber and mailing list).
        return pd.read_sql(query, conn, params=(magazine_id, magazine_id))
    except Exception as e:
        print(f"Error fetching unmatched unsubscribers for magazine {magazine_id}: {e}")
        return pd.DataFrame()
    finally:
        # Also runs when a long query is interrupted from the notebook, which
        # an `except Exception` handler alone would not cover.
        conn.close()

# Smoke-test the unmatched query against magazine 11 and preview the result
unmatched_df = get_unmatched_unsubscribers(11)
print(f"Found {len(unmatched_df)} unmatched unsubscribers for magazine 11")
if len(unmatched_df):
    print(unmatched_df.head())

In [None]:
# Folder that receives the CSV files written below; created if it is missing
output_dir = Path('unsubscriber_csvs')
output_dir.mkdir(parents=False, exist_ok=True)

def generate_csvs_for_all_magazines():
    """Write a matched and an unmatched unsubscriber CSV for every magazine.

    Reads the module-level ``magazines_df`` and writes two files per magazine
    into ``output_dir``:
    ``magazine_<id>_<name>_matched_unsubscribers.csv`` and
    ``magazine_<id>_<name>_unmatched_unsubscribers.csv``, where ``<name>`` is
    the magazine name reduced to letters, digits, ``-`` and ``_``.
    Progress is printed for each magazine. Returns ``None``.
    """
    # len() works whether magazines_df is a DataFrame or an empty list, so a
    # failed magazine lookup cannot raise an AttributeError here.
    total_magazines = len(magazines_df)
    if total_magazines == 0:
        print("No magazines found")
        return

    print(f"Processing {total_magazines} magazines...")

    # enumerate() drives the progress counter, so it stays correct even when
    # the DataFrame index is not 0..n-1.
    for position, (_, row) in enumerate(magazines_df.iterrows(), start=1):
        magazine_id = row['id']
        magazine_name = row['name']

        # Clean magazine name for filename; a missing (NULL) name becomes an
        # empty string instead of raising while iterating over it.
        name_text = '' if pd.isna(magazine_name) else str(magazine_name)
        safe_name = "".join(c for c in name_text if c.isalnum() or c in (' ', '-', '_')).rstrip()
        safe_name = safe_name.replace(' ', '_')

        print(f"Processing magazine {magazine_id}: {magazine_name} ({position}/{total_magazines})")

        # Get matched unsubscribers
        matched_df = get_matched_unsubscribers(magazine_id)
        matched_filename = output_dir / f"magazine_{magazine_id}_{safe_name}_matched_unsubscribers.csv"
        matched_df.to_csv(matched_filename, index=False)
        print(f"  - Matched: {len(matched_df)} records -> {matched_filename}")

        # Get unmatched unsubscribers
        unmatched_df = get_unmatched_unsubscribers(magazine_id)
        unmatched_filename = output_dir / f"magazine_{magazine_id}_{safe_name}_unmatched_unsubscribers.csv"
        unmatched_df.to_csv(unmatched_filename, index=False)
        print(f"  - Unmatched: {len(unmatched_df)} records -> {unmatched_filename}")

    print(f"\nCompleted! All CSV files saved to: {output_dir.absolute()}")

# Run the analysis: writes a matched and an unmatched CSV per magazine into output_dir
generate_csvs_for_all_magazines()

In [None]:
# Summary statistics
def generate_summary_report():
    """Build, save and print a per-magazine summary of unsubscriber matches.

    For every magazine in the module-level ``magazines_df`` the matched and
    unmatched queries are run and their row counts recorded, together with
    the share of matched rows in percent (0 when there are no rows at all).
    The result is written to ``unsubscriber_analysis_summary.csv`` inside
    ``output_dir`` and printed.

    Returns:
        The summary DataFrame. It always carries the same columns, including
        when there are no magazines, so the CSV always has a header row.
    """
    summary_columns = [
        'magazine_id',
        'magazine_name',
        'matched_unsubscribers',
        'unmatched_unsubscribers',
        'total_unsubscribers',
        'match_percentage',
    ]
    summary_data = []

    # len() works whether magazines_df is a DataFrame or an empty list, so a
    # failed magazine lookup yields an empty report instead of an error.
    magazine_rows = magazines_df.iterrows() if len(magazines_df) > 0 else ()

    for _, row in magazine_rows:
        magazine_id = row['id']
        magazine_name = row['name']

        matched_count = len(get_matched_unsubscribers(magazine_id))
        unmatched_count = len(get_unmatched_unsubscribers(magazine_id))
        total_unsubscribers = matched_count + unmatched_count

        # Guard against division by zero for magazines without unsubscribers.
        match_percentage = (matched_count / total_unsubscribers * 100) if total_unsubscribers > 0 else 0

        summary_data.append({
            'magazine_id': magazine_id,
            'magazine_name': magazine_name,
            'matched_unsubscribers': matched_count,
            'unmatched_unsubscribers': unmatched_count,
            'total_unsubscribers': total_unsubscribers,
            'match_percentage': round(match_percentage, 2)
        })

    summary_df = pd.DataFrame(summary_data, columns=summary_columns)
    summary_filename = output_dir / "unsubscriber_analysis_summary.csv"
    summary_df.to_csv(summary_filename, index=False)

    print("\nSummary Report:")
    print(summary_df)
    print(f"\nSummary saved to: {summary_filename}")

    return summary_df

# Generate summary: one row per magazine, also saved as a CSV in output_dir
summary = generate_summary_report()