In [None]:
def identify_ticker_changes(df):
    """
    Identify companies that have changed their tickers.

    Rows are ordered by ``FILE_DATE`` before grouping, so each company's
    ticker list is in order of first appearance over time. Missing ticker
    values are ignored and never count as a distinct ticker.

    Besides returning the summary, this prints the number of companies
    found and a ticker timeline for the first five of them.

    Args:
        df: pandas DataFrame with ``companyId``, ``companyTicker``,
            ``companyName`` and ``FILE_DATE`` columns.

    Returns:
        DataFrame with one row per company that has more than one distinct
        ticker and the columns ``companyId``, ``tickers`` (list of distinct
        tickers), ``companyName``, ``earliest_date``, ``latest_date`` and
        ``num_tickers``, sorted by ``num_tickers`` then ``latest_date``,
        both descending.
    """
    # Stable sort keeps the input order of rows that share a FILE_DATE,
    # which makes the per-company ticker order deterministic.
    by_date = df.sort_values('FILE_DATE', kind='stable')

    # Named aggregation produces flat, already-labelled columns. A dict spec
    # that contains a list of functions would yield MultiIndex columns, and
    # selecting 'companyTicker' from those returns a DataFrame (no .str).
    ticker_changes = by_date.groupby('companyId').agg(
        tickers=('companyTicker', lambda x: list(x.dropna().unique())),
        companyName=('companyName', 'first'),
        earliest_date=('FILE_DATE', 'min'),
        latest_date=('FILE_DATE', 'max'),
    ).reset_index()

    # Filter for companies with multiple tickers
    ticker_changes['num_tickers'] = ticker_changes['tickers'].map(len)
    changed_tickers = ticker_changes[ticker_changes['num_tickers'] > 1].copy()

    # Most tickers first; ties broken by the most recent filing date
    changed_tickers = changed_tickers.sort_values(
        ['num_tickers', 'latest_date'],
        ascending=[False, False]
    )

    print(f"Found {len(changed_tickers)} companies with ticker changes")

    def get_ticker_timeline(company_id):
        # Earliest dated row for each ticker the company has used
        return by_date.loc[
            by_date['companyId'] == company_id,
            ['FILE_DATE', 'companyTicker', 'companyName'],
        ].drop_duplicates(subset=['companyTicker'])

    # Example of first few companies with changes
    print("\nExample ticker changes:")
    for _, row in changed_tickers.head().iterrows():
        print(f"\nCompany: {row['companyName']} (ID: {row['companyId']})")
        print("Ticker timeline:")
        print(get_ticker_timeline(row['companyId']))

    return changed_tickers

# Optional: Function to clean data keeping only latest ticker per company
def clean_ticker_changes(df):
    """
    Restrict the dataset to rows that carry each company's latest ticker.

    A company's latest ticker is the final ``companyTicker`` reported for
    it once the rows are ordered by ``FILE_DATE``. Every row of ``df``
    whose (``companyId``, ``companyTicker``) pair matches that ticker is
    kept; all other rows are dropped. A one-line size summary is printed.

    Args:
        df: Original DataFrame

    Returns:
        DataFrame with only the latest ticker for each company
    """
    # One (companyId, companyTicker) row per company: its most recent ticker
    chronological = df.sort_values('FILE_DATE')
    latest_tickers = (
        chronological.groupby('companyId')['companyTicker']
        .last()
        .reset_index()
    )

    # Inner join on both keys discards rows that use a superseded ticker
    cleaned_df = df.merge(
        latest_tickers,
        how='inner',
        on=['companyId', 'companyTicker'],
    )

    print(f"Reduced from {len(df)} to {len(cleaned_df)} entries after cleaning ticker changes")
    return cleaned_df