In [1]:
# combine_to_csv.py
import pandas as pd
import os
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

def combine_excel_to_csv(file_paths=None, csv_filename='combined_call_center_data.csv'):
    """Combine several Excel workbooks into a single CSV file.

    Each workbook is read with the ``openpyxl`` engine and gets two extra
    columns: ``source_file`` (the workbook's file name without directory or
    extension) and ``combined_at`` (one timestamp taken at the start of the
    run). The frames are concatenated with a fresh index and written to
    ``csv_filename`` without the index column. Progress, the output file
    size, a three-row sample, the column names and the dtypes are printed.

    Files that do not exist or that fail to load are reported and skipped
    so that one bad workbook does not abort the whole run.

    Args:
        file_paths: Iterable of Excel file paths to combine. When ``None``
            (the default), ``File1.xlsx`` through ``File7.xlsx`` are used.
        csv_filename: Path of the CSV file to write.

    Returns:
        The combined ``pandas.DataFrame``.

    Raises:
        RuntimeError: If none of the files could be loaded.
    """
    print("üìÇ COMBINING EXCEL FILES TO CSV...")

    if file_paths is None:
        file_paths = (
            'File1.xlsx', 'File2.xlsx', 'File3.xlsx', 'File4.xlsx',
            'File5.xlsx', 'File6.xlsx', 'File7.xlsx',
        )

    # Take the timestamp once so every row of this run carries the same
    # value, instead of drifting by however long each workbook took to load.
    combined_at = datetime.now()

    dataframes = []
    for file_path in file_paths:
        # The whole per-file step stays inside the try: loading is
        # best-effort, and any failure is reported and the file skipped.
        try:
            if not os.path.exists(file_path):
                print(f"‚ùå File not found: {file_path}")
                continue

            df = pd.read_excel(file_path, engine='openpyxl')
            # Label rows by the actual file name rather than by list
            # position, so the label stays correct for any input list.
            df['source_file'] = os.path.splitext(os.path.basename(file_path))[0]
            df['combined_at'] = combined_at
            dataframes.append(df)
            print(f"‚úÖ Loaded {file_path} - {len(df)} records")

        except Exception as e:
            print(f"‚ùå Error loading {file_path}: {e}")

    if not dataframes:
        raise RuntimeError("No data files could be loaded.")

    # Combine all data
    combined_df = pd.concat(dataframes, ignore_index=True)
    print(f"üìä Total records combined: {len(combined_df)}")

    # Save to CSV
    combined_df.to_csv(csv_filename, index=False)
    print(f"üíæ Saved to {csv_filename}")

    # Display file info
    file_size = os.path.getsize(csv_filename) / (1024 * 1024)  # MB
    print(f"üìÅ File size: {file_size:.2f} MB")

    # Show sample of data
    print("\nüìã Sample data (first 3 rows):")
    print(combined_df.head(3))

    # Show column info
    print(f"\nüîç Columns: {list(combined_df.columns)}")
    print(f"üìä Data types:\n{combined_df.dtypes}")

    return combined_df

# Run the combine step only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    combine_excel_to_csv()

üìÇ COMBINING EXCEL FILES TO CSV...
‚úÖ Loaded File1.xlsx - 135859 records
‚úÖ Loaded File2.xlsx - 148831 records
‚úÖ Loaded File3.xlsx - 148959 records
‚úÖ Loaded File4.xlsx - 144350 records
‚úÖ Loaded File5.xlsx - 145859 records
‚úÖ Loaded File6.xlsx - 132524 records
‚úÖ Loaded File7.xlsx - 86104 records
üìä Total records combined: 942486
üíæ Saved to combined_call_center_data.csv
üìÅ File size: 128.37 MB

üìã Sample data (first 3 rows):
            StartDateTime                                UserId  \
0 2023-01-01 05:01:42.267  da7fadb4-cad9-4062-8271-bbc999ac1275   
1 2023-01-01 05:11:53.170  af512203-3ea2-48d5-bbf5-e46ae915d4d8   
2 2023-01-01 05:24:16.933  6b633a32-ec20-48f4-91dd-503d459eda1f   

        CallType disconnectType  Total_Handle_Time  Total_Hold_Time  \
0  Inbound_Queue       endpoint           1188.162            0.000   
1  Inbound_Queue           peer            396.923            0.000   
2  Inbound_Queue           peer            537.893          173.363  