# Monthly Performance (Daily)

In [18]:
import numpy as np
import pandas as pd
import os
from datetime import datetime
import glob
import re
import gspread
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials
from google.oauth2.service_account import Credentials

def extract_date_from_filename(filename):
    """
    Extract the report date from an export file name.

    The patterns below are tried against the file's base name in priority order:

    1. ``SA_Campaign_List_YYYYMMDD_YYYYMMDD_hash.xlsx`` (the first/start date is used)
    2. any run of 8 digits, read as ``YYYYMMDD``
    3. ``DD_MM_YYYY``
    4. ``YYYY-MM-DD``

    A match whose text is not a real calendar date is skipped and the next
    pattern is tried. If nothing parses, the file's modification date is used.

    Args:
        filename: Path (or bare name) of the file.

    Returns:
        pandas.Timestamp at midnight of the detected date.

    Raises:
        OSError: If no date can be parsed from the name and the file's
            modification time cannot be read (e.g. the file does not exist).
    """
    # Each regex is paired with the format of its capture group, so the format
    # never has to be guessed from the separators found in the matched text.
    candidates = (
        (r'SA_Campaign_List_(\d{8})_\d{8}_.*\.xlsx', '%Y%m%d'),  # Original pattern
        (r'(\d{8})', '%Y%m%d'),  # Any 8-digit date
        (r'(\d{2}_\d{2}_\d{4})', '%d_%m_%Y'),  # DD_MM_YYYY format
        (r'(\d{4}-\d{2}-\d{2})', '%Y-%m-%d'),  # YYYY-MM-DD format
    )

    basename = os.path.basename(filename)

    for pattern, date_format in candidates:
        match = re.search(pattern, basename)
        if match is None:
            continue
        try:
            return pd.to_datetime(match.group(1), format=date_format)
        except (ValueError, TypeError):
            # The matched digits are not a valid date; try the next pattern.
            continue

    # Fallback: use file modification date
    mod_time = os.path.getmtime(filename)
    return pd.to_datetime(datetime.fromtimestamp(mod_time).date())

def clean_currency_column(column):
    """
    Turn a text currency column into numbers.

    Every "C", "$" and "," character is stripped from the values, the usual
    empty-value placeholders become NaN, and whatever still cannot be parsed
    as a number is coerced to NaN. Columns that are not of object dtype are
    returned untouched.
    """
    if column.dtype != 'object':
        return column

    placeholders = ['', 'nan', 'NaN', '--', 'N/A']
    stripped = column.astype(str).str.replace(r'[C$,]', '', regex=True)
    without_placeholders = stripped.replace(placeholders, np.nan)
    return pd.to_numeric(without_placeholders, errors='coerce')

def convert_to_float(column):
    """
    Parse a text column of percentages / formatted numbers into floats.

    "%" and "," characters are removed, empty-value placeholders become NaN
    and anything that is still not numeric is coerced to NaN. Columns that are
    not of object dtype are returned untouched.
    """
    if column.dtype != 'object':
        return column

    placeholders = ['', 'nan', 'NaN', '--', 'N/A']
    stripped = column.astype(str).str.replace(r'[%,]', '', regex=True)
    without_placeholders = stripped.replace(placeholders, np.nan).infer_objects(copy=False)
    return pd.to_numeric(without_placeholders, errors='coerce')

def convert_to_int(column):
    """
    Parse a text column of formatted counts into nullable integers.

    Thousands separators are removed, empty-value placeholders become missing,
    and the result is cast to the nullable ``Int64`` dtype so missing values
    survive. Columns that are not of object dtype are returned untouched.
    """
    if column.dtype != 'object':
        return column

    placeholders = ['', 'nan', 'NaN', '--', 'N/A']
    stripped = column.astype(str).str.replace(r'[,]', '', regex=True)
    without_placeholders = stripped.replace(placeholders, np.nan).infer_objects(copy=False)
    # Go through float first so unparseable values turn into NaN before the cast
    as_float = pd.to_numeric(without_placeholders, errors='coerce')
    return as_float.astype('Int64')

def extract_asin_from_portfolio(portfolio_str):
    """
    Pull a 10-character ASIN out of a Portfolio value.

    Lookup order (on the upper-cased, stripped text):
    1. "B" followed by 9 letters/digits
    2. any run of 10 letters/digits
    3. the first 10 letters/digits once every other character is removed

    When fewer than 10 letters/digits exist, the upper-cased text itself is
    returned. Missing or empty input yields None.
    """
    if pd.isna(portfolio_str) or portfolio_str == '':
        return None

    text = str(portfolio_str).strip()
    upper_text = text.upper()

    # Steps 1 and 2: contiguous matches, most specific pattern first
    for asin_regex in (r'B[A-Z0-9]{9}', r'[A-Z0-9]{10}'):
        found = re.search(asin_regex, upper_text)
        if found:
            return found.group()

    # Step 3: glue the alphanumeric pieces together and take the first 10
    alnum_only = re.sub(r'[^A-Za-z0-9]', '', text)
    if len(alnum_only) >= 10:
        return alnum_only[:10].upper()

    # Too short to be an ASIN: hand back what we have (None for blank text)
    return upper_text if text else None

def normalize_campaign_types(text):
    """
    Shorten campaign type names to their two-letter codes (SB / SD / SP).

    Missing or empty values are returned as they came in; anything else is
    converted to a string and each known spelling is substituted in order.
    """
    if pd.isna(text) or text == '':
        return text

    # (spelling found in exports, short code) - applied top to bottom
    replacements = (
        ('sponsoredBrands', 'SB'),
        ('sponsoredDisplay', 'SD'),
        ('sponsoredProducts', 'SP'),
        ('sponsoredbrands', 'SB'),
        ('sponsoreddisplay', 'SD'),
        ('sponsoredproducts', 'SP'),
        ('Sponsored Brands', 'SB'),
        ('Sponsored Display', 'SD'),
        ('Sponsored Products', 'SP'),
    )

    result = str(text)
    for long_name, short_code in replacements:
        result = result.replace(long_name, short_code)
    return result

def process_single_xlsx(file_path):
    """
    Load one campaign export and reshape it to the fixed output schema.

    Steps: read the workbook, drop fully-empty rows/columns, strip column
    names, drop the 'Profile' / 'Labels' / 'Budget group' columns, derive ASIN
    from 'Portfolio', stamp every row with the date taken from the file name,
    normalise 'Campaign type', convert currency / float / int columns, and
    finally keep only the required columns in their fixed order (columns
    missing from the file are added as NaN).

    Args:
        file_path: Path of the .xlsx file to read.

    Returns:
        The reshaped DataFrame, or None when any step raised (the error is
        printed, not propagated, so one bad file does not stop a folder run).
    """
    try:
        print(f"Processing: {os.path.basename(file_path)}")

        # Read Excel file
        df = pd.read_excel(file_path)

        # Remove completely empty rows and columns
        df = df.dropna(axis=0, how='all')
        df = df.dropna(axis=1, how='all')

        # Clean column names
        df.columns = [str(col).strip() for col in df.columns]

        # Extract date from filename
        date_extracted = extract_date_from_filename(file_path)

        # Drop specified columns if they exist
        df = df.drop(columns=['Profile', 'Labels', 'Budget group'], errors='ignore')

        # Normalize campaign types
        if 'Campaign type' in df.columns:
            df['Campaign type'] = df['Campaign type'].apply(normalize_campaign_types)

        # Clean currency columns
        for col in ('Daily Budget', 'Current Budget'):
            if col in df.columns:
                df[col] = clean_currency_column(df[col])

        # Convert float columns
        float_columns = ['Avg.time in Budget', 'Top-of-search IS', 'CPC', 'CVR', 'ACOS', 'ROAS', 'CPA', 'CTR']
        for col in float_columns:
            if col in df.columns:
                df[col] = convert_to_float(df[col])

        # Convert int columns
        int_columns = ['Orders Other SKU', 'Units Other SKU', 'Orders Same SKU', 'Units Same SKU',
                       'Impressions', 'Clicks', 'Orders', 'Units']
        for col in int_columns:
            if col in df.columns:
                df[col] = convert_to_int(df[col])

        # Define exact required columns only
        required_columns = [
            'ASIN', 'Date', 'Campaign type', 'Campaign', 'Status', 'Country', 'Portfolio',
            'Daily Budget', 'Bidding Strategy', 'Top-of-search IS', 'Avg.time in Budget',
            'Impressions', 'Clicks', 'CTR', 'Spend', 'CPC', 'Orders', 'Sales', 'Units',
            'CVR', 'ACOS', 'ROAS', 'CPA', 'Sales Same SKU', 'Sales Other SKU',
            'Orders Same SKU', 'Orders Other SKU', 'Units Same SKU', 'Units Other SKU'
        ]

        # Build the output on df's own row index. Starting from an index-less
        # frame let the first list-like assignment pick the index; without a
        # 'Portfolio' column that was a fresh 0..n-1 range, which no longer
        # lined up with df once blank rows had been dropped.
        ordered_df = pd.DataFrame(index=df.index)

        # ASIN first (derived from Portfolio when the file has that column)
        if 'Portfolio' in df.columns:
            ordered_df['ASIN'] = df['Portfolio'].apply(extract_asin_from_portfolio)
        else:
            ordered_df['ASIN'] = np.nan

        # Date second: the same file-level date on every row
        ordered_df['Date'] = [date_extracted] * len(df)

        # Remaining required columns in the specified order; no extras are kept
        for col in required_columns[2:]:
            ordered_df[col] = df[col] if col in df.columns else np.nan

        print(f"  - Processed {len(ordered_df)} rows with {len(ordered_df.columns)} columns")
        print(f"  - Date: {date_extracted}")

        return ordered_df

    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return None

def process_folder(folder_path):
    """
    Process every Excel export in one folder and stack the results.

    Returns the concatenated DataFrame, or an empty DataFrame when the folder
    is missing, holds no Excel files, or none of the files yields data.
    """
    if not os.path.exists(folder_path):
        print(f"Folder not found: {folder_path}")
        return pd.DataFrame()

    # "*.xlxs" is searched as well to catch files saved with the misspelt extension
    candidates = []
    for extension in ("*.xlsx", "*.xlxs"):
        candidates.extend(glob.glob(os.path.join(folder_path, extension)))

    # Names starting with "~" are temporary Excel files
    all_files = [path for path in candidates if not os.path.basename(path).startswith('~')]

    if not all_files:
        print(f"No Excel files found in {folder_path}")
        return pd.DataFrame()

    print(f"Found {len(all_files)} Excel files in {folder_path}")

    # Sorted so files are always handled in the same order
    dataframes = []
    for file_path in sorted(all_files):
        frame = process_single_xlsx(file_path)
        if frame is None or frame.empty:
            continue
        dataframes.append(frame)

    if not dataframes:
        print(f"No valid data found in {folder_path}")
        return pd.DataFrame()

    combined_df = pd.concat(dataframes, ignore_index=True, sort=False)
    print(f"Combined {len(dataframes)} files into {len(combined_df)} total rows")
    return combined_df

def main():
    """
    Process the "Tháng 7" and "Tháng 8" folders and write one combined workbook.

    Folders that do not exist are reported and skipped. The combined data is
    de-duplicated on (ASIN, Date, Campaign), sorted by ASIN and Date, saved to
    a timestamped .xlsx file in the working directory and returned. An empty
    DataFrame is returned when there is nothing to process.
    """
    base_path = "C:/Users/admin1/Desktop/Performance-Tracking/Ads-XNurta"
    month_folders = [
        (month, os.path.join(base_path, "H2_2025_US", month))
        for month in ("Tháng 7", "Tháng 8")
    ]

    # Keep only the folders that are actually there
    folders_to_process = []
    for folder_name, folder_path in month_folders:
        if os.path.exists(folder_path):
            folders_to_process.append((folder_name, folder_path))
        else:
            print(f"Warning: {folder_path} not found")

    if not folders_to_process:
        print("No valid folders found. Please check your paths.")
        return pd.DataFrame()

    all_dataframes = []
    for folder_name, folder_path in folders_to_process:
        print(f"\n=== Processing {folder_name} ===")
        folder_df = process_folder(folder_path)
        if not folder_df.empty:
            all_dataframes.append(folder_df)

    if not all_dataframes:
        print("No data to process.")
        return pd.DataFrame()

    final_df = pd.concat(all_dataframes, ignore_index=True, sort=False)

    # The same campaign/day can appear in more than one file: keep the last one
    if 'ASIN' in final_df.columns and 'Campaign' in final_df.columns:
        final_df = final_df.drop_duplicates(subset=['ASIN', 'Date', 'Campaign'], keep='last')

    final_df = final_df.sort_values(['ASIN', 'Date'], na_position='last').reset_index(drop=True)

    print(f"\n=== Final Results ===")
    print(f"Total rows: {len(final_df)}")
    print(f"Date range: {final_df['Date'].min()} to {final_df['Date'].max()}")
    print(f"Unique ASINs: {final_df['ASIN'].nunique()}")
    print(f"Columns: {len(final_df.columns)}")

    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_filename = f"Combined_Ads_Data_{timestamp}.xlsx"

    # Strip time zones from datetime columns before writing to Excel
    for col in final_df.columns:
        series = final_df[col]
        if pd.api.types.is_datetime64_any_dtype(series) and series.dt.tz is not None:
            final_df[col] = series.dt.tz_localize(None)

    save_to_excel(final_df, output_filename)
    print(f"\nData saved to: {output_filename}")

    print(f"\nSample data (first 3 rows):")
    display_sample(final_df)

    return final_df

def save_to_excel(df, filename):
    """
    Write a DataFrame to an .xlsx file with auto-sized columns.

    The data goes to a sheet named 'Combined_Ads_Data' without the index.
    Each column is then widened to its longest cell text plus 2 characters,
    capped at 50.

    Args:
        df: DataFrame to write.
        filename: Destination path of the workbook.
    """
    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='Combined_Ads_Data', index=False)

        worksheet = writer.sheets['Combined_Ads_Data']

        # Auto-adjust column widths
        for column_cells in worksheet.columns:
            # Empty cells are skipped so they are not measured as the text "None"
            longest = max(
                (len(str(cell.value)) for cell in column_cells if cell.value is not None),
                default=0,
            )
            column_letter = column_cells[0].column_letter
            worksheet.column_dimensions[column_letter].width = min(longest + 2, 50)  # Cap at 50 characters

def display_sample(df, rows=3):
    """
    Print the first rows of a DataFrame without the index.

    The display options are applied through a context manager, so whatever
    values the caller had configured are restored afterwards - also when
    printing raises - instead of being reset to the pandas defaults.

    Args:
        df: DataFrame to preview.
        rows: Number of leading rows to print.
    """
    with pd.option_context(
        'display.max_columns', None,
        'display.width', None,
        'display.max_colwidth', 20,
    ):
        print(df.head(rows).to_string(index=False))

def update_with_new_file(existing_df, new_file_path):
    """
    Merge one freshly processed export into an existing dataset.

    The new rows are appended, duplicates on (ASIN, Date, Campaign) are
    resolved in favour of the later row, and the result is sorted by ASIN and
    Date. If the file cannot be processed, existing_df is returned unchanged.
    """
    incoming = process_single_xlsx(new_file_path)
    if incoming is None or incoming.empty:
        print(f"Failed to process new file: {new_file_path}")
        return existing_df

    merged = pd.concat([existing_df, incoming], ignore_index=True, sort=False)

    has_dedupe_keys = 'ASIN' in merged.columns and 'Campaign' in merged.columns
    if has_dedupe_keys:
        merged = merged.drop_duplicates(subset=['ASIN', 'Date', 'Campaign'], keep='last')

    merged = merged.sort_values(['ASIN', 'Date'], na_position='last').reset_index(drop=True)

    print(f"Successfully added data from {os.path.basename(new_file_path)}")
    print(f"Total rows after update: {len(merged)}")
    return merged

def daily_update(base_df_path, new_file_path):
    """
    Fold one new export into the workbook stored at base_df_path.

    The existing workbook is loaded when present (its 'Date' column parsed to
    datetimes); otherwise the update starts from an empty dataset. The merged
    result is written back to base_df_path and returned.
    """
    if not os.path.exists(base_df_path):
        current_df = pd.DataFrame()
        print("No existing data file found, creating new dataset")
    else:
        current_df = pd.read_excel(base_df_path)
        if 'Date' in current_df.columns:
            current_df['Date'] = pd.to_datetime(current_df['Date'])
        print(f"Loaded existing data: {len(current_df)} rows")

    # Merge in the new file, then overwrite the stored workbook
    updated_df = update_with_new_file(current_df, new_file_path)
    save_to_excel(updated_df, base_df_path)

    print(f"Updated data saved to: {base_df_path}")
    return updated_df

def load_existing_data(file_path):
    """
    Read a previously saved .xlsx dataset.

    Returns the loaded DataFrame with 'Date' parsed to datetimes, or an empty
    DataFrame when the path does not exist or does not end in ".xlsx".
    """
    if not os.path.exists(file_path) or not file_path.lower().endswith('.xlsx'):
        print(f"File not found or not XLSX format: {file_path}")
        return pd.DataFrame()

    loaded = pd.read_excel(file_path)
    if 'Date' in loaded.columns:
        loaded['Date'] = pd.to_datetime(loaded['Date'])
    print(f"Loaded existing data: {len(loaded)} rows from {os.path.basename(file_path)}")
    return loaded

def get_data_summary(df):
    """
    Print headline figures for a processed dataset.

    Always prints row and column counts; date range, unique ASINs, campaign
    type counts, total spend and total sales are printed only when the
    corresponding column exists. Nothing is returned.
    """
    if df.empty:
        print("No data to summarize")
        return

    available = df.columns

    print(f"\n=== Data Summary ===")
    print(f"Total rows: {len(df):,}")
    print(f"Total columns: {len(available)}")

    if 'Date' in available:
        first_day = df['Date'].min().date()
        last_day = df['Date'].max().date()
        print(f"Date range: {first_day} to {last_day}")

    if 'ASIN' in available:
        print(f"Unique ASINs: {df['ASIN'].nunique():,}")

    if 'Campaign type' in available:
        print(f"Campaign types: {df['Campaign type'].value_counts().to_dict()}")

    # Money totals share one format
    for money_column, label in (('Spend', 'spend'), ('Sales', 'sales')):
        if money_column in available:
            print(f"Total {label}: ${df[money_column].sum():,.2f}")

# Example usage functions
def process_month_folder(month_path, month_name):
    """
    Process one month's folder and, when it yields data, save and summarise it.

    The workbook is written to "<month_name>_Ads_Data_<timestamp>.xlsx". The
    month's DataFrame is returned either way (empty when nothing was found).
    """
    print(f"\n=== Processing {month_name} ===")
    month_df = process_folder(month_path)
    if month_df.empty:
        return month_df

    stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_filename = f"{month_name}_Ads_Data_{stamp}.xlsx"
    save_to_excel(month_df, output_filename)
    print(f"{month_name} data saved to: {output_filename}")
    get_data_summary(month_df)

    return month_df

if __name__ == "__main__":
    # Entry point: run the folder processing and report on the result
    separator = "=" * 50
    print("Starting Amazon Ads Data Processing...")
    print(separator)

    result_df = main()

    if not result_df.empty:
        get_data_summary(result_df)

        # List the output columns with their 1-based position
        print(f"\n=== Column Order Verification ===")
        print("Columns in order:")
        for position, column_name in enumerate(result_df.columns, start=1):
            print(f"{position:2d}. {column_name}")

    print("\n" + separator)
    print("Processing completed!")

    # Example usage:
    # To update with a new file:
    # updated_data = daily_update("Combined_Ads_Data_20241201_120000.xlsx", "path/to/new_file.xlsx")

    # To load existing data:
    # existing_data = load_existing_data("Combined_Ads_Data_20241201_120000.xlsx")

Starting Amazon Ads Data Processing...

=== Processing Tháng 7 ===
Found 31 Excel files in C:/Users/admin1/Desktop/Performance-Tracking/Ads-XNurta\H2_2025_US\Tháng 7
Processing: SA_Campaign_List_20250701_20250701_CNW4yt.xlsx
  - Processed 675 rows with 29 columns
  - Date: 2025-07-01 00:00:00
Processing: SA_Campaign_List_20250702_20250702_2akRmQ.xlsx
  - Processed 551 rows with 29 columns
  - Date: 2025-07-02 00:00:00
Processing: SA_Campaign_List_20250703_20250703_W0ZhJk.xlsx
  - Processed 532 rows with 29 columns
  - Date: 2025-07-03 00:00:00
Processing: SA_Campaign_List_20250704_20250704_08dyuo.xlsx
  - Processed 506 rows with 29 columns
  - Date: 2025-07-04 00:00:00
Processing: SA_Campaign_List_20250705_20250705_pKy2B5.xlsx
  - Processed 504 rows with 29 columns
  - Date: 2025-07-05 00:00:00
Processing: SA_Campaign_List_20250706_20250706_JjWjI1.xlsx
  - Processed 510 rows with 29 columns
  - Date: 2025-07-06 00:00:00
Processing: SA_Campaign_List_20250707_20250707_c0sH3M.xlsx
  - Pro

In [19]:
# Authorise a gspread client with the service-account key
scopes = ["https://www.googleapis.com/auth/spreadsheets", 
          "https://www.googleapis.com/auth/drive"]
creds = Credentials.from_service_account_file("c:/Users/admin1/Downloads/new_credential.json", scopes=scopes)
client = gspread.authorize(creds)

# Target Google Sheet
sheet_id = "1lZ4dsi94HaeWshsEizKTyNHeOOG0tpLJhzL9pMxvd6k"

# Open the spreadsheet once and take the worksheet from that object
# (it was previously opened a second time just to reach the worksheet)
spreadsheet = client.open_by_key(sheet_id)
sheet1 = spreadsheet.worksheet("Raw_XN_Q3_2025_US")

# Write result_df (produced by the processing cell above) into the worksheet.
# NOTE(review): the worksheet is not cleared first - if result_df is shorter
# than what is already there, old rows may remain below it; confirm.
set_with_dataframe(sheet1, result_df)

# SellerBoard (Daily)

In [12]:
import os
import json
import hashlib
import re
import pandas as pd
from datetime import datetime
import gspread
from google.oauth2.service_account import Credentials
from gspread_dataframe import set_with_dataframe

class SBDataProcessor:
    """
    Load SellerBoard daily Excel exports into one Google Sheets worksheet.

    Files under base_folder are discovered recursively, stamped with the date
    found in their file name, reduced to a fixed column set, combined and
    uploaded. A JSON metadata file (name, hash, modification time per file)
    is kept next to the script so that later runs can pick up only new or
    modified files.
    """

    def __init__(self, base_folder, credentials_path, sheet_id, worksheet_name):
        """
        Args:
            base_folder: Root folder that is walked for .xlsx exports.
            credentials_path: Path of the service-account JSON key.
            sheet_id: Key of the target Google spreadsheet.
            worksheet_name: Name of the worksheet that receives the data.
        """
        self.base_folder = base_folder
        self.credentials_path = credentials_path
        self.sheet_id = sheet_id
        self.worksheet_name = worksheet_name
        self.metadata_file = "sb_file_metadata.json"

        # Standard column order of the output.
        # The "c" of "Refund cost" is the Cyrillic letter U+0441 (written as an
        # escape here so it cannot be mistaken for the Latin letter).
        self.standard_columns = [
            'Product', 'ASIN', 'Date', 'SKU', 'Units', 'Refunds', 'Sales',
            'Promo', 'Ads', 'Sponsored products (PPC)', '% Refunds', 'Refund \u0441ost',
            'Amazon fees', 'Cost of Goods', 'Gross profit', 'Net profit',
            'Estimated payout', 'Real ACOS', 'Sessions', 'VAT', 'Shipping'
        ]

        # Initialize Google Sheets
        self._init_google_sheets()

        # Load existing metadata
        self.file_metadata = self._load_metadata()

    def _init_google_sheets(self):
        """Authorise with the service account and open the target worksheet."""
        scopes = ["https://www.googleapis.com/auth/spreadsheets",
                  "https://www.googleapis.com/auth/drive"]
        creds = Credentials.from_service_account_file(self.credentials_path, scopes=scopes)
        self.client = gspread.authorize(creds)
        self.spreadsheet = self.client.open_by_key(self.sheet_id)
        self.worksheet = self.spreadsheet.worksheet(self.worksheet_name)

    def _load_metadata(self):
        """Return the stored per-file metadata, or {} when no file exists yet."""
        if os.path.exists(self.metadata_file):
            with open(self.metadata_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        return {}

    def _save_metadata(self):
        """Write the per-file metadata to the JSON file.

        Values JSON cannot encode (dates, datetimes) are stored as their
        ``str()`` form.
        """
        with open(self.metadata_file, 'w', encoding='utf-8') as f:
            json.dump(self.file_metadata, f, indent=2, ensure_ascii=False, default=str)

    def _get_file_hash(self, file_path):
        """Return the MD5 hex digest of the file content (change detection only)."""
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def extract_date_from_filename(self, filename):
        """
        Return the first DD_MM_YYYY date in filename as a ``date``.

        Returns None when the name holds no such pattern or the matched
        digits are not a real calendar date.
        """
        match = re.search(r"(\d{2}_\d{2}_\d{4})", filename)
        if not match:
            return None
        try:
            return datetime.strptime(match.group(1), "%d_%m_%Y").date()
        except ValueError:
            # e.g. "99_99_2025": treat like a file without a date instead of
            # aborting the whole run
            return None

    @staticmethod
    def _match_key(name):
        """Lower-case a column name and fold Cyrillic "с" (U+0441) to Latin "c"."""
        return name.lower().replace('\u0441', 'c')

    def _standardize_columns(self, df):
        """
        Rename the frame's columns to the standard names and keep only those.

        For every standard column the source column is chosen in this order:
        identical name, same name ignoring case (and the Cyrillic/Latin "c"),
        then the special partial matches for the sponsored-products and
        refund-cost columns. A source column is used for at most one standard
        column. Standard columns with no source are added filled with None.

        Note: the column names of the passed frame are stripped in place.

        Returns:
            A new DataFrame with exactly the standard columns, in order.
        """
        # Clean up column names
        df.columns = [str(c).strip() for c in df.columns]

        column_mapping = {}
        for std_col in self.standard_columns:
            std_key = self._match_key(std_col)
            free_columns = [c for c in df.columns if c not in column_mapping]

            # Identical spelling wins, then a case-insensitive match
            if std_col in free_columns:
                source = std_col
            else:
                source = next((c for c in free_columns if self._match_key(c) == std_key), None)

            # Partial match for a few special cases
            if source is None:
                if 'sponsored' in std_key:
                    source = next(
                        (c for c in free_columns
                         if 'sponsored' in self._match_key(c) and 'ppc' in self._match_key(c)),
                        None,
                    )
                elif 'refund' in std_key and 'cost' in std_key:
                    source = next(
                        (c for c in free_columns
                         if 'refund' in self._match_key(c) and 'cost' in self._match_key(c)),
                        None,
                    )

            if source is not None:
                column_mapping[source] = std_col

        # Rename columns according to the mapping
        df = df.rename(columns=column_mapping)

        # Keep only the required columns
        available_columns = [col for col in self.standard_columns if col in df.columns]
        df_filtered = df[available_columns].copy()

        # Add the missing columns with None values
        for col in self.standard_columns:
            if col not in df_filtered.columns:
                df_filtered[col] = None

        # Reorder to the standard order
        df_filtered = df_filtered[self.standard_columns]

        print(f"📋 Available columns: {len(available_columns)}/{len(self.standard_columns)}")
        missing_cols = [col for col in self.standard_columns if col not in available_columns]
        if missing_cols:
            print(f"⚠️ Missing columns: {missing_cols}")

        return df_filtered

    def process_single_excel(self, file_path):
        """
        Read one export and return it with a Date column and standard columns.

        Returns an empty DataFrame when reading or standardising fails (the
        error is printed).
        """
        try:
            df = pd.read_excel(file_path)
            df = df.dropna(axis=1, how="all")

            # Extract date from filename
            date_val = self.extract_date_from_filename(os.path.basename(file_path))
            if date_val:
                df["Date"] = pd.to_datetime(date_val)

            # Standardize columns
            df = self._standardize_columns(df)

            return df
        except Exception as e:
            print(f"⚠️ Error processing {file_path}: {e}")
            return pd.DataFrame()

    def _is_july_august_file(self, file_date):
        """Return True when file_date falls in July or August 2025."""
        if not file_date:
            return False
        return file_date.month in [7, 8] and file_date.year == 2025  # Adjust year as needed

    def _should_process_file(self, file_path, file_date, is_initial_run=False):
        """
        Decide whether a file needs (re)processing.

        On an initial run every July/August file is processed and everything
        else is skipped. Otherwise a file is processed when it has no stored
        metadata, or when its modification time or content hash differs from
        the stored values.
        """
        file_name = os.path.basename(file_path)

        # For initial run, process all July-August files
        if is_initial_run:
            if self._is_july_august_file(file_date):
                print(f"🔄 Initial run: Processing July/August file {file_name}")
                return True
            return False

        # For subsequent runs, check if file is new or changed
        if file_name not in self.file_metadata:
            print(f"➕ New file detected: {file_name}")
            return True

        stored_metadata = self.file_metadata[file_name]

        # The hash is only computed when the cheap modification-time check
        # did not already show a change
        if (stored_metadata.get('modification_time') != os.path.getmtime(file_path) or
                stored_metadata.get('hash') != self._get_file_hash(file_path)):
            print(f"🔄 Modified file detected: {file_name}")
            return True

        print(f"⏭️ Skipping unchanged file: {file_name}")
        return False

    def _update_file_metadata(self, file_path, file_date):
        """Record path, date, hash, modification time and timestamp for a file."""
        file_name = os.path.basename(file_path)
        self.file_metadata[file_name] = {
            'path': file_path,
            'date': file_date,
            'hash': self._get_file_hash(file_path),
            'modification_time': os.path.getmtime(file_path),
            'processed_at': datetime.now()
        }

    def process_files(self, initial_run=False):
        """
        Main processing function.

        Walks base_folder, processes the files selected by
        ``_should_process_file``, combines them, uploads the result and - only
        when the upload succeeded - records the files in the metadata file, so
        a failed upload is retried on the next run.

        Args:
            initial_run (bool): If True, reprocess all July-August files from scratch

        Returns:
            The combined DataFrame, or an empty DataFrame when no file was
            processed.
        """
        print("=" * 60)
        if initial_run:
            print("🚀 INITIAL RUN: Processing all July-August files")
            # Clear existing July-August metadata for fresh start
            files_to_remove = []
            for file_name, metadata in self.file_metadata.items():
                if isinstance(metadata.get('date'), str):
                    file_date = datetime.strptime(metadata['date'], "%Y-%m-%d").date()
                elif metadata.get('date'):
                    file_date = metadata['date']
                else:
                    continue

                if self._is_july_august_file(file_date):
                    files_to_remove.append(file_name)

            for file_name in files_to_remove:
                del self.file_metadata[file_name]
                print(f"🗑️ Cleared metadata for July/August file: {file_name}")
        else:
            print("🔄 INCREMENTAL RUN: Processing new/modified files only")
        print("=" * 60)

        all_dataframes = []
        processed_files = []
        pending_metadata = []  # (file_path, file_date) recorded after a successful upload

        # Scan all Excel files in subfolders
        for root, _dirs, files in os.walk(self.base_folder):
            for file in files:
                # Names starting with "~" are temporary Excel files
                if file.startswith('~') or not file.lower().endswith(".xlsx"):
                    continue

                file_path = os.path.join(root, file)
                file_date = self.extract_date_from_filename(file)

                if not self._should_process_file(file_path, file_date, initial_run):
                    continue

                print(f"📊 Processing: {file}")
                df = self.process_single_excel(file_path)

                if df.empty:
                    print(f"⚠️ Empty dataframe for: {file}")
                    continue

                all_dataframes.append(df)
                processed_files.append(file)
                pending_metadata.append((file_path, file_date))

        if not all_dataframes:
            print("ℹ️ No files to process.")
            return pd.DataFrame()

        # Combine all processed data
        print(f"\n📈 Combining {len(all_dataframes)} dataframes...")
        master_df = pd.concat(all_dataframes, ignore_index=True, sort=False)

        # Sort by date, then by sales (descending)
        if "Date" in master_df.columns and "Sales" in master_df.columns:
            master_df = master_df.sort_values(["Date", "Sales"], ascending=[True, False])
        elif "Date" in master_df.columns:
            master_df = master_df.sort_values("Date", ascending=True)

        print(f"✅ Combined data shape: {master_df.shape}")
        if "Date" in master_df.columns:
            print(f"📅 Date range: {master_df['Date'].min()} to {master_df['Date'].max()}")

        # Upload to Google Sheets; remember the files only if that worked
        if self._upload_to_sheets(master_df):
            for file_path, file_date in pending_metadata:
                self._update_file_metadata(file_path, file_date)
            self._save_metadata()

            print(f"\n🎉 Successfully processed {len(processed_files)} files:")
            for file in processed_files:
                print(f"   ✓ {file}")
        else:
            print("⚠️ Upload failed - metadata not updated, files will be retried on the next run.")

        return master_df

    def _upload_to_sheets(self, df):
        """
        Replace the worksheet content (columns A:U) with df.

        NOTE(review): the sheet is cleared before writing, and on an
        incremental run df only holds the new/modified files, so rows from
        earlier runs are removed - confirm this is intended.

        Returns:
            True when the upload succeeded, False when it raised (the error
            is printed).
        """
        try:
            print("📤 Uploading to Google Sheets...")

            # Clear existing data (columns A to U to match our 21 standard columns)
            self.worksheet.batch_clear(['A:U'])

            # Upload new data
            set_with_dataframe(self.worksheet, df)

            print(f"✅ Successfully uploaded {len(df)} rows to Google Sheets")
            print(f"🔗 Sheet: {self.worksheet_name}")
            print(f"📋 Columns: {', '.join(self.standard_columns)}")
            return True

        except Exception as e:
            print(f"❌ Error uploading to Google Sheets: {e}")
            return False

    def get_processing_summary(self):
        """
        Return a text summary of the files recorded in the metadata.

        Files are counted by the month of their stored date (July, August,
        other); files without a stored date count as "other". The "Last run"
        line shows the time at which the summary is built.
        """
        if not self.file_metadata:
            return "No files processed yet."

        july_files = []
        august_files = []
        other_files = []

        for file_name, metadata in self.file_metadata.items():
            if isinstance(metadata.get('date'), str):
                file_date = datetime.strptime(metadata['date'], "%Y-%m-%d").date()
            elif metadata.get('date'):
                file_date = metadata['date']
            else:
                other_files.append(file_name)
                continue

            if file_date.month == 7:
                july_files.append(file_name)
            elif file_date.month == 8:
                august_files.append(file_name)
            else:
                other_files.append(file_name)

        summary = f"""
📊 PROCESSING SUMMARY
=====================
July files: {len(july_files)}
August files: {len(august_files)}
Other files: {len(other_files)}
Total files: {len(self.file_metadata)}
Standard columns: {len(self.standard_columns)}
Last run: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
        """
        return summary

# Usage Example
if __name__ == "__main__":
    # Where the exports live and where the data goes
    config = dict(
        base_folder="C:/Users/admin1/Desktop/Performance-Tracking/Agg-SB/H2_2025_US",
        credentials_path="c:/Users/admin1/Downloads/new_credential.json",
        sheet_id="1lZ4dsi94HaeWshsEizKTyNHeOOG0tpLJhzL9pMxvd6k",
        worksheet_name="Raw_SB_H2_2025_US",
    )

    processor = SBDataProcessor(**config)

    # Ask which mode to run; anything other than "1" means incremental
    for menu_line in (
        "Choose run mode:",
        "1. Initial run (reprocess all July-August files)",
        "2. Incremental run (process only new/modified files)",
    ):
        print(menu_line)

    choice = input("Enter choice (1 or 2): ").strip()
    result_df = processor.process_files(initial_run=(choice == "1"))

    # Report what is recorded as processed
    print(processor.get_processing_summary())

    # List the standard columns with their 1-based position
    print(f"\n📋 Standard columns ({len(processor.standard_columns)}):")
    for position, column_name in enumerate(processor.standard_columns, start=1):
        print(f"   {position:2d}. {column_name}")

Choose run mode:
1. Initial run (reprocess all July-August files)
2. Incremental run (process only new/modified files)
🚀 INITIAL RUN: Processing all July-August files
🗑️ Cleared metadata for July/August file: NewEleven_Dashboard Products Group by ASIN_01_07_2025-01_07_2025_(02_16_59_866).xlsx
🗑️ Cleared metadata for July/August file: NewEleven_Dashboard Products Group by ASIN_02_07_2025-02_07_2025_(02_17_19_401).xlsx
🗑️ Cleared metadata for July/August file: NewEleven_Dashboard Products Group by ASIN_03_07_2025-03_07_2025_(02_17_35_203).xlsx
🗑️ Cleared metadata for July/August file: NewEleven_Dashboard Products Group by ASIN_04_07_2025-04_07_2025_(02_17_52_472).xlsx
🗑️ Cleared metadata for July/August file: NewEleven_Dashboard Products Group by ASIN_05_07_2025-05_07_2025_(02_18_20_680).xlsx
🗑️ Cleared metadata for July/August file: NewEleven_Dashboard Products Group by ASIN_06_07_2025-06_07_2025_(02_18_35_117).xlsx
🗑️ Cleared metadata for July/August file: NewEleven_Dashboard Products 

  master_df = pd.concat(all_dataframes, ignore_index=True, sort=False)


✅ Combined data shape: (13993, 21)
📅 Date range: 2025-07-01 00:00:00 to 2025-08-28 00:00:00
📤 Uploading to Google Sheets...
✅ Successfully uploaded 13993 rows to Google Sheets
🔗 Sheet: Raw_SB_H2_2025_US
📋 Columns: Product, ASIN, Date, SKU, Units, Refunds, Sales, Promo, Ads, Sponsored products (PPC), % Refunds, Refund сost, Amazon fees, Cost of Goods, Gross profit, Net profit, Estimated payout, Real ACOS, Sessions, VAT, Shipping

🎉 Successfully processed 59 files:
   ✓ NewEleven_Dashboard Products Group by ASIN_01_07_2025-01_07_2025_(02_16_59_866).xlsx
   ✓ NewEleven_Dashboard Products Group by ASIN_02_07_2025-02_07_2025_(02_17_19_401).xlsx
   ✓ NewEleven_Dashboard Products Group by ASIN_03_07_2025-03_07_2025_(02_17_35_203).xlsx
   ✓ NewEleven_Dashboard Products Group by ASIN_04_07_2025-04_07_2025_(02_17_52_472).xlsx
   ✓ NewEleven_Dashboard Products Group by ASIN_05_07_2025-05_07_2025_(02_18_20_680).xlsx
   ✓ NewEleven_Dashboard Products Group by ASIN_06_07_2025-06_07_2025_(02_18_35_11

# XNurta H2 2024 (Daily)

In [2]:
import numpy as np
import pandas as pd
import os
from datetime import datetime
import glob
import re
import json
import hashlib
import gspread
from gspread_dataframe import set_with_dataframe
from oauth2client.service_account import ServiceAccountCredentials
from google.oauth2.service_account import Credentials

class XNurtaDataProcessor2024:
    """Collect daily XNurta campaign exports and publish them to Google Sheets.

    The processor walks ``base_folder`` for ``.xlsx`` files, cleans each
    workbook, concatenates the results and writes them to one worksheet.
    A JSON side-car file (``xnurta_file_metadata_2024.json``) remembers the
    hash and modification time of every processed file so that an
    incremental run can skip unchanged files.

    NOTE(review): several identifiers say "H2", but the period filter in
    ``_is_h2_2024_file`` only accepts July-September 2024. Confirm whether
    October-December should be included.
    """

    # Filename layout: SA_Campaign_List_<start YYYYMMDD>_<end YYYYMMDD>_<hash>.xlsx
    _FILENAME_DATE_PATTERN = r'SA_Campaign_List_(\d{8})_\d{8}_.*\.xlsx'

    # Long campaign-type spellings and the short code that replaces them.
    _CAMPAIGN_TYPE_ALIASES = {
        'sponsoredBrands': 'SB',
        'sponsoredDisplay': 'SD',
        'sponsoredProducts': 'SP',
        'sponsoredbrands': 'SB',
        'sponsoreddisplay': 'SD',
        'sponsoredproducts': 'SP',
        'Sponsored Brands': 'SB',
        'Sponsored Display': 'SD',
        'Sponsored Products': 'SP',
    }

    # Placeholder strings that are turned into NaN before numeric parsing.
    _MISSING_TOKENS = ['', 'nan', 'NaN', '--', 'N/A']

    def __init__(self, base_folder, credentials_path, sheet_id, worksheet_name):
        """Store the configuration, open the worksheet and load the metadata.

        Args:
            base_folder: Root folder that is walked recursively for ``.xlsx`` files.
            credentials_path: Path to the service-account JSON key file.
            sheet_id: Key of the target Google spreadsheet.
            worksheet_name: Name of the worksheet inside that spreadsheet.
        """
        self.base_folder = base_folder
        self.credentials_path = credentials_path
        self.sheet_id = sheet_id
        self.worksheet_name = worksheet_name
        self.metadata_file = "xnurta_file_metadata_2024.json"

        # Initialize Google Sheets
        self._init_google_sheets()

        # Load existing metadata
        self.file_metadata = self._load_metadata()

    def _init_google_sheets(self):
        """Authorize with the service account and open the target worksheet."""
        scopes = ["https://www.googleapis.com/auth/spreadsheets",
                  "https://www.googleapis.com/auth/drive"]
        creds = Credentials.from_service_account_file(self.credentials_path, scopes=scopes)
        self.client = gspread.authorize(creds)
        self.spreadsheet = self.client.open_by_key(self.sheet_id)
        self.worksheet = self.spreadsheet.worksheet(self.worksheet_name)

    def _load_metadata(self):
        """Return the metadata dict stored on disk, or ``{}`` if none exists."""
        if os.path.exists(self.metadata_file):
            with open(self.metadata_file, 'r', encoding='utf-8') as f:
                return json.load(f)
        return {}

    def _save_metadata(self):
        """Write ``self.file_metadata`` to the JSON metadata file.

        ``default=str`` stringifies the Timestamp/datetime values, which is
        why dates read back from disk arrive as strings.
        """
        with open(self.metadata_file, 'w', encoding='utf-8') as f:
            json.dump(self.file_metadata, f, indent=2, ensure_ascii=False, default=str)

    def _get_file_hash(self, file_path):
        """Return the MD5 hex digest of the file, read in chunks.

        The digest is used only for change detection, not for security.
        """
        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def extract_date_from_filename(self, filename):
        """Extract the start date from an export filename.

        Expected pattern: ``SA_Campaign_List_YYYYMMDD_YYYYMMDD_hash.xlsx``.

        Args:
            filename: File name or full path; only the basename is inspected.

        Returns:
            The first (start) date as a ``pd.Timestamp``, or ``None`` when the
            name does not match the pattern or the digits are not a valid date.
        """
        match = re.search(self._FILENAME_DATE_PATTERN, os.path.basename(filename))
        if not match:
            return None
        try:
            return pd.to_datetime(match.group(1), format='%Y%m%d')
        except ValueError:
            # Eight digits that are not a calendar date (e.g. 20241350) must
            # not abort the whole folder scan; treat the file as undated.
            return None

    def clean_currency_column(self, column):
        """Strip ``$`` and thousands separators from an object column.

        Returns:
            A numeric Series (unparseable values become NaN) for object
            columns; any other dtype is returned unchanged.
        """
        if column.dtype == 'object':
            cleaned = column.astype(str).str.replace(r'[$,]', '', regex=True)
            cleaned = cleaned.replace(['', 'nan', 'NaN'], np.nan)
            return pd.to_numeric(cleaned, errors='coerce')
        return column

    def convert_to_float(self, column):
        """Strip ``%`` and ``,`` from an object column and parse it as numeric.

        Returns:
            A numeric Series (unparseable values become NaN) for object
            columns; any other dtype is returned unchanged.
        """
        if column.dtype == 'object':
            cleaned = column.astype(str).str.replace(r'[%,]', '', regex=True)
            cleaned = cleaned.replace(self._MISSING_TOKENS, np.nan)
            return pd.to_numeric(cleaned, errors='coerce')
        return column

    def convert_to_int(self, column):
        """Strip ``,`` from an object column and parse it as nullable ``Int64``.

        Returns:
            An ``Int64`` Series for object columns; any other dtype is
            returned unchanged.
        """
        if column.dtype == 'object':
            cleaned = column.astype(str).str.replace(r'[,]', '', regex=True)
            cleaned = cleaned.replace(self._MISSING_TOKENS, np.nan)
            float_col = pd.to_numeric(cleaned, errors='coerce')
            return float_col.astype('Int64')  # Nullable integer type
        return column

    def extract_asin_from_portfolio(self, portfolio_str):
        """Extract an ASIN-like 10-character token from a portfolio string.

        Patterns are tried from most to least specific; the first hit wins.

        Returns:
            ``None`` for missing/empty input, otherwise the matched token, or
            (when nothing matches) a best-effort prefix of the input.
        """
        if pd.isna(portfolio_str) or portfolio_str == '':
            return None

        portfolio_str = str(portfolio_str)

        # Upper-case patterns, most specific first:
        #   1. "B" followed by 9 alphanumerics
        #   2. a letter followed by 9 alphanumerics
        #   3. any 10 consecutive alphanumerics
        for pattern in (r'B[A-Z0-9]{9}', r'[A-Z][A-Z0-9]{9}', r'[A-Z0-9]{10}'):
            match = re.search(pattern, portfolio_str)
            if match:
                return match.group()

        # 4. Same as (3) but allowing lowercase; normalised to upper case.
        match = re.search(r'[A-Za-z0-9]{10}', portfolio_str)
        if match:
            return match.group().upper()

        # Fallback: first 10 alphanumerics once separators are removed.
        clean_str = re.sub(r'[^A-Za-z0-9]', '', portfolio_str)
        if len(clean_str) >= 10:
            return clean_str[:10].upper()

        # Last resort: the raw string, truncated to at most 10 characters.
        return portfolio_str[:10]

    def normalize_campaign_types(self, text):
        """Replace long campaign-type spellings in ``text`` with SB/SD/SP.

        Missing or empty values are returned unchanged.
        """
        if pd.isna(text) or text == '':
            return text

        text = str(text)
        for original, normalized in self._CAMPAIGN_TYPE_ALIASES.items():
            text = text.replace(original, normalized)
        return text

    def process_single_excel(self, file_path):
        """Read one export workbook and return the cleaned DataFrame.

        Steps: drop unused columns, add ``ASIN`` (from ``Portfolio``) and
        ``Date`` (from the filename), normalise ``Campaign type`` and convert
        currency/percentage columns to numeric.

        Returns:
            The cleaned DataFrame, or an empty DataFrame if anything fails
            (the error is printed, not raised, so one bad file does not stop
            the batch).
        """
        try:
            # Read Excel file
            df = pd.read_excel(file_path)

            # Extract date from filename
            date_extracted = self.extract_date_from_filename(file_path)

            # Drop specified columns if they exist
            columns_to_drop = [
                'Profile',
                'Labels',
                'Budget group',
                'Status',
                'Current Budget',
                'SP Off-site Ads Strategy',
                'Bidding Strategy',
                'Sales Same SKU',
                'Sales Other SKU',
                'Orders Same SKU',
                'Orders Other SKU',
                'Units Same SKU',
                'Units Other SKU'
            ]
            existing_columns_to_drop = [col for col in columns_to_drop if col in df.columns]
            if existing_columns_to_drop:
                df = df.drop(columns=existing_columns_to_drop)
                print(f"   🗑️ Dropped columns: {', '.join(existing_columns_to_drop)}")

            # Add ASIN column as first column (extract ASIN from Portfolio)
            if 'Portfolio' in df.columns:
                df.insert(0, 'ASIN', df['Portfolio'].apply(self.extract_asin_from_portfolio))

            # Add Date column
            df.insert(1, 'Date', date_extracted)

            # Normalize campaign types in Campaign Type column
            if 'Campaign type' in df.columns:
                df['Campaign type'] = df['Campaign type'].apply(self.normalize_campaign_types)

            # Clean currency columns
            currency_columns = ['Daily Budget']
            for col in currency_columns:
                if col in df.columns:
                    df[col] = self.clean_currency_column(df[col])

            # Convert specified columns to float
            float_columns = ['Avg.time in Budget', 'Top-of-search IS', 'CPC', 'CVR', 'ACOS', 'ROAS']
            for col in float_columns:
                if col in df.columns:
                    df[col] = self.convert_to_float(df[col])

            # Note: no integer conversion here; the columns it applied to are dropped above.

            print(f"✅ Successfully processed: {os.path.basename(file_path)}")
            return df

        except Exception as e:
            # Batch boundary: report and carry on with the remaining files.
            print(f"⚠️ Error processing {file_path}: {str(e)}")
            return pd.DataFrame()

    def _is_h2_2024_file(self, file_date):
        """Return True if ``file_date`` falls in Q3 2024 (July to September 2024).

        ``None``, ``NaT`` and other empty values count as "not in period".
        """
        if file_date is None or pd.isna(file_date) or not file_date:
            return False
        return file_date.month in [7, 8, 9] and file_date.year == 2024

    @staticmethod
    def _parse_metadata_date(metadata):
        """Return the ``date`` of a metadata entry as a Timestamp, or ``None``.

        Dates are Timestamps while in memory but strings after a JSON round
        trip, so both forms are accepted. Missing, empty or unparseable
        values yield ``None``.
        """
        raw = metadata.get('date')
        if raw is None or (isinstance(raw, str) and not raw.strip()):
            return None
        parsed = pd.to_datetime(raw, errors='coerce') if isinstance(raw, str) else raw
        return None if pd.isna(parsed) else parsed

    def _should_process_file(self, file_path, file_date, is_initial_run=False):
        """Decide whether a file needs (re)processing.

        Initial run: process exactly the files whose date is in Q3 2024.
        Incremental run: process files that are unknown to the metadata or
        whose hash / modification time differs from the stored values.
        """
        file_name = os.path.basename(file_path)

        # For initial run, process all Q3 2024 files (no hashing needed).
        if is_initial_run:
            if self._is_h2_2024_file(file_date):
                print(f"🔄 Initial run: Processing Q3 2024 file {file_name}")
                return True
            return False

        # For subsequent runs, check if file is new or changed
        if file_name not in self.file_metadata:
            print(f"➕ New file detected: {file_name}")
            return True

        stored_metadata = self.file_metadata[file_name]

        # The hash is computed only here, where it is actually compared.
        if (stored_metadata.get('modification_time') != os.path.getmtime(file_path) or
                stored_metadata.get('hash') != self._get_file_hash(file_path)):
            print(f"🔄 Modified file detected: {file_name}")
            return True

        print(f"⏭️ Skipping unchanged file: {file_name}")
        return False

    def _update_file_metadata(self, file_path, file_date):
        """Record path, date, hash, mtime and processing time for a file.

        Entries are keyed by basename, so files with the same name in
        different subfolders share one entry.
        """
        file_name = os.path.basename(file_path)
        self.file_metadata[file_name] = {
            'path': file_path,
            'date': file_date,
            'hash': self._get_file_hash(file_path),
            'modification_time': os.path.getmtime(file_path),
            'processed_at': datetime.now()
        }

    def process_files(self, initial_run=False):
        """Process the export folder and upload the combined data.

        Args:
            initial_run: When True, forget the stored Q3 2024 metadata and
                reprocess every Q3 2024 file; otherwise process only new or
                modified files.

        Returns:
            The combined DataFrame, or an empty DataFrame when no file was
            processed.
        """
        print("=" * 60)
        if initial_run:
            print("🚀 INITIAL RUN: Processing Q3 2024 XNurta files (Jul-Sep)")
            # Clear existing Q3 2024 metadata for fresh start
            files_to_remove = [
                file_name
                for file_name, metadata in self.file_metadata.items()
                if self._is_h2_2024_file(self._parse_metadata_date(metadata))
            ]
            for file_name in files_to_remove:
                del self.file_metadata[file_name]
                print(f"🗑️ Cleared metadata for Q3 2024 file: {file_name}")
        else:
            print("🔄 INCREMENTAL RUN: Processing new/modified files only")
        print("=" * 60)

        all_dataframes = []
        processed_files = []

        # Scan all Excel files in the (per-month) subfolders
        for root, dirs, files in os.walk(self.base_folder):
            for file in files:
                if not file.endswith(".xlsx"):
                    continue
                file_path = os.path.join(root, file)
                file_date = self.extract_date_from_filename(file)

                if not self._should_process_file(file_path, file_date, initial_run):
                    continue

                print(f"📊 Processing: {file}")
                df = self.process_single_excel(file_path)

                if not df.empty:
                    all_dataframes.append(df)
                    processed_files.append(file)
                    self._update_file_metadata(file_path, file_date)
                else:
                    print(f"⚠️ Empty dataframe for: {file}")

        if not all_dataframes:
            print("ℹ️ No files to process.")
            return pd.DataFrame()

        # Combine all processed data
        print(f"\n📈 Combining {len(all_dataframes)} dataframes...")
        master_df = pd.concat(all_dataframes, ignore_index=True, sort=False)

        # Sort by Date and ASIN. ASIN only exists when the workbook had a
        # Portfolio column, so it is used as a sort key only when present.
        if "Date" in master_df.columns:
            sort_keys = ['Date'] + (['ASIN'] if 'ASIN' in master_df.columns else [])
            master_df = master_df.sort_values(sort_keys, na_position='last')

        # Reset index
        master_df = master_df.reset_index(drop=True)

        print(f"✅ Combined data shape: {master_df.shape}")
        print(f"📅 Date range: {master_df['Date'].min()} to {master_df['Date'].max()}")

        # Upload to Google Sheets
        uploaded = self._upload_to_sheets(master_df)

        if uploaded:
            # Persist metadata only once the data is really in the sheet.
            self._save_metadata()
        else:
            # Roll back to the on-disk state so the next incremental run
            # retries these files instead of skipping them as "unchanged".
            self.file_metadata = self._load_metadata()
            print("⚠️ Upload failed: file metadata was not saved, files will be retried on the next run")

        print(f"\n🎉 Successfully processed {len(processed_files)} files:")
        for file in processed_files:
            print(f"   ✓ {file}")

        return master_df

    def _upload_to_sheets(self, df):
        """Replace the worksheet contents (columns A to AZ) with ``df``.

        NOTE(review): the sheet is cleared before every upload, so after an
        incremental run it holds only the new/modified files passed in here.
        Confirm that dropping the earlier rows is intended.

        Returns:
            True when the upload succeeded, False when it raised (the error
            is printed rather than propagated).
        """
        try:
            print("📤 Uploading to Google Sheets...")

            # Clear limited columns range (A to AZ) instead of entire sheet
            self.worksheet.batch_clear(['A:AZ'])

            # Upload new data
            set_with_dataframe(self.worksheet, df)

            print(f"✅ Successfully uploaded {len(df)} rows to Google Sheets")
            print(f"🔗 Sheet: {self.worksheet_name}")
            return True

        except Exception as e:
            print(f"❌ Error uploading to Google Sheets: {e}")
            return False

    def get_processing_summary(self):
        """Return a text summary of the recorded files, counted per month.

        Only July-September counts are printed; files from other months or
        without a usable date are reported under "Other files" only when
        their month is not one of 7-12 or the date is missing.
        """
        if not self.file_metadata:
            return "No files processed yet."

        monthly_files = {
            7: [], 8: [], 9: [], 10: [], 11: [], 12: [], 'other': []
        }
        month_names = {
            7: 'July', 8: 'August', 9: 'September',
            10: 'October', 11: 'November', 12: 'December'
        }

        for file_name, metadata in self.file_metadata.items():
            file_date = self._parse_metadata_date(metadata)
            if file_date is not None and file_date.month in monthly_files:
                monthly_files[file_date.month].append(file_name)
            else:
                monthly_files['other'].append(file_name)

        summary = f"""
📊 PROCESSING SUMMARY - XNurta 2024 Q3
======================================="""

        for month_num in [7, 8, 9]:
            count = len(monthly_files[month_num])
            summary += f"\n{month_names[month_num]} files: {count}"

        summary += f"""
Other files: {len(monthly_files['other'])}
Total files: {len(self.file_metadata)}
Last run: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
        """
        return summary

# Usage Example
if __name__ == "__main__":
    # Settings for the XNurta 2024 run; adjust the paths to the local machine.
    config = dict(
        base_folder="C:/Users/admin1/Desktop/Performance-Tracking/Xnurta 2024 (by day)",  # Update path
        credentials_path="c:/Users/admin1/Downloads/new_credential.json",
        sheet_id="1lZ4dsi94HaeWshsEizKTyNHeOOG0tpLJhzL9pMxvd6k",
        worksheet_name="Raw_XNurta_H2_2024",
    )

    # Build the processor (this opens the Google Sheets connection).
    processor = XNurtaDataProcessor2024(**config)

    # Show the menu and ask which mode to run.
    for menu_line in (
        "Choose run mode for XNurta 2024 Q3 data:",
        "1. Initial run (reprocess all Q3 2024 files: Jul-Sep)",
        "2. Incremental run (process only new/modified files)",
    ):
        print(menu_line)

    choice = input("Enter choice (1 or 2): ").strip()

    # Anything other than "1" falls back to the incremental run.
    result_df = processor.process_files(initial_run=(choice == "1"))

    # Report what the metadata now records.
    print(processor.get_processing_summary())

Choose run mode for XNurta 2024 Q3 data:
1. Initial run (reprocess all Q3 2024 files: Jul-Sep)
2. Incremental run (process only new/modified files)
🚀 INITIAL RUN: Processing Q3 2024 XNurta files (Jul-Sep)
🗑️ Cleared metadata for Q3 2024 file: SA_Campaign_List_20240701_20240701_38C2RG.xlsx
🗑️ Cleared metadata for Q3 2024 file: SA_Campaign_List_20240702_20240702_9TkpbM.xlsx
🗑️ Cleared metadata for Q3 2024 file: SA_Campaign_List_20240703_20240703_G80TRn.xlsx
🗑️ Cleared metadata for Q3 2024 file: SA_Campaign_List_20240704_20240704_gZtCIL.xlsx
🗑️ Cleared metadata for Q3 2024 file: SA_Campaign_List_20240705_20240705_A1i8Dn.xlsx
🗑️ Cleared metadata for Q3 2024 file: SA_Campaign_List_20240706_20240706_ed4ep2.xlsx
🗑️ Cleared metadata for Q3 2024 file: SA_Campaign_List_20240707_20240707_1nbAjc.xlsx
🗑️ Cleared metadata for Q3 2024 file: SA_Campaign_List_20240708_20240708_nSsf7X.xlsx
🗑️ Cleared metadata for Q3 2024 file: SA_Campaign_List_20240709_20240709_Sa3Pwo.xlsx
🗑️ Cleared metadata for Q3 202

# Version 2