<a href="https://colab.research.google.com/github/john-d-noble/callcenter/blob/main/2_CB_Step_6_synthetic_Call_Volume.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Final Cleaned Code for Data Preparation

This section contains the complete and cleaned code to load and prepare all datasets, generate synthetic data, combine everything into a single DataFrame, and save the final result to `all_combined_data.csv`.

In [1]:
# --- 1. Load Original Data and Prepare for Combination ---
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime

# Load the original data
df_original = pd.read_csv('agent_contact_volume_wgsd2.csv')

# Convert the date column to datetime objects
df_original['V Cx Contact Volume Template Created Datetime Utc Date'] = pd.to_datetime(df_original['V Cx Contact Volume Template Created Datetime Utc Date'])

# Rename the contact volume column for consistency
df_original = df_original.rename(columns={'V Cx Contact Volume Template Contacts': 'Calls'})

# Set the date column as the index for merging
df_original = df_original.set_index('V Cx Contact Volume Template Created Datetime Utc Date')

# Keep only the 'Calls' column from the original data for concatenation
df_original_calls = df_original[['Calls']]

display(df_original_calls.head())
display(df_original_calls.tail())

Unnamed: 0_level_0,Calls
V Cx Contact Volume Template Created Datetime Utc Date,Unnamed: 1_level_1
2023-01-01,2882
2023-01-02,5055
2023-01-03,6537
2023-01-04,7238
2023-01-05,7302


Unnamed: 0_level_0,Calls
V Cx Contact Volume Template Created Datetime Utc Date,Unnamed: 1_level_1
2025-08-31,4601
2025-09-01,6793
2025-09-02,8868
2025-09-03,9748
2025-09-04,2136


In [2]:
# --- 2. Generate Synthetic Data ---

# Calculate day of week statistics from the original data
df_original['DayOfWeek_Original'] = df_original.index.dayofweek # Use a temporary name to avoid conflict
day_of_week_stats = df_original.groupby('DayOfWeek_Original')['Calls'].agg(['mean', 'std']).to_dict()

start_date_synthetic = '2021-01-01'
end_date_synthetic = '2022-12-31'
date_rng_synthetic = pd.date_range(start=start_date_synthetic, end=end_date_synthetic, freq='D')
df_synthetic = pd.DataFrame(date_rng_synthetic, columns=['Date'])
df_synthetic['DayOfWeek_Synthetic'] = df_synthetic['Date'].dt.dayofweek # Use a temporary name

def generate_calls(row):
    day = row['DayOfWeek_Synthetic']
    # Handle cases where a day of week might not be in the original data (though unlikely with this dataset)
    if day in day_of_week_stats['mean'] and day in day_of_week_stats['std']:
        mean = day_of_week_stats['mean'][day]
        std_dev = day_of_week_stats['std'][day]
        noise = np.random.normal(0, std_dev / 2) # Add realistic variation
        return max(0, int(mean + noise))
    else:
        # Return a default or handle missing stats as appropriate
        return 0

# Apply generate_calls row-wise
df_synthetic['Calls'] = df_synthetic.apply(generate_calls, axis=1)


# Set the Date column as the index for concatenation
df_synthetic = df_synthetic.set_index('Date')

# Keep only the 'Calls' column from the synthetic data
df_synthetic_calls = df_synthetic[['Calls']]

display(df_synthetic_calls.head())
display(df_synthetic_calls.tail())

Unnamed: 0_level_0,Calls
Date,Unnamed: 1_level_1
2021-01-01,9854
2021-01-02,5340
2021-01-03,4884
2021-01-04,8854
2021-01-05,8673


Unnamed: 0_level_0,Calls
Date,Unnamed: 1_level_1
2022-12-27,10001
2022-12-28,10466
2022-12-29,6867
2022-12-30,9310
2022-12-31,6805


In [3]:
# --- 3. Load Market Data ---

# Parameters
start_date_market = '2020-12-31'
end_date_market = datetime.now().strftime('%Y-%m-%d')
tickers = {
    '^VIX': '^VIX',
    'BVOL-USD': 'BVOL-USD',
    'CVOL-USD': 'CVOL-USD',
    'CVX-USD': 'CVX-USD',
    'SPY': 'SPY',
    'QQQ': 'QQQ',
    'DX-Y.NYB': 'DX-Y.NYB',
    'GC=F': 'GC=F'
}

# Download full market data
market_data = pd.DataFrame()
# Use the full date range from the start of market data to the end of original data for reindexing
full_date_range = pd.date_range(start=start_date_market, end=df_original_calls.index.max())


# Define known launch dates (approximate based on common knowledge or initial data inspection)
launch_dates = {
    'CVOL-USD': pd.to_datetime('2022-02-28'),
    'CVX-USD': pd.to_datetime('2021-05-17'),
    # Add other tickers and their launch dates if necessary
}


for label, ticker in tickers.items():
    data = yf.download(ticker, start=start_date_market, end=end_date_market)
    # Select relevant columns: Open, High, Low, Close
    data = data[['Open', 'High', 'Low', 'Close']]
    # Reindex to the full date range based on the combined data
    data = data.reindex(full_date_range)

    # Handle pre-launch dates by setting to 0
    if label in launch_dates:
        pre_launch_dates = data.index < launch_dates[label]
        data.loc[pre_launch_dates] = 0 # Use .loc for setting values

    # Forward-fill gaps after launch
    data = data.ffill()

    data.columns = [f"{col}_{label}" for col in data.columns] # Rename columns
    if market_data.empty:
        market_data = data
    else:
        market_data = market_data.join(data, how='outer')

# Display the head of the downloaded market data
display(market_data.head())
display(market_data.tail())

  data = yf.download(ticker, start=start_date_market, end=end_date_market)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date_market, end=end_date_market)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date_market, end=end_date_market)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date_market, end=end_date_market)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date_market, end=end_date_market)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date_market, end=end_date_market)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start=start_date_market, end=end_date_market)
[*********************100%***********************]  1 of 1 co

Unnamed: 0,"('Open', '^VIX')_^VIX","('High', '^VIX')_^VIX","('Low', '^VIX')_^VIX","('Close', '^VIX')_^VIX","('Open', 'BVOL-USD')_BVOL-USD","('High', 'BVOL-USD')_BVOL-USD","('Low', 'BVOL-USD')_BVOL-USD","('Close', 'BVOL-USD')_BVOL-USD","('Open', 'CVOL-USD')_CVOL-USD","('High', 'CVOL-USD')_CVOL-USD",...,"('Low', 'QQQ')_QQQ","('Close', 'QQQ')_QQQ","('Open', 'DX-Y.NYB')_DX-Y.NYB","('High', 'DX-Y.NYB')_DX-Y.NYB","('Low', 'DX-Y.NYB')_DX-Y.NYB","('Close', 'DX-Y.NYB')_DX-Y.NYB","('Open', 'GC=F')_GC=F","('High', 'GC=F')_GC=F","('Low', 'GC=F')_GC=F","('Close', 'GC=F')_GC=F"
2020-12-31,22.99,23.25,21.24,22.75,1554.442993,1629.764526,1548.796631,1555.343872,0.0,0.0,...,303.281629,305.207764,89.610001,89.980003,89.519997,89.940002,1897.0,1901.300049,1892.699951,1893.099976
2021-01-01,22.99,23.25,21.24,22.75,1555.341187,1577.42688,1553.720703,1564.578735,0.0,0.0,...,303.281629,305.207764,89.610001,89.980003,89.519997,89.940002,1897.0,1901.300049,1892.699951,1893.099976
2021-01-02,22.99,23.25,21.24,22.75,1564.644775,1600.865601,1518.926636,1600.128784,0.0,0.0,...,303.281629,305.207764,89.610001,89.980003,89.519997,89.940002,1897.0,1901.300049,1892.699951,1893.099976
2021-01-03,22.99,23.25,21.24,22.75,1600.305664,1693.79126,1562.991821,1693.064697,0.0,0.0,...,303.281629,305.207764,89.610001,89.980003,89.519997,89.940002,1897.0,1901.300049,1892.699951,1893.099976
2021-01-04,23.040001,29.190001,22.559999,26.969999,1693.105347,1780.254028,1649.489746,1778.977783,0.0,0.0,...,296.880596,300.898285,89.93,89.940002,89.419998,89.879997,1912.199951,1945.099976,1912.199951,1944.699951


Unnamed: 0,"('Open', '^VIX')_^VIX","('High', '^VIX')_^VIX","('Low', '^VIX')_^VIX","('Close', '^VIX')_^VIX","('Open', 'BVOL-USD')_BVOL-USD","('High', 'BVOL-USD')_BVOL-USD","('Low', 'BVOL-USD')_BVOL-USD","('Close', 'BVOL-USD')_BVOL-USD","('Open', 'CVOL-USD')_CVOL-USD","('High', 'CVOL-USD')_CVOL-USD",...,"('Low', 'QQQ')_QQQ","('Close', 'QQQ')_QQQ","('Open', 'DX-Y.NYB')_DX-Y.NYB","('High', 'DX-Y.NYB')_DX-Y.NYB","('Low', 'DX-Y.NYB')_DX-Y.NYB","('Close', 'DX-Y.NYB')_DX-Y.NYB","('Open', 'GC=F')_GC=F","('High', 'GC=F')_GC=F","('Low', 'GC=F')_GC=F","('Close', 'GC=F')_GC=F"
2025-08-31,14.31,15.97,14.31,15.36,67.991486,67.991486,67.991486,67.991486,69.659958,69.659958,...,568.539978,570.400024,97.870003,98.129997,97.690002,97.769997,3432.5,3475.600098,3426.600098,3473.699951
2025-09-01,14.31,15.97,14.31,15.36,67.991486,67.991486,67.991486,67.991486,69.659958,69.659958,...,568.539978,570.400024,97.870003,98.129997,97.690002,97.769997,3432.5,3475.600098,3426.600098,3473.699951
2025-09-02,16.65,19.379999,16.549999,17.17,67.991486,67.991486,67.991486,67.991486,69.659958,69.659958,...,559.539978,565.619995,97.650002,98.599998,97.620003,98.400002,3485.699951,3559.199951,3485.699951,3549.399902
2025-09-03,17.4,17.57,16.34,16.35,67.991486,67.991486,67.991486,67.991486,69.659958,69.659958,...,566.72998,570.070007,98.400002,98.639999,98.010002,98.139999,3554.800049,3593.699951,3553.199951,3593.199951
2025-09-04,16.219999,16.35,15.28,15.3,67.991486,67.991486,67.991486,67.991486,69.659958,69.659958,...,569.030029,575.22998,98.150002,98.440002,98.080002,98.349998,3549.899902,3573.600098,3549.899902,3565.800049


In [4]:
# --- 4. Combine Call Volume Data (Synthetic and Original) ---

# Concatenate the synthetic and original call volume dataframes
# The index (Date) will be used for alignment
df_combined_calls = pd.concat([df_synthetic_calls, df_original_calls])

# Sort the combined dataframe by date to ensure correct time series order
df_combined_calls = df_combined_calls.sort_index()

display(df_combined_calls.head())
display(df_combined_calls.tail())

Unnamed: 0,Calls
2021-01-01,9854
2021-01-02,5340
2021-01-03,4884
2021-01-04,8854
2021-01-05,8673


Unnamed: 0,Calls
2025-08-31,4601
2025-09-01,6793
2025-09-02,8868
2025-09-03,9748
2025-09-04,2136


In [5]:
# --- 5. Merge Combined Call Volume with Market Data and Finalize ---

# Merge the combined call volume dataframe with the market data dataframe on their index (Date)
df_all_combined_cleaned = df_combined_calls.join(market_data, how='left')

# Add a single, fully populated 'DayOfWeek' column derived from the index (Date)
df_all_combined_cleaned['DayOfWeek'] = df_all_combined_cleaned.index.dayofweek

# Display the head of the final combined and cleaned DataFrame
display(df_all_combined_cleaned.head())
display(df_all_combined_cleaned.tail())

# Save the final combined DataFrame to a new CSV file
df_all_combined_cleaned.to_csv('all_combined_data.csv')

Unnamed: 0,Calls,"('Open', '^VIX')_^VIX","('High', '^VIX')_^VIX","('Low', '^VIX')_^VIX","('Close', '^VIX')_^VIX","('Open', 'BVOL-USD')_BVOL-USD","('High', 'BVOL-USD')_BVOL-USD","('Low', 'BVOL-USD')_BVOL-USD","('Close', 'BVOL-USD')_BVOL-USD","('Open', 'CVOL-USD')_CVOL-USD",...,"('Close', 'QQQ')_QQQ","('Open', 'DX-Y.NYB')_DX-Y.NYB","('High', 'DX-Y.NYB')_DX-Y.NYB","('Low', 'DX-Y.NYB')_DX-Y.NYB","('Close', 'DX-Y.NYB')_DX-Y.NYB","('Open', 'GC=F')_GC=F","('High', 'GC=F')_GC=F","('Low', 'GC=F')_GC=F","('Close', 'GC=F')_GC=F",DayOfWeek
2021-01-01,9854,22.99,23.25,21.24,22.75,1555.341187,1577.42688,1553.720703,1564.578735,0.0,...,305.207764,89.610001,89.980003,89.519997,89.940002,1897.0,1901.300049,1892.699951,1893.099976,4
2021-01-02,5340,22.99,23.25,21.24,22.75,1564.644775,1600.865601,1518.926636,1600.128784,0.0,...,305.207764,89.610001,89.980003,89.519997,89.940002,1897.0,1901.300049,1892.699951,1893.099976,5
2021-01-03,4884,22.99,23.25,21.24,22.75,1600.305664,1693.79126,1562.991821,1693.064697,0.0,...,305.207764,89.610001,89.980003,89.519997,89.940002,1897.0,1901.300049,1892.699951,1893.099976,6
2021-01-04,8854,23.040001,29.190001,22.559999,26.969999,1693.105347,1780.254028,1649.489746,1778.977783,0.0,...,300.898285,89.93,89.940002,89.419998,89.879997,1912.199951,1945.099976,1912.199951,1944.699951,0
2021-01-05,8673,26.940001,28.6,24.799999,25.34,1778.970093,1800.249878,1755.270752,1800.193848,0.0,...,303.378845,89.900002,89.900002,89.43,89.440002,1941.699951,1952.699951,1941.300049,1952.699951,1


Unnamed: 0,Calls,"('Open', '^VIX')_^VIX","('High', '^VIX')_^VIX","('Low', '^VIX')_^VIX","('Close', '^VIX')_^VIX","('Open', 'BVOL-USD')_BVOL-USD","('High', 'BVOL-USD')_BVOL-USD","('Low', 'BVOL-USD')_BVOL-USD","('Close', 'BVOL-USD')_BVOL-USD","('Open', 'CVOL-USD')_CVOL-USD",...,"('Close', 'QQQ')_QQQ","('Open', 'DX-Y.NYB')_DX-Y.NYB","('High', 'DX-Y.NYB')_DX-Y.NYB","('Low', 'DX-Y.NYB')_DX-Y.NYB","('Close', 'DX-Y.NYB')_DX-Y.NYB","('Open', 'GC=F')_GC=F","('High', 'GC=F')_GC=F","('Low', 'GC=F')_GC=F","('Close', 'GC=F')_GC=F",DayOfWeek
2025-08-31,4601,14.31,15.97,14.31,15.36,67.991486,67.991486,67.991486,67.991486,69.659958,...,570.400024,97.870003,98.129997,97.690002,97.769997,3432.5,3475.600098,3426.600098,3473.699951,6
2025-09-01,6793,14.31,15.97,14.31,15.36,67.991486,67.991486,67.991486,67.991486,69.659958,...,570.400024,97.870003,98.129997,97.690002,97.769997,3432.5,3475.600098,3426.600098,3473.699951,0
2025-09-02,8868,16.65,19.379999,16.549999,17.17,67.991486,67.991486,67.991486,67.991486,69.659958,...,565.619995,97.650002,98.599998,97.620003,98.400002,3485.699951,3559.199951,3485.699951,3549.399902,1
2025-09-03,9748,17.4,17.57,16.34,16.35,67.991486,67.991486,67.991486,67.991486,69.659958,...,570.070007,98.400002,98.639999,98.010002,98.139999,3554.800049,3593.699951,3553.199951,3593.199951,2
2025-09-04,2136,16.219999,16.35,15.28,15.3,67.991486,67.991486,67.991486,67.991486,69.659958,...,575.22998,98.150002,98.440002,98.080002,98.349998,3549.899902,3573.600098,3549.899902,3565.800049,3
