<a href="https://colab.research.google.com/github/john-d-noble/callcenter/blob/main/CB_Step_A_EDA_ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Cell 1: Install all required packages (matching EDA for consistency)
!pip install yfinance pandas numpy matplotlib seaborn statsmodels scipy scikit-learn



In [4]:
# Cell 2: All imports and data preparation code
import pandas as pd
import yfinance as yf
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Steps 1-2: Build the call-volume frame in a single pipeline:
#   read the raw CSV -> rename the long export column to 'Date'
#   -> parse it as datetimes -> use it as a chronologically sorted index.
calls_df = (
    pd.read_csv('agent_contact_volume_wgsd2.csv')
    .rename(columns={'V Cx Contact Volume Template Created Datetime Utc Date': 'Date'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .sort_index()
)

# Report the row count and covered date range, then preview the first rows.
print(f"Loaded {len(calls_df)} rows of call data from {calls_df.index.min()} to {calls_df.index.max()}")
print(calls_df.head())

# Step 3: Fetch market data using yfinance
# Maps each yfinance ticker symbol to the output column names to create for it.
# A name containing "close" is filled from the download's 'Close' field and a
# name containing "volume" from its 'Volume' field.
tickers = {
    '^VIX': ['^VIX_close'],
    'SPY': ['SPY_close', 'SPY_volume'],
    'QQQ': ['QQQ_close', 'QQQ_volume'],
    'DX-Y.NYB': ['DX-Y.NYB_close'],
    'GC=F': ['GC=F_close', 'GC=F_volume'],
    'BTC-USD': ['BTC-USD_close', 'BTC-USD_volume'],
    'ETH-USD': ['ETH-USD_close', 'ETH-USD_volume']
}

# The download window is derived from the date range of the call data.
start_date = calls_df.index.min().strftime('%Y-%m-%d')
end_date = (calls_df.index.max() + pd.Timedelta(days=1)).strftime('%Y-%m-%d')  # +1 to include last day

# One row per call-data date; dates without a market observation stay NaN here.
market_data = pd.DataFrame(index=calls_df.index)

for ticker, cols in tickers.items():
    try:
        data = yf.download(ticker, start=start_date, end=end_date, progress=False)
        if data.empty:
            # Report an empty download explicitly instead of claiming success.
            print(f"No data returned for {ticker}")
            continue

        for col in cols:
            # Pick the source field from the requested output column name.
            if 'close' in col.lower():
                field = 'Close'
            elif 'volume' in col.lower():
                field = 'Volume'
            else:
                continue

            series = data[field]
            # NOTE(review): depending on the installed yfinance version the
            # download may have MultiIndex columns, in which case data[field]
            # is a one-column DataFrame rather than a Series. Reduce it to a
            # Series so the assignment below never depends on that layout.
            if isinstance(series, pd.DataFrame):
                series = series.iloc[:, 0]

            # Align to the call-data dates; missing dates become NaN.
            market_data[col] = series.reindex(market_data.index)

        print(f"Fetched data for {ticker}")
    except Exception as e:
        # Best-effort: a failure for one ticker must not abort the others.
        print(f"Error fetching {ticker}: {e}")

print(f"Market data shape: {market_data.shape}")
print(market_data.head())

# Step 4: Combine call volumes with the market series and fill the gaps.
calls_source_col = 'V Cx Contact Volume Template Contacts'

# Left join keeps every call-data row; market columns are attached by date.
df_merged = calls_df.join(market_data, how='left')

# Every column except the call-volume column is gap-filled:
# forward-fill carries the last known value across dates with no market
# observation, then back-fill covers NaNs that precede the first observation.
market_cols = df_merged.columns[df_merged.columns != calls_source_col].tolist()
filled_markets = df_merged[market_cols].ffill().bfill()
df_merged[market_cols] = filled_markets

# Use the short name 'calls' for the call-volume column.
df_merged = df_merged.rename(columns={'V Cx Contact Volume Template Contacts': 'calls'})

# Column order: 'calls' first, followed by the remaining columns in their existing order
# (the date stays in the index).
cols_order = ['calls', *(name for name in df_merged.columns if name != 'calls')]
df_merged = df_merged[cols_order]

print(f"Merged data shape: {df_merged.shape}")
print(df_merged.head())
print("\nMissing values after ffill/bfill:")
print(df_merged.isnull().sum())

# Step 5: Persist the merged frame so the EDA notebook can load it.
output_filename = 'final_merged_data.csv'
df_merged.to_csv(output_filename)
print(f"Saved {output_filename} with {len(df_merged)} rows.")

# Sanity check: re-read the file without an index column so the preview shows
# the saved index as an ordinary column, which is how the EDA notebook
# will see it.
df_check = pd.read_csv(output_filename)
print("CSV head:", df_check.head(), sep="\n")

Loaded 978 rows of call data from 2023-01-01 00:00:00 to 2025-09-04 00:00:00
            V Cx Contact Volume Template Contacts
Date                                             
2023-01-01                                   2882
2023-01-02                                   5055
2023-01-03                                   6537
2023-01-04                                   7238
2023-01-05                                   7302
Fetched data for ^VIX
Fetched data for SPY
Fetched data for QQQ
Fetched data for DX-Y.NYB
Fetched data for GC=F
Fetched data for BTC-USD
Fetched data for ETH-USD
Market data shape: (978, 12)
            ^VIX_close   SPY_close  SPY_volume   QQQ_close  QQQ_volume  \
Date                                                                     
2023-01-01         NaN         NaN         NaN         NaN         NaN   
2023-01-02         NaN         NaN         NaN         NaN         NaN   
2023-01-03   22.900000  368.168671  74850700.0  260.432220  42335300.0   
2023-01-04  