# Bloomberg Data Analysis

This notebook reads and analyzes the Bloomberg data from CSV files, recreating the tables and providing visualizations.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

# Make the local `src` package importable by putting the project root on sys.path.
# The root is taken to be the parent of the current working directory
# (assumes the kernel is started from a folder directly under the project root).
project_root = Path().absolute().parent
# Guard the append so that re-running this cell does not stack duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import our utility function
from src.utils.csv_exporter import read_csv_to_df

# Plot styling: reset to matplotlib's defaults, layer the seaborn theme on top,
# then apply the notebook-wide overrides in a single rcParams update.
plt.style.use('default')
sns.set_theme()
plt.rcParams.update({
    'figure.figsize': [15, 8],
    'figure.dpi': 100,
    'axes.grid': True,
    'grid.alpha': 0.3,
    'lines.linewidth': 1.5,
})

# Load the three CSV files from <project_root>/data through the shared helper.
data_dir = project_root.joinpath('data')
sprds_df = read_csv_to_df(data_dir.joinpath('sprds_data.csv'))
derv_df = read_csv_to_df(data_dir.joinpath('derv_data.csv'))
er_ytd_df = read_csv_to_df(data_dir.joinpath('er_ytd_data.csv'))

In [2]:
# Show the spreads frame; the rendered output elides the middle rows and columns.
sprds_df

Unnamed: 0,cad_ig_oas,us_ig_oas,us_hy_oas,eur_ig_oas,cad_ig_oas_1-5yr,cad_ig_oas_5-10yr,cad_ig_oas_>10yr,cad_ig_oas_fins,cad_ig_oas_industrials,cad_ig_oas_utility,...,us_ig_oas_a,us_ig_oas_bbb,us_ig_oas_1-3yr,us_ig_oas_3-5yr,us_ig_oas_5-7yr,us_ig_oas_7-10yr,us_ig_oas_10yr,us_ig_oas_industrials,us_ig_oas_uts,us_ig_oas_fins
2002-10-01,,237.251123,987.413426,27.555947,,,,,,,...,,,,,,,,245.098157,335.289945,201.854154
2002-10-02,79.545983,235.572126,989.083859,27.864795,,,,104.747729,113.246025,25.104327,...,,,,,,,,243.757077,332.656747,199.961194
2002-10-03,83.816601,236.956207,992.347223,27.930020,,,,110.478972,117.441034,28.743526,...,,,,,,,,244.253533,337.352631,201.676215
2002-10-04,88.702731,240.074894,995.134869,28.150093,,,,119.996876,120.742771,33.282393,...,,,,,,,,245.003255,348.770366,205.822819
2002-10-07,81.292068,247.402370,1005.114001,28.277141,,,,110.365939,113.235020,27.134917,...,,,,,,,,250.492461,367.635810,212.823693
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-25,101.675500,77.110300,261.131700,64.988800,76.0116,117.7889,153.4893,83.154100,116.625100,113.087500,...,66.6202,97.8110,13.8887,25.2992,35.2969,38.8339,49.5854,75.782000,80.839700,78.376300
2024-11-26,101.889100,78.215300,263.430900,65.016900,76.2628,117.6826,153.7216,83.470700,116.751800,113.181300,...,67.7476,98.6520,13.9557,25.7813,36.5688,40.0064,49.9692,76.777800,81.366900,79.833000
2024-11-27,101.448300,78.176600,265.045400,65.950200,76.0163,116.7361,153.0217,83.258300,116.150000,112.434600,...,67.5447,98.6556,13.8102,25.6089,36.1713,39.8310,48.8862,76.739000,80.746200,79.958600
2024-11-28,101.352300,78.176600,265.045400,64.818100,75.8799,116.5844,152.9557,83.193400,116.078500,112.102100,...,67.5447,98.6556,13.8102,25.6089,36.1713,39.8310,48.8862,76.739000,80.746200,79.958600


In [3]:
# Summarise derv_df (cdx_ig / cdx_hy): index range, dtypes and non-null counts per column.
derv_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5113 entries, 2004-11-19 to 2024-11-29
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   cdx_ig  5112 non-null   float64
 1   cdx_hy  4009 non-null   float64
dtypes: float64(2)
memory usage: 119.8 KB


In [4]:
# Summarise er_ytd_df: index range, dtypes and non-null counts per column.
# NOTE(review): the captured output lists the same `*_oas` column names as the
# spreads file - confirm er_ytd_data.csv really holds the excess-return series.
er_ytd_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5563 entries, 2002-10-01 to 2024-11-29
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   cad_ig_oas              5562 non-null   float64
 1   us_ig_oas               5563 non-null   float64
 2   us_hy_oas               5563 non-null   float64
 3   eur_ig_oas              5475 non-null   float64
 4   cad_ig_oas_1-5yr        1762 non-null   float64
 5   cad_ig_oas_5-10yr       1762 non-null   float64
 6   cad_ig_oas_>10yr        1762 non-null   float64
 7   cad_ig_oas_fins         5562 non-null   float64
 8   cad_ig_oas_industrials  5562 non-null   float64
 9   cad_ig_oas_utility      5562 non-null   float64
 10  cad_ig_oas_a            5562 non-null   float64
 11  cad_ig_oas_bbb          5522 non-null   float64
 12  cad_prov_oas            1800 non-null   float64
 13  cad_prov_oas_1-5yr      1708 non-null   float64
 14  cad_prov_oas_5-10yr   

# Exploring the Fill and Start-Date Alignment Options of `read_csv_to_df`

In [5]:
# Every variant below reads the same spreads file; only the keyword options differ.
sprds_csv = data_dir / 'sprds_data.csv'

# 1. Baseline: helper defaults (no filling, original dates)
sprds_df_basic = read_csv_to_df(sprds_csv)

# 2. One frame per fill method: forward fill, backward fill, interpolation
sprds_df_ffill = read_csv_to_df(sprds_csv, fill='ffill')
sprds_df_bfill = read_csv_to_df(sprds_csv, fill='bfill')
sprds_df_interp = read_csv_to_df(sprds_csv, fill='interpolate')

# 3. Start-date alignment only, no filling
sprds_df_aligned = read_csv_to_df(sprds_csv, start_date_align='yes')

# 4. Both options together: aligned start dates plus a fill method
sprds_df_aligned_ffill = read_csv_to_df(
    sprds_csv,
    fill='ffill',
    start_date_align='yes',
)
sprds_df_aligned_interp = read_csv_to_df(
    sprds_csv,
    fill='interpolate',
    start_date_align='yes',
)

# Print a labelled .info() summary for each variant, in the order they were loaded.
variant_reports = [
    ("\n=== Basic DataFrame (No Fill, Original Dates) ===", sprds_df_basic),
    ("\n=== Forward-Filled DataFrame ===", sprds_df_ffill),
    ("\n=== Backward-Filled DataFrame ===", sprds_df_bfill),
    ("\n=== Interpolated DataFrame ===", sprds_df_interp),
    ("\n=== Date-Aligned DataFrame (No Fill) ===", sprds_df_aligned),
    ("\n=== Date-Aligned + Forward-Filled DataFrame ===", sprds_df_aligned_ffill),
    ("\n=== Date-Aligned + Interpolated DataFrame ===", sprds_df_aligned_interp),
]
for header, frame in variant_reports:
    print(header)
    frame.info()


=== Basic DataFrame (No Fill, Original Dates) ===
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5530 entries, 2002-10-01 to 2024-11-29
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   cad_ig_oas              5529 non-null   float64
 1   us_ig_oas               5530 non-null   float64
 2   us_hy_oas               5530 non-null   float64
 3   eur_ig_oas              5443 non-null   float64
 4   cad_ig_oas_1-5yr        1729 non-null   float64
 5   cad_ig_oas_5-10yr       1729 non-null   float64
 6   cad_ig_oas_>10yr        1729 non-null   float64
 7   cad_ig_oas_fins         5529 non-null   float64
 8   cad_ig_oas_industrials  5529 non-null   float64
 9   cad_ig_oas_utility      5529 non-null   float64
 10  cad_ig_oas_a            5529 non-null   float64
 11  cad_ig_oas_bbb          5489 non-null   float64
 12  cad_prov_oas            1767 non-null   float64
 13  cad_prov_oas_1-5yr      

  df = df.fillna(method='ffill')
  df = df.fillna(method='bfill')
  df = df.fillna(method='ffill')


In [6]:
# Repeat the .info() summary for the date-aligned, forward-filled frame on its own.
sprds_df_aligned_ffill.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5469 entries, 2002-12-31 to 2024-11-29
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   cad_ig_oas              5469 non-null   float64
 1   us_ig_oas               5469 non-null   float64
 2   us_hy_oas               5469 non-null   float64
 3   eur_ig_oas              5469 non-null   float64
 4   cad_ig_oas_1-5yr        5469 non-null   float64
 5   cad_ig_oas_5-10yr       5469 non-null   float64
 6   cad_ig_oas_>10yr        5469 non-null   float64
 7   cad_ig_oas_fins         5469 non-null   float64
 8   cad_ig_oas_industrials  5469 non-null   float64
 9   cad_ig_oas_utility      5469 non-null   float64
 10  cad_ig_oas_a            5469 non-null   float64
 11  cad_ig_oas_bbb          5469 non-null   float64
 12  cad_prov_oas            5469 non-null   float64
 13  cad_prov_oas_1-5yr      5469 non-null   float64
 14  cad_prov_oas_5-10yr   

In [7]:
# Per-column coverage summary for the unfilled frame: first and last dates with
# data, the number of missing rows, and the share of missing rows in percent.
null_counts = sprds_df_basic.isna().sum()
total_records = len(sprds_df_basic)
column_dates = pd.DataFrame(
    {
        'First Non-Null Date': [series.first_valid_index() for _, series in sprds_df_basic.items()],
        'Last Non-Null Date': [series.last_valid_index() for _, series in sprds_df_basic.items()],
        'Null Count': null_counts,
        'Total Records': total_records,
        'Null Percentage': (null_counts / total_records * 100).round(2),
    },
    index=sprds_df_basic.columns,
)

# Order by first available date so the late-starting series sink to the bottom
print("Column Start and End Dates (sorted by start date):")
print(column_dates.sort_values('First Non-Null Date'))

Column Start and End Dates (sorted by start date):
                       First Non-Null Date Last Non-Null Date  Null Count  \
us_ig_oas_fins                  2002-10-01         2024-11-29           0   
us_ig_oas                       2002-10-01         2024-11-29           0   
us_hy_oas                       2002-10-01         2024-11-29           0   
eur_ig_oas                      2002-10-01         2024-11-29          87   
us_ig_oas_industrials           2002-10-01         2024-11-29           0   
us_ig_oas_uts                   2002-10-01         2024-11-29           0   
cad_ig_oas_a                    2002-10-02         2024-11-29           1   
cad_ig_oas_utility              2002-10-02         2024-11-29           1   
cad_ig_oas                      2002-10-02         2024-11-29           1   
cad_ig_oas_fins                 2002-10-02         2024-11-29           1   
cad_ig_oas_industrials          2002-10-02         2024-11-29           1   
us_ig_oas_10yr           