In [7]:
# Import necessary libraries
import pandas as pd
import os
from pathlib import Path

# Set the data directory path - using absolute path
data_dir = Path('/Users/tiamathur/Documents/GitHub/data')

# List all CSV files in the data directory
csv_files = list(data_dir.glob('*.csv'))
print(f"Found {len(csv_files)} CSV files:")
for file in csv_files:
    print(f"  - {file.name}")


Found 7 CSV files:
  - IWM_hourly_ohlcv.csv
  - VIX_hourly_ohlcv.csv
  - VTI_hourly_ohlcv.csv
  - SPY_hourly_ohlcv.csv
  - QQQ_hourly_ohlcv.csv
  - EFA_hourly_ohlcv.csv
  - DIA_hourly_ohlcv.csv


In [8]:
# Create separate DataFrames for each CSV file
dataframes = {}

for csv_file in csv_files:
    # Extract the symbol name from the filename (remove '_hourly_ohlcv.csv')
    symbol = csv_file.stem.replace('_hourly_ohlcv', '')
    
    # Read the CSV file
    df = pd.read_csv(csv_file)
    
    # Parse the 'Open time' column as datetime
    df['Open time'] = pd.to_datetime(df['Open time'])
    
    # Set the 'Open time' as the index
    df.set_index('Open time', inplace=True)
    
    # Store the DataFrame in the dictionary
    dataframes[symbol] = df
    
    print(f"Loaded {symbol}: {len(df)} rows, {len(df.columns)} columns")
    print(f"  Date range: {df.index.min()} to {df.index.max()}")
    print(f"  Columns: {list(df.columns)}")
    print()


Loaded IWM: 6002 rows, 7 columns
  Date range: 2021-07-19 14:00:00 to 2025-07-16 19:00:00
  Columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Trade count', 'VWAP']

Loaded VIX: 6002 rows, 7 columns
  Date range: 2021-07-19 14:00:00 to 2025-07-16 19:00:00
  Columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Trade count', 'VWAP']

Loaded VTI: 6002 rows, 7 columns
  Date range: 2021-07-19 14:00:00 to 2025-07-16 19:00:00
  Columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Trade count', 'VWAP']

Loaded SPY: 6002 rows, 7 columns
  Date range: 2021-07-19 14:00:00 to 2025-07-16 19:00:00
  Columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Trade count', 'VWAP']

Loaded QQQ: 6002 rows, 7 columns
  Date range: 2021-07-19 14:00:00 to 2025-07-16 19:00:00
  Columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Trade count', 'VWAP']

Loaded EFA: 6002 rows, 7 columns
  Date range: 2021-07-19 14:00:00 to 2025-07-16 19:00:00
  Columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Trade count', '

In [9]:
# Display basic information about each DataFrame
print("DataFrame Summary:")
print("=" * 50)

for symbol, df in dataframes.items():
    print(f"\n{symbol} DataFrame:")
    print(f"  Shape: {df.shape}")
    print(f"  Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
    print(f"  Data types:")
    for col, dtype in df.dtypes.items():
        print(f"    {col}: {dtype}")
    print(f"  First few rows:")
    print(df.head(3))
    print("-" * 30)


DataFrame Summary:

IWM DataFrame:
  Shape: (6002, 7)
  Memory usage: 0.37 MB
  Data types:
    Open: float64
    High: float64
    Low: float64
    Close: float64
    Volume: float64
    Trade count: int64
    VWAP: float64
  First few rows:
                       Open     High     Low    Close     Volume  Trade count  \
Open time                                                                       
2021-07-19 14:00:00  209.92  212.450  209.05  211.510  9037547.0        80016   
2021-07-19 15:00:00  211.52  214.445  211.37  214.280  7786318.0        69446   
2021-07-19 16:00:00  214.27  214.270  211.17  211.575  5702837.0        50283   

                         VWAP  
Open time                      
2021-07-19 14:00:00  210.9506  
2021-07-19 15:00:00  213.1119  
2021-07-19 16:00:00  212.4086  
------------------------------

VIX DataFrame:
  Shape: (6002, 7)
  Memory usage: 0.37 MB
  Data types:
    Open: float64
    High: float64
    Low: float64
    Close: float64
    Volume: flo

In [10]:
# Check what symbols are available in the dataframes dictionary
print("Available symbols in dataframes dictionary:")
for symbol in dataframes.keys():
    print(f"  '{symbol}'")

print(f"\nTotal DataFrames created: {len(dataframes)}")


Available symbols in dataframes dictionary:
  'IWM'
  'VIX'
  'VTI'
  'SPY'
  'QQQ'
  'EFA'
  'DIA'

Total DataFrames created: 7


In [11]:
# Access individual DataFrames
# You can now access each DataFrame individually using the symbol name

# Example: Access SPY data
spy_data = dataframes['SPY']
print("SPY DataFrame shape:", spy_data.shape)
print("\nSPY recent data:")
print(spy_data.tail())

# Example: Access QQQ data
qqq_data = dataframes['QQQ']
print(f"\nQQQ DataFrame shape: {qqq_data.shape}")
print(f"QQQ latest close price: ${qqq_data['Close'].iloc[-1]:.2f}")

# Example: Check for missing values
print("\nMissing values check:")
for symbol, df in dataframes.items():
    missing = df.isnull().sum().sum()
    print(f"  {symbol}: {missing} missing values")


SPY DataFrame shape: (6002, 7)

SPY recent data:
                       Open      High      Low     Close      Volume  \
Open time                                                              
2025-07-16 15:00:00  622.17  623.3600  618.050  621.9600  32508618.0   
2025-07-16 16:00:00  621.95  623.1350  621.040  623.1048   8724455.0   
2025-07-16 17:00:00  623.10  623.7900  622.910  623.0857   5144624.0   
2025-07-16 18:00:00  623.09  624.3599  622.720  624.3489   6021480.0   
2025-07-16 19:00:00  624.34  624.7300  623.825  624.1800  11988860.0   

                     Trade count      VWAP  
Open time                                   
2025-07-16 15:00:00       324653  620.6913  
2025-07-16 16:00:00       105999  621.9031  
2025-07-16 17:00:00        71685  623.2815  
2025-07-16 18:00:00        71738  623.4973  
2025-07-16 19:00:00       119627  624.0937  

QQQ DataFrame shape: (6002, 7)
QQQ latest close price: $557.34

Missing values check:
  IWM: 0 missing values
  VIX: 0 missing val