In [None]:
# Import necessary libraries
import pandas as pd
import os
from pathlib import Path

# Set the data directory path.
# The original local path remains the default; set the OHLCV_DATA_DIR environment
# variable to point the notebook at another location without editing this cell.
DEFAULT_DATA_DIR = '/Users/tiamathur/Documents/GitHub/data'
data_dir = Path(os.environ.get('OHLCV_DATA_DIR', DEFAULT_DATA_DIR)).expanduser()


def list_csv_files(directory):
    """Return the CSV files directly inside ``directory``, sorted by path.

    Args:
        directory: Path (or path-like string) of the folder to scan.

    Returns:
        A sorted list of ``Path`` objects matching ``*.csv``. The list is empty
        when ``directory`` does not exist or is not a directory.
    """
    directory = Path(directory)
    if not directory.is_dir():
        return []
    # glob() yields entries in filesystem order; sorting makes the load order
    # (and therefore the ordering of anything built from it) deterministic.
    return sorted(directory.glob('*.csv'))


# List all CSV files in the data directory
csv_files = list_csv_files(data_dir)
if not data_dir.is_dir():
    # Distinguish "folder is missing" from "folder exists but holds no CSVs".
    print(f"WARNING: data directory not found: {data_dir}")
print(f"Found {len(csv_files)} CSV files:")
for file in csv_files:
    print(f"  - {file.name}")


Found 0 CSV files:


In [2]:
# Create separate DataFrames for each CSV file, keyed by symbol.
# The dictionary is rebuilt from scratch so re-running this cell is idempotent.
dataframes = {}

for csv_file in csv_files:
    # Extract the symbol name from the filename (remove '_hourly_ohlcv')
    symbol = csv_file.stem.replace('_hourly_ohlcv', '')

    # Read the CSV file
    raw = pd.read_csv(csv_file)

    # Parse 'Open time' as datetime and use it as the index.
    # assign/set_index return new frames, so nothing is mutated in place.
    df = raw.assign(**{'Open time': pd.to_datetime(raw['Open time'])}).set_index('Open time')

    # Store the DataFrame in the dictionary
    dataframes[symbol] = df

    print(f"Loaded {symbol}: {len(df)} rows, {len(df.columns)} columns")
    print(f"  Date range: {df.index.min()} to {df.index.max()}")
    print(f"  Columns: {list(df.columns)}")
    print()

if not dataframes:
    # Without this message an empty file list produces no output at all and the
    # problem only shows up later as a KeyError on a symbol lookup.
    print("WARNING: no CSV files were loaded; `dataframes` is empty. "
          "Check the data directory path.")


In [3]:
# Print an overview of every loaded DataFrame: dimensions, memory footprint,
# per-column dtypes, and a three-row preview.
print("DataFrame Summary:")
print("=" * 50)

for symbol in dataframes:
    df = dataframes[symbol]
    print(f"\n{symbol} DataFrame:")
    print(f"  Shape: {df.shape}")
    # deep=True also counts the contents of object columns
    total_bytes = df.memory_usage(deep=True).sum()
    print(f"  Memory usage: {total_bytes / 1024**2:.2f} MB")
    print("  Data types:")
    for col, dtype in zip(df.columns, df.dtypes):
        print(f"    {col}: {dtype}")
    print("  First few rows:")
    preview = df.head(3)
    print(preview)
    print("-" * 30)


DataFrame Summary:


In [6]:
# Show which symbols ended up as keys of the dataframes dictionary
print("Available symbols in dataframes dictionary:")
for symbol in dataframes:
    print(f"  '{symbol}'")

loaded_count = len(dataframes)
print(f"\nTotal DataFrames created: {loaded_count}")


Available symbols in dataframes dictionary:

Total DataFrames created: 0


In [5]:
# Access individual DataFrames
# Each DataFrame is retrieved from `dataframes` by its symbol name.


def get_symbol_data(frames, symbol):
    """Return the DataFrame stored under ``symbol`` in ``frames``.

    Args:
        frames: Mapping of symbol name to DataFrame.
        symbol: Symbol name to look up, e.g. 'SPY'.

    Returns:
        The object stored under ``symbol``.

    Raises:
        KeyError: If ``symbol`` is not a key of ``frames``. The message lists
            the symbols that are available so a failed load is easy to diagnose.
    """
    if symbol not in frames:
        available = list(frames) or 'none'
        raise KeyError(
            f"Symbol {symbol!r} is not loaded (available: {available}). "
            "Check that the data directory contains the expected CSV files."
        )
    return frames[symbol]


# Example: Access SPY data
spy_data = get_symbol_data(dataframes, 'SPY')
print("SPY DataFrame shape:", spy_data.shape)
print("\nSPY recent data:")
print(spy_data.tail())

# Example: Access QQQ data
qqq_data = get_symbol_data(dataframes, 'QQQ')
print(f"\nQQQ DataFrame shape: {qqq_data.shape}")
if qqq_data.empty:
    # iloc[-1] raises IndexError on a frame with no rows
    print("QQQ latest close price: n/a (no rows)")
else:
    print(f"QQQ latest close price: ${qqq_data['Close'].iloc[-1]:.2f}")

# Example: Check for missing values
print("\nMissing values check:")
for symbol, df in dataframes.items():
    # First sum is per column, second sum collapses to a single total
    missing = df.isnull().sum().sum()
    print(f"  {symbol}: {missing} missing values")


KeyError: 'SPY'