In [1]:
import pandas as pd

In [None]:
from pathlib import Path
# --- Locate the input file -------------------------------------------------
filename = Path('raw_data_stock/AAPL_1m_data.csv')
# Fallback to data/ if the raw_data_stock path doesn't exist
if not filename.exists():
    filename = Path('data/AAPL_1m_data.csv')
print(f'Using: {filename}')

# --- Load --------------------------------------------------------------------
# Read CSV (first column is typically the datetime index saved by to_csv)
df = pd.read_csv(filename, index_col=0)
# NOTE(review): the output saved with this cell appears to show extra header
# rows ('Price' / 'Ticker' / 'Datetime') above the data. If the file on disk
# really has that layout, the index will contain non-date strings and
# read_csv may need header=/skiprows= adjustments -- confirm against the file.

# --- Ensure we have a single datetime column named 'datetime' ----------------
# Try the index first. Only the parse itself is guarded, so unrelated errors
# (e.g. from reset_index) are not silently swallowed. A numeric index is
# skipped on purpose: pd.to_datetime would "succeed" by reading the integers
# as epoch nanoseconds, producing bogus 1970 timestamps and hiding a real
# datetime column that may exist among the data columns.
parsed_index = None
if not pd.api.types.is_numeric_dtype(df.index):
    try:
        parsed_index = pd.to_datetime(df.index)
    except (ValueError, TypeError, OverflowError):
        parsed_index = None

if parsed_index is not None:
    df.index = parsed_index
    # read_csv names the index after the first header cell (e.g. 'Datetime'),
    # and reset_index() uses that name for the new column -- it is only called
    # 'index' when the index is unnamed. Naming the axis explicitly guarantees
    # the resulting column is 'datetime' in both cases.
    df = df.rename_axis('datetime').reset_index()
else:
    # The index is not datetime-like: look for a datetime-like column instead,
    # preferring an exact 'datetime' match over a loose name match.
    if 'datetime' in df.columns:
        source_col = 'datetime'
    else:
        candidates = [
            c for c in df.columns
            if 'date' in str(c).lower() or 'time' in str(c).lower()
        ]
        if not candidates:
            raise ValueError('No datetime column found to parse')
        source_col = candidates[0]
    df[source_col] = pd.to_datetime(df[source_col])
    df = df.rename(columns={source_col: 'datetime'})

# --- Derive convenience columns ----------------------------------------------
# Split datetime into separate date and time columns
df['date'] = df['datetime'].dt.date
df['time'] = df['datetime'].dt.time
# Add ticker column inferred from filename (e.g., AAPL_1m_data.csv -> AAPL)
ticker = filename.stem.split('_')[0]
df['ticker'] = ticker
# Show a sample of the cleaned DataFrame
df.head()

Unnamed: 0_level_0,Close,High,Low,Open,Volume
Price,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Datetime,,,,,
2025-11-07 14:30:00+00:00,268.67498779296875,270.5,268.45001220703125,269.7950134277344,1028645
2025-11-07 14:31:00+00:00,268.30999755859375,269.29998779296875,268.260009765625,268.7099914550781,146747
2025-11-07 14:32:00+00:00,267.4200134277344,268.5299987792969,267.3500061035156,268.2699890136719,213643
...,...,...,...,...,...
2025-11-18 20:55:00+00:00,267.75,267.9049987792969,267.3699951171875,267.6400146484375,185985
2025-11-18 20:56:00+00:00,267.67999267578125,267.79998779296875,267.6199951171875,267.7449951171875,147204
2025-11-18 20:57:00+00:00,267.635009765625,267.760009765625,267.57000732421875,267.67999267578125,190296
2025-11-18 20:58:00+00:00,267.510009765625,267.75,267.3399963378906,267.6400146484375,354181
