In [1]:
import pandas as pd

# Read the merged P/E + ROE table, parse Reported_Date (values that cannot be
# parsed become NaT), and discard rows that end up without a report date.
df = (
    pd.read_csv('data/merged_pe_roe_updated.csv')
    .assign(
        Reported_Date=lambda frame: pd.to_datetime(
            frame['Reported_Date'], errors='coerce'
        )
    )
    .dropna(subset=['Reported_Date'])
)

# One row per calendar day, 2023-01-01 through 2024-12-31 inclusive.
daily_dates = pd.date_range(start='2023-01-01', end='2024-12-31', freq='D')
daily_df = pd.DataFrame({'Date': daily_dates})

# Distinct ticker symbols, in order of first appearance in the table.
tickers = df['Ticker'].unique()

# The daily calendar is the same for every ticker, so sort it once here
# instead of on every loop iteration (merge_asof needs sorted keys).
sorted_daily_df = daily_df.sort_values('Date')

# Columns carried from the reports table into each per-ticker series.
report_columns = ['Reported_Date', 'PE_Ratio', 'ROE']

for ticker in tickers:
    # Select this ticker's rows and the needed columns in a single .loc call
    # (avoids chained indexing), then guard against null report dates, which
    # merge_asof does not accept in its key column.
    ticker_df = df.loc[df['Ticker'] == ticker, report_columns]
    ticker_df = ticker_df.dropna(subset=['Reported_Date'])

    # Nothing to align for this ticker: report it and move on.
    if ticker_df.empty:
        print(f"No valid data for {ticker} after removing null Reported_Date.")
        continue

    # merge_asof requires the right-hand key to be sorted as well.
    ticker_df = ticker_df.sort_values('Reported_Date')

    # For each calendar day, take the most recent report on or before that
    # day (direction='backward'); days before the first report get NaN.
    merged_df = pd.merge_asof(
        sorted_daily_df,
        ticker_df,
        left_on='Date',
        right_on='Reported_Date',
        direction='backward'
    )

    # Keep only the daily date and the two carried-forward metrics.
    result_df = merged_df[['Date', 'PE_Ratio', 'ROE']]

    # Write one CSV per ticker.
    result_df.to_csv(f'data/{ticker}_2023_24.csv', index=False)
    print(f"Saved time series data for {ticker} to data/{ticker}_2023_24.csv")

Saved time series data for AAPL to data/AAPL_2023_24.csv
Saved time series data for MSFT to data/MSFT_2023_24.csv
Saved time series data for NVDA to data/NVDA_2023_24.csv
Saved time series data for GOOGL to data/GOOGL_2023_24.csv
Saved time series data for AMZN to data/AMZN_2023_24.csv
Saved time series data for META to data/META_2023_24.csv
Saved time series data for BRK-B to data/BRK-B_2023_24.csv
Saved time series data for AVGO to data/AVGO_2023_24.csv
Saved time series data for TSLA to data/TSLA_2023_24.csv
Saved time series data for LLY to data/LLY_2023_24.csv
Saved time series data for WMT to data/WMT_2023_24.csv
Saved time series data for JPM to data/JPM_2023_24.csv
Saved time series data for V to data/V_2023_24.csv
Saved time series data for MA to data/MA_2023_24.csv
Saved time series data for XOM to data/XOM_2023_24.csv
Saved time series data for COST to data/COST_2023_24.csv
Saved time series data for UNH to data/UNH_2023_24.csv
Saved time series data for HD to data/HD_2023_2