In [1]:
import pandas as pd

# Load the P/E and ROE tables from their CSV files.
pe_data = pd.read_csv('data/pe_ratios.csv')
roe_data = pd.read_csv('data/roe_data.csv')

# Parse Date in both frames so the join compares datetime values
# instead of raw strings that may be formatted differently per file.
pe_data['Date'] = pd.to_datetime(pe_data['Date'])
roe_data['Date'] = pd.to_datetime(roe_data['Date'])

# Inner join: keep only the (Ticker, Date) pairs present in both files.
merged_data = pd.merge(
    pe_data,
    roe_data,
    on=['Ticker', 'Date'],
    how='inner'  # Change to 'left', 'right', or 'outer' if needed
)

# Persist the merged table and report its size.
merged_data.to_csv('merged_pe_roe.csv', index=False)
print("Merged data saved to 'merged_pe_roe.csv'")
print(f"Total rows in merged data: {len(merged_data)}")
print(f"Unique tickers in merged data: {merged_data['Ticker'].nunique()}")

# Display the first few rows of the merged data
print("\nSample of merged data:")
print(merged_data.head())

# Diagnose an empty result. The join key is (Ticker, Date), so both halves
# of the key are checked: either one failing to overlap yields zero rows.
if len(merged_data) == 0:
    print("\nWarning: No matches found. Checking date overlaps...")
    pe_dates = set(pe_data['Date'])
    roe_dates = set(roe_data['Date'])
    common_dates = pe_dates.intersection(roe_dates)
    print(f"Common dates between PE and ROE data: {len(common_dates)}")
    if common_dates:
        # Sort before slicing: a set has no defined order, so an unsorted
        # sample would differ from run to run.
        print(f"Sample common dates: {sorted(common_dates)[:5]}")
    else:
        print("No common dates found. Verify date alignment between files.")

    # Dates alone cannot explain an empty join; check the ticker half too.
    common_tickers = set(pe_data['Ticker']).intersection(roe_data['Ticker'])
    print(f"Common tickers between PE and ROE data: {len(common_tickers)}")
    if not common_tickers:
        print("No common tickers found. Verify ticker symbols (case, whitespace) between files.")
    elif common_dates:
        # Each key column overlaps on its own, yet no combined pair matches.
        print("Tickers and dates overlap individually, but no (Ticker, Date) pair appears in both files.")

Merged data saved to 'merged_pe_roe.csv'
Total rows in merged data: 1808
Unique tickers in merged data: 30

Sample of merged data:
  Ticker       Date  Stock_Price    EPS  PE_Ratio Net_Income  \
0   AAPL 2024-12-31       250.15  $6.30     39.71    $96.15B   
1   AAPL 2024-09-30       232.49  $6.08     38.24    $93.74B   
2   AAPL 2024-06-30       209.92  $6.57     31.95   $101.96B   
3   AAPL 2024-03-31       170.67  $6.43     26.54   $100.39B   
4   AAPL 2023-12-31       191.38  $6.42     29.81   $100.91B   

  Shareholder_Equity      ROE  
0            $66.76B  145.35%  
1            $56.95B  137.87%  
2            $66.71B  147.15%  
3            $74.19B  148.33%  
4            $74.10B  156.04%  
