### Extracting 2024 Earnings Call Transcripts for Trading Simulation

To prepare data for trading simulations, this script scans each stock folder for `earnings_transcripts_2020_2024.json` and extracts only the **2024 earnings call transcripts**. It keeps the entries whose date starts with `2024` and consolidates the management speaker content (every field whose key starts with `speaker`) into a single JSON-encoded field. The results are saved inside each stock folder as a new file named `2024_trading.csv`, containing the **stock symbol**, **earnings call date**, and the **formatted transcript** for each 2024 call. This structured output supports downstream applications such as real-time prediction or backtesting.


In [12]:
import os
import json
import csv


def process_json_file(json_path, folder_name):
    """Collect the 2024 earnings-call rows from one transcripts JSON file.

    Args:
        json_path: Path to the transcripts JSON file. The file is expected to
            contain a list of objects, each with a ``date`` string and any
            number of keys starting with ``speaker``.
        folder_name: Value placed in the first column of every returned row
            (the caller passes the stock folder name).

    Returns:
        A list of ``[folder_name, earnings_date, transcript_json]`` rows, one
        per entry whose date starts with ``2024``. ``earnings_date`` is the
        part of the entry's date before the first space, and
        ``transcript_json`` is a JSON object string holding all of the
        entry's ``speaker*`` fields. The list is empty when the file is not
        valid JSON, is not a JSON list, or holds no 2024 entries.
    """
    with open(json_path, "r", encoding="utf-8") as file:
        # Only json.load can raise JSONDecodeError, so keep the try minimal.
        try:
            data = json.load(file)
        except json.JSONDecodeError:
            print(f"Error decoding JSON file: {json_path}")
            return []

    # Anything other than a list cannot hold per-call entries.
    if not isinstance(data, list):
        return []

    data_2024 = []
    for entry in data:
        # Skip malformed records instead of crashing the whole export.
        if not isinstance(entry, dict):
            continue
        raw_date = entry.get("date")
        # A JSON null (or any non-string) date cannot be split or matched.
        if not isinstance(raw_date, str):
            continue

        # Drop the time component, if any ("YYYY-MM-DD HH:MM:SS").
        earnings_date = raw_date.split(" ")[0]
        if not earnings_date.startswith("2024"):
            continue

        transcript = {
            key: value for key, value in entry.items() if key.startswith("speaker")
        }
        data_2024.append(
            [
                folder_name,
                earnings_date,
                json.dumps(transcript, ensure_ascii=False),
            ]
        )
    return data_2024


def create_csv(folder_path, data):
    """Write the transcript rows to ``2024_trading.csv`` inside *folder_path*.

    Args:
        folder_path: Directory in which the CSV file is created (an existing
            file of the same name is overwritten).
        data: Iterable of rows, each matching the three header columns.
    """
    header = ["Stock", "Earnings_Call_Date", "Earnings_Call_Transcript"]
    destination = os.path.join(folder_path, "2024_trading.csv")
    # newline="" lets the csv module control line endings itself.
    with open(destination, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(data)


def main():
    """Export a 2024 transcript CSV for every stock folder in the working directory.

    Each immediate sub-directory containing
    ``earnings_transcripts_2020_2024.json`` is processed; a CSV is written
    only when at least one 2024 row was extracted, otherwise a notice is
    printed.
    """
    root = os.getcwd()
    for name in os.listdir(root):
        folder_path = os.path.join(root, name)
        # Only directories can be stock folders.
        if not os.path.isdir(folder_path):
            continue

        json_path = os.path.join(folder_path, "earnings_transcripts_2020_2024.json")
        if not os.path.exists(json_path):
            continue

        rows = process_json_file(json_path, name)
        if not rows:
            print(f"No relevant 2024 data found in {json_path}")
            continue

        create_csv(folder_path, rows)
        print(f"CSV file created: {os.path.join(folder_path, '2024_trading.csv')}")


# Entry point: run the export when this code is executed as the main program.
if __name__ == "__main__":
    main()


CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/CTAS/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/WELL/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/VZ/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/AMZN/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/CNP/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/RCL/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/CAT/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/TFC/2024_trading.csv
CSV fi

### Merging 2024 Earnings Call Transcripts with Financial Context

To prepare for real-time prediction or trading simulation, this script compiles **2024 earnings call transcripts** with corresponding **earnings surprises**, **estimated earnings**, and **market capitalization**. It searches each stock folder for the required JSON files, then matches each 2024 transcript to the earnings surprise record whose date is closest to the call date (the search covers all available surprise dates, not only those in 2024) and looks up the market capitalization recorded on the call date itself. The output is saved in each stock folder as `2024_trading.csv` and contains the **stock symbol**, **call date**, **management transcript**, and supporting financial metrics for each earnings event.


In [13]:
import os
import json
import csv
from datetime import datetime


def load_json_data(json_path, key_name):
    """Load a JSON list of records and index one field by each record's date.

    Args:
        json_path: Path to a JSON file expected to contain a list of objects,
            each with a ``date`` string.
        key_name: Name of the field whose value is stored for every date.

    Returns:
        A dict mapping each record's ``date`` string to ``record.get(key_name)``
        (``None`` when the field is absent). Later records overwrite earlier
        ones with the same date. The dict is empty when the file does not
        exist, is not valid JSON, or is not a JSON list.
    """
    if not os.path.exists(json_path):
        return {}

    with open(json_path, "r", encoding="utf-8") as file:
        # Only json.load can raise JSONDecodeError, so keep the try minimal.
        try:
            data = json.load(file)
        except json.JSONDecodeError:
            print(f"Error decoding JSON file: {json_path}")
            return {}

    if not isinstance(data, list):
        return {}

    data_dict = {}
    for entry in data:
        # Skip malformed records rather than aborting the whole file.
        if not isinstance(entry, dict):
            continue
        date = entry.get("date")
        # Records without a usable date string cannot be matched later on.
        if not isinstance(date, str):
            continue
        data_dict[date] = entry.get(key_name)
    return data_dict


def find_nearest_earnings(earnings_surprises, estimated_earnings, target_date):
    """Return the actual and estimated earnings dated closest to *target_date*.

    Args:
        earnings_surprises: Mapping of ``YYYY-MM-DD`` date strings to actual
            earnings values; its keys are the candidate dates.
        estimated_earnings: Mapping of ``YYYY-MM-DD`` date strings to
            estimated earnings values.
        target_date: Earnings call date as a ``YYYY-MM-DD`` string.

    Returns:
        A ``(actual, estimated)`` tuple looked up under the key of
        *earnings_surprises* with the smallest absolute day difference from
        *target_date*; on a tie the key encountered first wins. Either value
        is ``None`` when it is missing, and both are ``None`` when
        *earnings_surprises* is empty.
    """
    call_day = datetime.strptime(target_date, "%Y-%m-%d")

    def day_gap(date_key):
        # Absolute distance in whole days between a candidate and the call.
        return abs((datetime.strptime(date_key, "%Y-%m-%d") - call_day).days)

    # min() keeps the first of several equally close dates.
    closest = min(earnings_surprises, key=day_gap, default=None)
    return earnings_surprises.get(closest), estimated_earnings.get(closest)


def process_json_file(json_path, surprises_path, market_cap_path, folder_name):
    """Build the 2024 earnings-call rows for one stock, with financial context.

    Args:
        json_path: Path to the transcripts JSON file, expected to contain a
            list of objects with a ``date`` string and ``speaker*`` keys.
        surprises_path: Path to the earnings surprises JSON file, read for
            the ``actualEarningResult`` and ``estimatedEarning`` fields.
        market_cap_path: Path to the market cap JSON file, read for the
            ``marketCap`` field.
        folder_name: Value placed in the first column of every returned row
            (the caller passes the stock folder name).

    Returns:
        A list of ``[folder_name, earnings_date, transcript_json,
        actual_earning, estimated_earning, market_cap]`` rows, one per
        transcript entry whose date starts with ``2024``. Earnings values come
        from the surprise record dated closest to the call; the market cap is
        the value recorded on exactly the call date, or ``None`` if there is
        none. The list is empty when the transcripts file is not valid JSON,
        is not a JSON list, or holds no 2024 entries.
    """
    earnings_surprises = load_json_data(surprises_path, "actualEarningResult")
    estimated_earnings = load_json_data(surprises_path, "estimatedEarning")
    market_caps = load_json_data(market_cap_path, "marketCap")

    with open(json_path, "r", encoding="utf-8") as file:
        # Only json.load can raise JSONDecodeError, so keep the try minimal.
        try:
            data = json.load(file)
        except json.JSONDecodeError:
            print(f"Error decoding JSON file: {json_path}")
            return []

    # Anything other than a list cannot hold per-call entries.
    if not isinstance(data, list):
        return []

    data_2024 = []
    for entry in data:
        # Skip malformed records instead of crashing the whole export.
        if not isinstance(entry, dict):
            continue
        raw_date = entry.get("date")
        # A JSON null (or any non-string) date cannot be split or matched.
        if not isinstance(raw_date, str):
            continue

        # Drop the time component, if any ("YYYY-MM-DD HH:MM:SS").
        earnings_date = raw_date.split(" ")[0]
        if not earnings_date.startswith("2024"):
            continue

        transcript = {
            key: value for key, value in entry.items() if key.startswith("speaker")
        }
        actual_earning, estimated_earning = find_nearest_earnings(
            earnings_surprises, estimated_earnings, earnings_date
        )
        # Exact-date lookup: no nearest-date fallback for market cap.
        market_cap = market_caps.get(earnings_date, None)
        data_2024.append(
            [
                folder_name,
                earnings_date,
                json.dumps(transcript, ensure_ascii=False),
                actual_earning,
                estimated_earning,
                market_cap,
            ]
        )
    return data_2024


def create_csv(folder_path, data):
    """Write the enriched rows to ``2024_trading.csv`` inside *folder_path*.

    Args:
        folder_path: Directory in which the CSV file is created (an existing
            file of the same name is overwritten).
        data: Iterable of rows, each matching the six header columns.
    """
    header = [
        "Stock",
        "Earnings_Call_Date",
        "Earnings_Call_Transcript",
        "Actual_Earning",
        "Estimated_Earning",
        "Market_Cap",
    ]
    destination = os.path.join(folder_path, "2024_trading.csv")
    # newline="" lets the csv module control line endings itself.
    with open(destination, "w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(data)


def main():
    """Export an enriched 2024 CSV for every stock folder in the working directory.

    An immediate sub-directory is processed only when it contains all three
    input files (transcripts, earnings surprises, market cap). A CSV is
    written when at least one 2024 row was produced; otherwise a notice is
    printed.
    """
    root = os.getcwd()
    for name in os.listdir(root):
        folder_path = os.path.join(root, name)
        # Only directories can be stock folders.
        if not os.path.isdir(folder_path):
            continue

        json_path = os.path.join(folder_path, "earnings_transcripts_2020_2024.json")
        surprises_path = os.path.join(folder_path, "earnings_surprises.json")
        market_cap_path = os.path.join(
            folder_path, "historical_market_cap_2020_2024.json"
        )

        # All three inputs are required; skip the folder silently otherwise.
        required = (json_path, surprises_path, market_cap_path)
        if not all(os.path.exists(path) for path in required):
            continue

        rows = process_json_file(json_path, surprises_path, market_cap_path, name)
        if not rows:
            print(f"No relevant 2024 data found in {json_path}")
            continue

        create_csv(folder_path, rows)
        print(f"CSV file created: {os.path.join(folder_path, '2024_trading.csv')}")


# Entry point: run the export when this code is executed as the main program.
if __name__ == "__main__":
    main()


CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/CTAS/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/WELL/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/VZ/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/AMZN/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/CNP/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/RCL/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/CAT/2024_trading.csv
CSV file created: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/TFC/2024_trading.csv
CSV fi

### Merging All 2024 Trading CSVs into a Unified Dataset

To consolidate earnings call data for trading simulations, this script scans all subdirectories for `2024_trading.csv` files, reads them into DataFrames, and merges them into a single dataset. The resulting data is sorted by **Earnings_Call_Date** and saved as a unified `2024_trading.csv` in the root directory. This consolidated file enables streamlined analysis, modeling, or real-time decision-making based on the full set of 2024 earnings events.


In [14]:
import pandas as pd

# The per-stock inputs and the merged output share one file name: inputs are
# read from sub-folders, the output is written relative to the working directory.
file_name = "2024_trading.csv"
merged_file = "2024_trading.csv"

# Folder whose immediate sub-directories are scanned
current_dir = os.getcwd()

# One DataFrame per sub-folder that contains a trading CSV
dataframes = []

for folder in os.listdir(current_dir):
    folder_path = os.path.join(current_dir, folder)

    # Plain files in the working directory are not stock folders
    if not os.path.isdir(folder_path):
        continue

    file_path = os.path.join(folder_path, file_name)

    # Folders without a trading CSV contribute nothing
    if not os.path.exists(file_path):
        continue

    print(f"Found: {file_path}")
    df = pd.read_csv(file_path)
    dataframes.append(df)

if not dataframes:
    print("No matching files found.")
else:
    # Stack every per-stock table into one frame with a fresh index
    merged_df = pd.concat(dataframes, ignore_index=True)

    # Parse the call dates (unparseable values become NaT) and order the rows
    # chronologically, earliest call first
    merged_df = merged_df.assign(
        Earnings_Call_Date=pd.to_datetime(
            merged_df["Earnings_Call_Date"], errors="coerce"
        )
    ).sort_values("Earnings_Call_Date")

    # Write the combined table without the index column
    merged_df.to_csv(merged_file, index=False)
    print(f"Merged CSV saved as: {merged_file}")


Found: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/CTAS/2024_trading.csv
Found: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/WELL/2024_trading.csv
Found: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/VZ/2024_trading.csv
Found: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/AMZN/2024_trading.csv
Found: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/CNP/2024_trading.csv
Found: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/RCL/2024_trading.csv
Found: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/CAT/2024_trading.csv
Found: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/TFC/2024_trading.csv
Found: /Users/gary/Library/CloudStorage/OneDrive-UniversityofNottinghamMalaysia/FYP/Project/AA