In [2]:
import numpy as np
import pandas as pd

# Build a weekly "non-announcer" indicator matrix: one row per Monday, one
# column per ticker. A cell is 1 by default and 0 when an expected
# announcement date for that ticker falls in the week starting on that Monday.

# Load the CSV file (absolute, machine-specific path; adjust when running elsewhere)
file_path = '/Users/balmeru/Desktop/mediana_with_expected_dates.csv'
df = pd.read_csv(file_path)

# Ensure the expected_date column is in datetime format; unparseable values become NaT
df['expected_date'] = pd.to_datetime(df['expected_date'], errors='coerce')

# Drop rows with NaT in expected_date and keep only unique tic values.
# Rebinding instead of inplace=True keeps the cell free of in-place mutation.
df = df.dropna(subset=['expected_date'])
tickers = df['tic'].unique()

# Generate a date range for all Mondays from Jan 1, 1984, to Dec 31, 2012
date_range = pd.date_range(start='1984-01-01', end='2012-12-31', freq='W-MON')

# Monday of the week containing each expected_date. normalize() strips any
# time-of-day first: the entries of date_range are at midnight, so a timestamp
# such as 1990-05-07 16:30 would otherwise never match its week.
expected_day = df['expected_date'].dt.normalize()
week_start = expected_day - pd.to_timedelta(expected_day.dt.weekday, unit='D')

# Translate (week, ticker) pairs into positional indices; get_indexer returns
# -1 where there is no match (e.g. weeks outside date_range). Those rows must
# be masked out, because -1 would otherwise address the last row/column.
row_pos = date_range.get_indexer(week_start)
col_pos = pd.Index(tickers).get_indexer(df['tic'])
matched = (row_pos >= 0) & (col_pos >= 0)

# Start from all 1s and set announcement weeks to 0 in a single vectorized write
flags = np.ones((len(date_range), len(tickers)), dtype='int64')
flags[row_pos[matched], col_pos[matched]] = 0
non_announcer_frame = pd.DataFrame(flags, index=date_range, columns=tickers)

# Drop columns that contain only 1s (tickers with no announcement week inside date_range)
non_announcer_frame = non_announcer_frame.loc[:, (non_announcer_frame != 1).any(axis=0)]

# Save the non_announcer frame to a new CSV file (the date index is written as the first column)
output_file_path = '/Users/balmeru/Desktop/non_announcer_frame.csv'
non_announcer_frame.to_csv(output_file_path)

print(f"Non-announcer frame has been created and saved to {output_file_path}.")

Non-announcer frame has been created and saved to /Users/balmeru/Desktop/non_announcer_frame.csv.


In [3]:
# Location of the CSV to inspect
output_file_path = '/Users/balmeru/Desktop/non_announcer_frame.csv'

# Read the file back in. No index_col is given, so every column in the file,
# including the first one, is counted as a data column below.
df = pd.read_csv(output_file_path)

# Row and column counts of the loaded frame
num_rows = df.shape[0]
num_columns = df.shape[1]

# Report the dimensions
print(f"The CSV file has {num_rows} rows and {num_columns} columns.")


The CSV file has 1514 rows and 15035 columns.
