In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta

# --- Load the quarterly data and parse the report date column ---
csv_file_path = "/Users/balmeru/Downloads/QQQQ.csv"
df = pd.read_csv(csv_file_path)
# errors='coerce' turns unparseable rdq values into NaT instead of raising.
df['rdq'] = pd.to_datetime(df['rdq'], errors='coerce')

# Missing txdbq / dvpq values are replaced by zero.
zero_filled_cols = ['txdbq', 'dvpq']
df[zero_filled_cols] = df[zero_filled_cols].fillna(0)

# --- Availability flags (True when the required inputs are present) ---
# Income: ibq must be present.
df['income_good'] = df['ibq'].notna()
# Variant 1: teqq must be present.
df['se1_good'] = df['teqq'].notna()
# Variant 2: both ceqq and pstkq must be present.
df['se2_good'] = df['ceqq'].notna() & df['pstkq'].notna()
# Variant 3: both atq and ltq must be present.
df['se3_good'] = df['atq'].notna() & df['ltq'].notna()

# Market cap: (prccq / ajexq) * cshoq.
# NOTE(review): the price is divided by ajexq while cshoq is left unadjusted —
# confirm this is the intended market-cap definition.
df['market_cap'] = df['prccq'].div(df['ajexq']).mul(df['cshoq'])

# --- Summary counts ---
# Row-wise OR of the three se*_good flags, computed once and reused below.
any_se_good = df[['se1_good', 'se2_good', 'se3_good']].any(axis=1)

total_rows = df.shape[0]
income_good_count = df['income_good'].sum()
se1_good_count = df['se1_good'].sum()
se2_good_count = df['se2_good'].sum()
se3_good_count = df['se3_good'].sum()
any_se_good_count = any_se_good.sum()
se_and_income_good_count = (any_se_good & df['income_good']).sum()

num_unique_tickers = df['tic'].nunique()

# One line per statistic, emitted in a single print call.
summary_lines = [
    f"\nOverall number of unique tickers: {num_unique_tickers}",
    f"Total number of rows: {total_rows}",
    f"Number of rows with income_good = True: {income_good_count}",
    f"Number of rows with se1_good = True: {se1_good_count}",
    f"Number of rows with se2_good = True: {se2_good_count}",
    f"Number of rows with se3_good = True: {se3_good_count}",
    f"Number of rows where any of se1_good, se2_good, or se3_good is True: {any_se_good_count}",
    f"Number of rows where income and any se is True: {se_and_income_good_count}",
]
print("\n".join(summary_lines))



Overall number of unique tickers: 25157
Total number of rows: 1085915
Number of rows with income_good = True: 988555
Number of rows with se1_good = True: 292366
Number of rows with se2_good = True: 969679
Number of rows with se3_good = True: 972202
Number of rows where any of se1_good, se2_good, or se3_good is True: 976529
Number of rows where income and any se is True: 973632


In [35]:

target_year = 1984
reference_year = 1983


def build_expected_dates(frame, reference_year, target_year):
    """Project each ticker's reference-year report dates 52 weeks forward.

    For every distinct value of ``frame['tic']`` and every fiscal quarter 1-4,
    take the ``rdq`` of the first row (in ``frame`` order) whose ``fyearq``
    equals ``reference_year`` and whose ``fqtr`` equals that quarter, and add
    ``timedelta(weeks=52)`` (364 days) to it. Combinations with no such row
    get NaT.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns ``tic``, ``fyearq``, ``fqtr`` and a
        datetime-typed ``rdq``.
    reference_year : int
        Value of ``fyearq`` whose report dates are used as the baseline.
    target_year : int
        Year used to build the quarter labels (``"<target_year>Q<n>"``).

    Returns
    -------
    pandas.DataFrame
        One row per (ticker, quarter), ticker-major, with the columns
        ``quarter`` (ordered categorical), ``tic`` and
        ``expected_announcement_date``.
    """
    quarters = [1, 2, 3, 4]
    labels = [f"{target_year}Q{q}" for q in quarters]

    # Filter the big frame once, rather than once per ticker. keep='first'
    # selects the first matching row per (tic, fqtr) even when its rdq is NaT,
    # which a groupby(...).first() would skip.
    is_reference = (
        (frame['fyearq'] == reference_year)
        & frame['fqtr'].isin(quarters)
        & frame['tic'].notna()
    )
    first_reports = (
        frame.loc[is_reference, ['tic', 'fqtr', 'rdq']]
        .drop_duplicates(subset=['tic', 'fqtr'], keep='first')
        .astype({'fqtr': 'int64'})  # align key dtype with the integer grid below
    )

    # Full ticker x quarter grid so tickers without reference data still
    # appear (with NaT).
    tickers = pd.DataFrame({'tic': frame['tic'].unique()})
    grid = tickers.merge(pd.DataFrame({'fqtr': quarters}), how='cross')

    expected = grid.merge(first_reports, on=['tic', 'fqtr'], how='left')
    expected['expected_announcement_date'] = expected['rdq'] + timedelta(weeks=52)
    expected['quarter'] = pd.Categorical(
        expected['fqtr'].map(dict(zip(quarters, labels))),
        categories=labels,
        ordered=True,
    )
    return expected[['quarter', 'tic', 'expected_announcement_date']]


expected_dates_df = build_expected_dates(df, reference_year, target_year)

# Each (quarter, tic) pair occurs exactly once, so a plain reshape is enough;
# no aggregation (and no groupby on the categorical index) is needed.
# Rows with a missing ticker carry no data and are excluded from the columns.
pivot_table = (
    expected_dates_df
    .dropna(subset=['tic'])
    .pivot(index='quarter', columns='tic', values='expected_announcement_date')
)

# Drop tickers that have no expected date in any quarter.
pivot_table_cleaned = pivot_table.dropna(axis=1, how='all')
print("Pivot Table corrected for both early and late reporting:")
print(pivot_table_cleaned)

# The output filename follows target_year instead of a hard-coded "1984".
pivot_table_cleaned.to_csv(f"/Users/balmeru/Downloads/expected_dates_{target_year}.csv", index=True)


Pivot Table corrected for both early and late reporting:
tic          0015B      0033A      0131A      0141A      0170A      0173A  \
quarter                                                                     
1984Q1         NaT 1984-04-23 1984-04-25 1984-04-20        NaT 1984-08-10   
1984Q2         NaT 1984-07-25 1984-08-14 1984-07-24        NaT 1984-11-14   
1984Q3         NaT 1984-10-29 1984-11-05 1984-10-23 1984-11-07 1985-02-15   
1984Q4  1985-01-14 1985-02-19 1985-02-15 1985-01-29 1985-04-03 1985-06-06   

tic          0223B      0431B      0571B      0575B  ...       ZGCO  \
quarter                                              ...              
1984Q1  1984-04-23 1984-06-01 1984-04-30 1983-11-14  ... 1984-04-16   
1984Q2  1984-08-01 1984-08-31 1984-07-25 1984-02-03  ... 1984-07-13   
1984Q3  1984-10-24 1984-11-15 1984-10-26 1984-05-04  ... 1984-10-19   
1984Q4  1985-01-30 1985-03-07 1985-03-13 1984-08-29  ... 1985-01-16   

tic           ZIAD      ZIM.1       ZION        ZLC  

  pivot_table = expected_dates_df.pivot_table(


In [36]:
# NOTE: the calculation above does not account for the earnings check, the se check, or fyr changes!