In [34]:
import pandas as pd
import numpy as np
from datetime import timedelta

# Load data
csv_file_path = "/Users/balmeru/Downloads/ALLLQ.csv"
df = pd.read_csv(csv_file_path)
df['rdq'] = pd.to_datetime(df['rdq'], errors='coerce')

df['txdbq'] = df['txdbq'].fillna(0)
df['dvpq'] = df['dvpq'].fillna(0)
df['income_good'] = ~df['ibq'].isna()

# 1
df['se1_good'] = ~df['teqq'].isna()

# 2
df['se2_good'] = ~df['ceqq'].isna() & ~df['pstkq'].isna()

# 3
df['se3_good'] = ~df['atq'].isna() & ~df['ltq'].isna()

# 4
df['se4_good'] = ~df['lseq'].isna() & ~df['ltq'].isna()

# market cap 
df['market_cap'] = df['prccq'] / df['ajexq'] * df['tstknq']

# 
fyr_changes = df.groupby('tic')['fyr'].nunique()
tickers_with_changes = fyr_changes[fyr_changes > 1].index
df['fyr_change'] = df['tic'].isin(tickers_with_changes)

total_rows = len(df)
income_good_count = df['income_good'].sum()
txdbq_nan_count = df['txdbq'].isna().sum()
dvpq_nan_count = df['dvpq'].isna().sum()
se1_good_count = df['se1_good'].sum()
se2_good_count = df['se2_good'].sum()
se3_good_count = df['se3_good'].sum()
se4_good_count = df['se4_good'].sum()
market_cap_count = df['market_cap'].notna().sum()
market_val_count = df['mkvaltq'].notna().sum()

# Count non-NaN values in 'mkvaltq' or 'market_cap'
non_na_market_cap = (df['mkvaltq'].notna() | df['market_cap'].notna()).sum()

any_se_good_count = (df['se1_good'] | df['se2_good'] | df['se3_good']).sum()
fyr_change_count = df['fyr_change'].sum()
se_and_income_good_count = ((df['se1_good'] | df['se2_good'] | df['se3_good']) & df['income_good']).sum()
count_fyr_change_and_non_na = df[df['fyr_change'] & (df['mkvaltq'].notna() | df['market_cap'].notna())].shape[0]

fyr_distribution = fyr_changes.value_counts().sort_index()
num_unique_tickers = df['tic'].nunique()
print("Frequency distribution of fiscal year changes:")
print(fyr_distribution)
print(f"\nOverall number of unique tickers: {num_unique_tickers}")


print(f"Total number of rows: {total_rows}")
print(f"Number of rows with income_good = True: {income_good_count}")
print(f"Number of NaNs in txdbq, Double Checking: {txdbq_nan_count}")
print(f"Number of NaNs in dvpq, Double Checking: {dvpq_nan_count}")
print(f"Number of rows with se1_good = True: {se1_good_count}")
print(f"Number of rows with se2_good = True: {se2_good_count}")
print(f"Number of rows with se3_good = True: {se3_good_count}")
print(f"Number of rows with non NA market_cap: {market_cap_count}")
print(f"Number of rows with non NA mkvaltq: {market_val_count}")
print(f"Number of rows where 'mkvaltq' or 'market_cap' are not NaN: {non_na_market_cap}")


print(f"Number of rows where any of se1_good, se2_good, or se3_good is True: {any_se_good_count}")

print(f"Number of rows where fyr_change is True: {fyr_change_count}")
print(f"Number of rows where income and any se is True: {se_and_income_good_count}")
print(f"Number of rows where 'fyr_change' is True and either 'mkvaltq' or 'market_cap' are not NaN: {count_fyr_change_and_non_na}")

filtered_df = df[df['mkvaltq'].notna() | df['market_cap'].notna()]
unique_tickers_with_non_na = filtered_df['tic'].unique()
count_unique_tickers_with_non_na = len(unique_tickers_with_non_na)
print(f"Number of unique tickers with at least one non-NaN value in 'mkvaltq' or 'market_cap': {count_unique_tickers_with_non_na}")

filtered_df = df[df['mkvaltq'].notna() | df['market_cap'].notna()]

# Get the unique tickers from the filtered DataFrame
unique_tickers_with_non_na = filtered_df['tic'].unique()
# Count the number of unique tickers
count_unique_tickers_with_non_na = len(unique_tickers_with_non_na)

# Print the count of unique tickers
print(f"Number of unique tickers with at least one non-NaN value in 'mkvaltq' or 'market_cap': {count_unique_tickers_with_non_na}")
unique_tickers_with_fyr_change = df[df['fyr_change']]['tic'].unique()
count_unique_tickers_with_fyr_change = len(unique_tickers_with_fyr_change)

# 2. Count of unique tickers with fiscal year change and non-NaN 'mkvaltq' or 'market_cap'
unique_tickers_with_fyr_change_and_non_na = np.intersect1d(unique_tickers_with_fyr_change, filtered_df['tic'].unique())
count_unique_tickers_with_fyr_change_and_non_na = len(unique_tickers_with_fyr_change_and_non_na)

# Print the results
print(f"Number of unique tickers that experienced a fiscal year change: {count_unique_tickers_with_fyr_change}")
print(f"Number of unique tickers with fiscal year change and non-NaN 'mkvaltq' or 'market_cap': {count_unique_tickers_with_fyr_change_and_non_na}")


Frequency distribution of fiscal year changes:
fyr
1    22053
2     1846
3       83
4        3
Name: count, dtype: int64

Overall number of unique tickers: 23985
Total number of rows: 1021115
Number of rows with income_good = True: 929843
Number of NaNs in txdbq, Double Checking: 0
Number of NaNs in dvpq, Double Checking: 0
Number of rows with se1_good = True: 233731
Number of rows with se2_good = True: 911242
Number of rows with se3_good = True: 913570
Number of rows with non NA market_cap: 438191
Number of rows with non NA mkvaltq: 233231
Number of rows where 'mkvaltq' or 'market_cap' are not NaN: 446394
Number of rows where any of se1_good, se2_good, or se3_good is True: 917854
Number of rows where fyr_change is True: 128412
Number of rows where income and any se is True: 915045
Number of rows where 'fyr_change' is True and either 'mkvaltq' or 'market_cap' are not NaN: 49218
Number of unique tickers with at least one non-NaN value in 'mkvaltq' or 'market_cap': 14495
Number of unique

In [35]:

target_year = 1984
reference_year = 1983
expected_dates = []
for tic in df['tic'].unique():
    tic_data = df[
        (df['tic'] == tic) & 
        (df['fyearq'] == reference_year)
    ]
    
    for quarter in range(1, 5):
        quarter_data = tic_data[
            (tic_data['fqtr'] == quarter)
        ]
        
        if not quarter_data.empty:
            reference_date = quarter_data['rdq'].iloc[0]
            expected_date = reference_date + timedelta(weeks=52)
        else:
            expected_date = np.nan  
        
        expected_dates.append({
            'quarter': f"1984Q{quarter}",  
            'tic': tic,                   
            'expected_announcement_date': expected_date  
        })

expected_dates_df = pd.DataFrame(expected_dates)
expected_dates_df['quarter'] = pd.Categorical(expected_dates_df['quarter'], categories=['1984Q1', '1984Q2', '1984Q3', '1984Q4'], ordered=True)

pivot_table = expected_dates_df.pivot_table(
    index='quarter',   
    columns='tic',    
    values='expected_announcement_date'  
)


pivot_table_cleaned = pivot_table.dropna(axis=1, how='all')
print("Pivot Table corrected for both early and late reporting:")
print(pivot_table_cleaned)


pivot_table_cleaned.to_csv("/Users/balmeru/Downloads/expected_dates_1984.csv", index=True)

all_expected_dates = pivot_table.stack()
min_expected_date = all_expected_dates.min()
max_expected_date = all_expected_dates.max()

print("Range of Expected Announcement Dates after correction:")
print(f"Minimum date: {min_expected_date}")
print(f"Maximum date: {max_expected_date}")

Pivot Table corrected for both early and late reporting:
tic          0015B      0033A      0131A      0141A      0170A      0173A  \
quarter                                                                     
1984Q1         NaT 1984-04-23 1984-04-25 1984-04-20        NaT 1984-08-10   
1984Q2         NaT 1984-07-25 1984-08-14 1984-07-24        NaT 1984-11-14   
1984Q3         NaT 1984-10-29 1984-11-05 1984-10-23 1984-11-07 1985-02-15   
1984Q4  1985-01-14 1985-02-19 1985-02-15 1985-01-29 1985-04-03 1985-06-06   

tic          0223B      0431B      0571B      0575B  ...       ZGCO  \
quarter                                              ...              
1984Q1  1984-04-23 1984-06-01 1984-04-30 1983-11-14  ... 1984-04-16   
1984Q2  1984-08-01 1984-08-31 1984-07-25 1984-02-03  ... 1984-07-13   
1984Q3  1984-10-24 1984-11-15 1984-10-26 1984-05-04  ... 1984-10-19   
1984Q4  1985-01-30 1985-03-07 1985-03-13 1984-08-29  ... 1985-01-16   

tic           ZIAD      ZIM.1       ZION        ZLC  

  pivot_table = expected_dates_df.pivot_table(


In [36]:
#Above calculation is with no accounting for earnings, se check nor fyr change!