In [7]:
import pandas as pd
import numpy as np

# Sample data
data = {
    'GVKEY': [164277, 164278, 164279, 164280, 164281, 164282, 164283, 164284, 164285, 164286, 164287, 164288, 164289],
    'datadate': ['12/31/79', '3/31/80', '6/30/80', '9/30/80', '12/31/80', '3/31/81', '6/30/81', '9/30/81', '12/31/81', '12/31/88', '3/31/89', '6/30/89', '9/30/89'],
    'fyearq': [1980, 1980, 1980, 1981, 1981, 1981, 1981, 1982, 1982, 1988, 1989, 1989, 1989],
    'fqtr': [2, 3, 4, 1, 2, 3, 4, 1, 2, 4, 1, 2, 3],
    'tic': ['BEC'] * 13,
    'datafqtr': ['1980Q2', '1980Q3', '1980Q4', '1981Q1', '1981Q2', '1981Q3', '1981Q4', '1982Q1', '1982Q2', '1988Q4', '1989Q1', '1989Q2', '1989Q3'],
    'rdq': ['1/17/80', '4/17/80', '8/19/80', '10/16/80', '1/16/81', '4/16/81', '8/25/81', '10/16/81', '1/19/82', np.nan, '4/25/89', '7/24/89', '10/24/89'],
    'fyr_change_dummy': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # Corrected fyr_change_dummy
    'median_distance': [0] * 13,
    'quarterly_median': [np.nan] * 13,
    'annual_median': [np.nan] * 13,
    'income_good': [True] * 13,
    'se1_good': [False] * 13,
    'se2_good': [True] * 13,
    'se3_good': [True] * 13
}


# Create DataFrame
df = pd.DataFrame(data)
# Convert datadate and rdq to datetime
df['datadate'] = pd.to_datetime(df['datadate'], errors='coerce')
df['rdq'] = pd.to_datetime(df['rdq'], errors='coerce')

# Initialize the count of rows with no previous reports
total_no_previous_reports_count = 0

# Function to calculate the difference in quarters between two fiscal periods
# Function to calculate the difference in quarters between two datafqtr values
def quarter_diff(datafqtr1, datafqtr2):
    year1, quarter1 = datafqtr1.split('Q')
    year2, quarter2 = datafqtr2.split('Q')
    quarter_diff = (int(year1) - int(year2)) * 4 + (int(quarter1) - int(quarter2))
    return quarter_diff

# Iterate over each group by 'tic'
for tic, group in df.groupby('tic'):
    group = group.sort_values(by=['datadate'])  # Sort by 'datadate' to ensure chronological order
    
    # Iterate over each row in the group
    for idx, row in group.iterrows():
        if row['fyr_change_dummy'] == 1 and row['income_good'] and (row['se1_good'] or row['se2_good'] or row['se3_good']):
            # Find the most recent previous report with rdq
            previous_rows = group[(group['rdq'] < row['rdq']) & (group['rdq'].notna())].sort_values(by='rdq')
            
            if previous_rows.empty:
                total_no_previous_reports_count += 1
                # Print the row where no previous report with 'rdq' is found
                print("Row index:", idx)
                print(row)
                print()
                print("=" * 50)

print(f"Total number of rows where no previous report with 'rdq' is found for tickers: {total_no_previous_reports_count}")


Row index: 9
GVKEY                            164286
datadate            1988-12-31 00:00:00
fyearq                             1988
fqtr                                  4
tic                                 BEC
datafqtr                         1988Q4
rdq                                 NaT
fyr_change_dummy                      1
median_distance                       0
quarterly_median                    NaN
annual_median                       NaN
income_good                        True
se1_good                          False
se2_good                           True
se3_good                           True
Name: 9, dtype: object

Total number of rows where no previous report with 'rdq' is found for tickers: 1


  df['datadate'] = pd.to_datetime(df['datadate'], errors='coerce')
  df['rdq'] = pd.to_datetime(df['rdq'], errors='coerce')


In [1]:
import pandas as pd
import numpy as np

# Sample data
data = {
    'GVKEY': [164277, 164278, 164279, 164280, 164281, 164282, 164283, 164284, 164285, 164286, 164287, 164288, 164289],
    'datadate': ['12/31/79', '3/31/80', '6/30/80', '9/30/80', '12/31/80', '3/31/81', '6/30/81', '9/30/81', '12/31/81', '12/31/88', '3/31/89', '6/30/89', '9/30/89'],
    'fyearq': [1980, 1980, 1980, 1981, 1981, 1981, 1981, 1982, 1982, 1988, 1989, 1989, 1989],
    'fqtr': [2, 3, 4, 1, 2, 3, 4, 1, 2, 4, 1, 2, 3],
    'tic': ['BEC'] * 13,
    'datafqtr': ['1980Q2', '1980Q3', '1980Q4', '1981Q1', '1981Q2', '1981Q3', '1981Q4', '1982Q1', '1982Q2', '1988Q4', '1989Q1', '1989Q2', '1989Q3'],
    'rdq': ['1/17/80', '4/17/80', '8/19/80', '10/16/80', '1/16/81', '4/16/81', '8/25/81', '10/16/81', '1/19/82', np.nan, '4/25/89', '7/24/89', '10/24/89'],
    'fyr_change_dummy': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # Corrected fyr_change_dummy
    'median_distance': [0] * 13,
    'quarterly_median': [np.nan] * 13,
    'annual_median': [np.nan] * 13,
    'income_good': [True] * 13,
    'se1_good': [False] * 13,
    'se2_good': [True] * 13,
    'se3_good': [True] * 13
}

# Create DataFrame
df = pd.DataFrame(data)

# Convert datadate and rdq to datetime
df['datadate'] = pd.to_datetime(df['datadate'], errors='coerce')
df['rdq'] = pd.to_datetime(df['rdq'], errors='coerce')

# Function to calculate the difference in quarters between two fiscal periods
def quarter_diff(fyearq1, fqtr1, fyearq2, fqtr2):
    return (fyearq1 - fyearq2) * 4 + (fqtr1 - fqtr2)

# Initialize the count of rows with no previous reports
total_no_previous_reports_count = 0

# Iterate over each group by 'tic'
for tic, group in df.groupby('tic'):
    # Sort the group DataFrame by datafqtr in descending order
    group = group.sort_values(by='datafqtr', ascending=False)
    
    # Iterate over each row in the group
    for idx, row in group.iterrows():
        if row['fyr_change_dummy'] == 1 and row['income_good'] and (row['se1_good'] or row['se2_good'] or row['se3_good']):
            # Iterate over the sorted DataFrame
            for _, sorted_row in group.iterrows():
                if not pd.isnull(sorted_row['rdq']):
                    # Found the most recent datafqtr with non-empty rdq
                    most_recent_datafqtr = sorted_row['datafqtr']
                    most_recent_rdq = sorted_row['rdq']
                    break
            
            # Calculate the difference in quarters between most recent datafqtr and current row's datafqtr
            if most_recent_datafqtr:
                most_recent_year, most_recent_quarter = most_recent_datafqtr.split('Q')
                current_year, current_quarter = row['datafqtr'].split('Q')
                quarter_diff_val = quarter_diff(int(current_year), int(current_quarter), int(most_recent_year), int(most_recent_quarter))
                print("Row index:", idx)
                print(row)
                print("Most recent datafqtr with non-empty rdq:", most_recent_datafqtr)
                print("Most recent rdq:", most_recent_rdq)
                print("Difference in quarters:", quarter_diff_val)
                print("=" * 50)


Row index: 9
GVKEY                            164286
datadate            1988-12-31 00:00:00
fyearq                             1988
fqtr                                  4
tic                                 BEC
datafqtr                         1988Q4
rdq                                 NaT
fyr_change_dummy                      1
median_distance                       0
quarterly_median                    NaN
annual_median                       NaN
income_good                        True
se1_good                          False
se2_good                           True
se3_good                           True
Name: 9, dtype: object
Most recent datafqtr with non-empty rdq: 1989Q3
Most recent rdq: 1989-10-24 00:00:00
Difference in quarters: -3


  df['datadate'] = pd.to_datetime(df['datadate'], errors='coerce')
  df['rdq'] = pd.to_datetime(df['rdq'], errors='coerce')
