In [1]:
import pandas as pd
import numpy as np
from scipy import stats

# Load the two CSV files
announcer_returns_path = "/Users/balmeru/Desktop/announcer_returns.csv"
non_announcer_returns_path = "/Users/balmeru/Desktop/non_announcer_returns.csv"

announcer_returns = pd.read_csv(announcer_returns_path, index_col=0)
non_announcer_returns = pd.read_csv(non_announcer_returns_path, index_col=0)

# Extract the 'Average Return' columns and rename them
ann = announcer_returns['Average Return']
non_ann = non_announcer_returns['Average Return']

# Create a new DataFrame with the required columns
result_df = pd.DataFrame({
    'Ann': ann,
    'Non-Ann': non_ann
})

# Calculate the 'Ann-Non' column
result_df['Ann-Non'] = result_df['Ann'] - result_df['Non-Ann']

# Calculate the count of non-empty columns minus 1 for both DataFrames
result_df['N(Ann)'] = announcer_returns.notnull().sum(axis=1) - 1
result_df['N(Non)'] = non_announcer_returns.notnull().sum(axis=1) - 1

# Calculate summary statistics for the new DataFrame
summary_stats = result_df.describe(percentiles=[0.1, 0.9]).transpose()
summary_stats['Median'] = result_df.median()  # Add median
summary_stats['t_stat'] = [stats.ttest_1samp(result_df[col], 0)[0] for col in result_df.columns]  # t-statistics
summary_stats['SD'] = result_df.std()  # Standard deviation
summary_stats['Ske w'] = result_df.skew()  # Skewness
summary_stats['Kurt'] = result_df.kurtosis()  # Kurtosis
summary_stats['n'] = result_df.count()  # Count of non-NaN values

# Rename summary statistics for better readability
summary_stats = summary_stats.rename(columns={
    'mean': 'Mean',
    '50%': 'Median',
    'min': 'Min',
    '10%': 'Perc. 10',
    '90%': 'Perc. 90',
    'max': 'Max',
    't_stat': 't stat',
    'std': 'SD',
    'skew': 'Ske w',
    'kurt': 'Kurt'
})

# Save the result DataFrame to a new CSV file
output_csv_path = "/Users/balmeru/Desktop/combined_returns_summary.csv"
result_df.to_csv(output_csv_path, index=True)

# Save summary statistics to a separate CSV file
summary_stats_output_path = "/Users/balmeru/Desktop/summary_statistics.csv"
summary_stats.to_csv(summary_stats_output_path)

print(f"Combined DataFrame saved to {output_csv_path}")
print(f"Summary statistics saved to {summary_stats_output_path}")

Combined DataFrame saved to /Users/balmeru/Desktop/combined_returns_summary.csv
Summary statistics saved to /Users/balmeru/Desktop/summary_statistics.csv


In [2]:
print(summary_stats)

          count         Mean          SD        Min     Perc. 10       Median  \
Ann      1501.0     0.445098    2.724936 -18.231902    -2.628271     0.554390   
Non-Ann  1501.0     0.342090    2.576801 -18.608154    -2.409437     0.447673   
Ann-Non  1501.0     0.103009    1.178730 -11.205356    -1.067719     0.056734   
N(Ann)   1503.0   287.271457  269.284129  -1.000000    60.000000   179.000000   
N(Non)   1503.0  4335.592149  921.523742  -1.000000  3260.000000  4169.000000   

            Perc. 90          Max       Median     t stat          SD  \
Ann         3.401230    17.170837     0.554390        NaN    2.724936   
Non-Ann     2.812752    17.230486     0.447673        NaN    2.576801   
Ann-Non     1.359053     7.487928     0.056734        NaN    1.178730   
N(Ann)    682.400000  1258.000000   179.000000   41.35816  269.284129   
N(Non)   5750.400000  6434.000000  4169.000000  182.39855  921.523742   

            Ske w      Kurt     n  
Ann     -0.414788  5.099451  1501  
No