In [10]:
import pandas as pd
import os

# Import custom data processing functions
from data_prep import load_data, handle_missing_data
from data_analysis import calculate_weekly_returns

# Collect each year's weekly-returns frame in a list and concatenate once after
# the loop. Growing a DataFrame with pd.concat inside the loop re-copies every
# previously merged row on each iteration, and seeding the result with an empty
# pd.DataFrame() relies on pandas' deprecated handling of empty frames in concat.
yearly_weekly_returns = []

# Loop through the years from 1984 to 2012 (the range end is exclusive)
for year in range(1984, 2013):
    # Define the file path for the current year's CSV file
    # NOTE(review): machine-specific absolute path — consider making it configurable.
    csv_file_path = f'/Users/balmeru/Documents/{year}.csv'

    # Load and preprocess the data (helpers come from the project's data_prep module)
    df = load_data(csv_file_path)
    df_cleaned = handle_missing_data(df)

    # Perform data analysis: calculate weekly returns
    weekly_returns = calculate_weekly_returns(df_cleaned)

    # Keep this year's frame; all frames are merged in one step after the loop
    yearly_weekly_returns.append(weekly_returns)

    # Display first few rows for each year for debugging purposes
    print(f"Year: {year}")
    print(weekly_returns.head())

# Stack all yearly frames row-wise; columns missing in a given year become NaN.
# NOTE(review): yearly frames can overlap in dates (the printed head of the 1985
# frame starts in December 1984), so the merged index may contain duplicate dates.
all_years_merged = pd.concat(yearly_weekly_returns, axis=0)

# After the loop, save the final merged DataFrame for all years
final_output_file = '/Users/balmeru/Desktop/merged_weekly_pivot_1984_2012.csv'

all_years_merged.to_csv(final_output_file)

print(f"All years merged and saved to {final_output_file}")



Missing data summary before filling:
 ajexdi      2753
prccd       2753
trfd      501338
dtype: int64
Missing data percentage before filling:
 ajexdi     0.229482
prccd      0.229482
trfd      41.790007
dtype: float64


  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    279
prccd     279
trfd        0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.023257
prccd     0.023257
trfd      0.000000
dtype: float64
Year: 1984
tic                0146A  0153A     0223B      0485B     0491B      3234B  \
1984-01-02           NaN    NaN  0.980392  13.004089 -3.831262 -14.111507   
1984-01-09 -3.508772e-01    NaN  0.485437 -10.294118  3.521127 -10.714286   
1984-01-16  7.042254e-01    NaN  0.483092  -3.278689 -2.721088  -0.666667   
1984-01-23  3.496503e-01    NaN       NaN  -8.474576 -0.699301   1.342282   
1984-01-30  2.220446e-14    NaN -0.480769  -7.407407  2.112676        NaN   

tic             3406B      3521B     3614B     3900B  ...      ZION  \
1984-01-02        NaN        NaN  3.367003       NaN  ... -0.383985   
1984-01-09   1.041667  -1.923077  1.302932 -9.302326  ... -5.000000   
1984-01-16   0.515464 -19.607843 -5.787781  2.564103  ... -5.263158   
1984-01-23  -3.589744  -4.878049  2.73

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    15
prccd     15
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.001112
prccd     0.001112
trfd      0.000000
dtype: float64
Year: 1985
tic            0146A     0153A     0223B     0485B      0491B         3234B  \
1984-12-10  0.606061 -0.438596  0.934579 -4.347826 -19.130435  8.947372e-01   
1984-12-17  0.602410  3.964758  2.777778  9.090909   2.150538 -1.110223e-14   
1984-12-24  0.598802  2.542373       NaN       NaN  10.526316 -1.315789e+00   
1984-12-31  0.892857 -0.826446 -1.801802       NaN  -1.904762  1.333333e+00   
1985-01-07 -0.294985  1.666667  0.917431       NaN  -2.912621           NaN   

tic             3406B      3521B  3565B         3614B  ...           ZMX  \
1984-12-10  -1.020408  15.151515    NaN  5.902778e+00  ... -1.250000e+00   
1984-12-17  14.432990   5.263158    NaN  2.990164e+00  ... -2.531646e+00   
1984-12-24  -1.801802  -2.500000    NaN -2.220446e-14  ...  1.298701e+00   
1984-12-3

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    8
prccd     8
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000542
prccd     0.000542
trfd      0.000000
dtype: float64
Year: 1986
tic                0146A     0153A     0223B      0485B         0491B  2185B  \
1985-12-09           NaN  4.347826  5.263158  -7.352941  3.000000e+00    NaN   
1985-12-16 -2.604167e-01  3.571429       NaN  -4.761905  9.708738e-01    NaN   
1985-12-23  7.832898e-01  2.011494  0.625000  -1.666667 -1.110223e-14    NaN   
1985-12-30           NaN -1.690141 -0.621118  13.559322  2.884615e+00    NaN   
1986-01-06 -1.110223e-14  0.286533 -2.500000   1.492537  3.738318e+00    NaN   

tic         3219B      3234B     3406B     3521B  ...       ZMX       ZNT  \
1985-12-09    NaN   2.380952 -0.645161  9.230769  ...  2.061856       NaN   
1985-12-16    NaN  -3.488372  0.649351 -2.816901  ...       NaN  4.891304   
1985-12-23    NaN  10.843373 -2.580645       NaN  ...  2.020202  1.554404   
19

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    23
prccd     23
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.001404
prccd     0.001404
trfd      0.000000
dtype: float64
Year: 1987
tic                0146A         0223B         0485B     0491B      2185B  \
1986-12-08 -2.220446e-14  5.555556e-01 -1.110223e-14 -2.564103   4.000000   
1986-12-15 -1.010101e+00 -1.110223e-14  1.333333e+00  3.947368 -15.384615   
1986-12-22 -5.102041e-01 -2.209945e+00           NaN -1.898734  -2.272727   
1986-12-29  1.538462e+00  1.129944e+00  1.315789e+00 -0.645161  11.627907   
1987-01-05  1.010101e+00  2.234637e+00  1.818182e+01  1.948052        NaN   

tic         3213B         3219B     3234B      3406B      3521B  ...  \
1986-12-08    NaN  2.220446e-14 -2.442016   1.005025  -3.896104  ...   
1986-12-15    NaN -1.886792e+00  3.030303  -0.995025        NaN  ...   
1986-12-22    NaN -9.615385e-01 -2.941176  -2.512563        NaN  ...   
1986-12-29    NaN  9.708738e-01      

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    23
prccd     23
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.001339
prccd     0.001339
trfd      0.000000
dtype: float64
Year: 1988
tic         0099A     0146A     0223B      0485B     0491B      2185B  3189B  \
1987-12-07    NaN       NaN -2.702703   2.272727  9.243697  12.121212    NaN   
1987-12-14    NaN       NaN -0.694444  35.555556  5.384615   2.702703    NaN   
1987-12-21    NaN  1.052632  3.496503  -4.918033  6.569343 -10.526316    NaN   
1987-12-28    NaN -1.041667       NaN  -3.448276  1.369863   8.823529    NaN   
1988-01-04    NaN       NaN  4.054054   1.785714 -0.675676  -8.108108    NaN   

tic             3213B      3219B     3234B  ...       ZMX           ZNT  \
1987-12-07  10.596026  -3.846154  0.972973  ... -1.298701 -2.400000e+00   
1987-12-14   4.790419  12.000000 -1.351351  ... -2.631579  1.639344e+00   
1987-12-21  -1.997714  14.285714  6.849315  ... -1.351351 -3.225806e+00   
1987-12

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    12
prccd     12
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000748
prccd     0.000748
trfd      0.000000
dtype: float64
Year: 1989
tic            0099A     0183B         0223B     0485B     0491B  0517B  \
1989-01-09  2.298851  1.145455 -1.110223e-14 -1.176471  1.869159    NaN   
1989-01-16  0.561798  8.987956           NaN  2.380952 -3.211009    NaN   
1989-01-23 -5.586592 -3.100775  1.265823e+00  2.325581  1.421801    NaN   
1989-01-30 -5.325444       NaN -6.250000e-01  7.954545  0.934579    NaN   
1989-02-06  0.625000       NaN  6.289308e-01 -3.157895  1.388889    NaN   

tic            2185B         3189B     3213B         3219B  ...           ZMX  \
1989-01-09 -1.369863           NaN  1.010101  6.521739e+00  ... -1.110223e-14   
1989-01-16 -1.388889           NaN  1.752000 -2.220446e-14  ... -2.912621e+00   
1989-01-23 -5.633803           NaN  7.836139  2.040816e+00  ... -5.000000e+00   
1989-01-30 -4

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    20
prccd     20
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.001239
prccd     0.001239
trfd      0.000000
dtype: float64
Year: 1990
tic             0099A     0183B     0223B      0485B     0491B     0517B  \
1990-01-08 -13.043478       NaN -2.840909  -0.806452 -5.434783 -2.666667   
1990-01-15  20.000000  9.756098 -2.339181   0.813008 -2.681992 -3.424658   
1990-01-22 -29.166667 -2.222222 -0.598802   5.645161 -7.480315 -2.127660   
1990-01-29   5.882353       NaN -0.602410  -0.763359  6.382979  5.797101   
1990-02-05  16.666667       NaN  1.212121  10.000000 -0.800000  6.164384   

tic            2185B  3186B     3189B         3213B  ...       ZMX       ZNT  \
1990-01-08 -3.539823    NaN       NaN -3.208556e+00  ... -2.228016 -3.355705   
1990-01-15 -8.256881    NaN -1.785714  2.220446e-14  ... -4.545455  1.388889   
1990-01-22 -7.000000    NaN       NaN -1.736912e+00  ... -4.761905 -2.967065   
1990-01-29 

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    21
prccd     21
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.001199
prccd     0.001199
trfd      0.000000
dtype: float64
Year: 1991
tic             0099A     0183B     0223B      0485B      0491B     0517B  \
1990-12-10        NaN       NaN -0.645161  -1.086957 -14.391144       NaN   
1990-12-17        NaN  3.571429 -3.896104  -3.296703   0.768618  3.000000   
1990-12-24   8.333333 -3.448276  4.729730  -9.090909  -0.429185 -0.970874   
1990-12-31  -7.692308 -7.142857  1.935484   2.500000   2.155172 -1.960784   
1991-01-07 -16.666667  7.692308  1.265823 -17.073171  -3.375527       NaN   

tic                2185B         3186B     3189B     3213B  ...          ZIXI  \
1990-12-10 -1.086957e+00  6.976744e+00 -1.599999  1.993303  ... -2.702703e+00   
1990-12-17  8.888891e-01 -2.220446e-14       NaN  1.302898  ...  1.388889e+01   
1990-12-24  1.098901e+00 -4.347826e+00  3.030303  2.891244  ... -4.878049e+00   
1

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    16
prccd     16
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000866
prccd     0.000866
trfd      0.000000
dtype: float64
Year: 1992
tic             0099A      0183B         0223B      0485B     0491B  \
1991-12-09 -21.371429  -7.407407  2.220446e-14   7.826087 -0.749064   
1991-12-16        NaN  16.000000  1.388889e+00   3.225806  4.905660   
1991-12-23        NaN  25.875862  4.109589e+00  -0.781250  5.035971   
1991-12-30  27.180233  -2.739426  1.973684e+00  11.023622 -2.739726   
1992-01-06  42.857143        NaN  3.870968e+00   4.964539 -3.169014   

tic             0517B     2185B      3186B     3189B     3213B  ...       ZNT  \
1991-12-09  -7.042254 -2.439024   4.000000 -2.127659  8.947368  ... -4.237288   
1991-12-16  -1.515152 -0.436667 -11.538462  2.272727 -0.966184  ...  8.849558   
1991-12-23   7.692308  1.680672  21.739130 -2.222222  1.709268  ...  4.065041   
1991-12-30   5.000000  2.479339   3.5

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    12
prccd     12
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000604
prccd     0.000604
trfd      0.000000
dtype: float64
Year: 1993
tic             0099A     0183B     0223B     0485B     0491B     0517B  \
1992-12-07  -4.545455 -4.000000 -4.819277 -3.508772  5.439331  3.539823   
1992-12-14  19.047619 -8.333333 -6.329114       NaN  2.380952 -0.854701   
1992-12-21   8.000000       NaN  2.027027  1.818182       NaN -1.724138   
1992-12-28  14.814815 -9.090909  0.662252  3.571429 -0.387597  1.754386   
1993-01-04   3.225806       NaN -3.947368 -3.879310       NaN  2.155172   

tic            2185B     3186B  3187B     3189B  ...      ZOLT    ZOOM.1  \
1992-12-07 -1.709168  3.703704    NaN  1.044123  ...       NaN  3.100775   
1992-12-14  5.957672  3.571429    NaN -6.560198  ...  1.575000 -0.751880   
1992-12-21 -5.263158 -5.862069    NaN       NaN  ...  4.602510 -3.030303   
1992-12-28 -2.777778       NaN   

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    9
prccd     9
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000446
prccd     0.000446
trfd      0.000000
dtype: float64
Year: 1994
tic             0099A      0183B     0223B  0230B     0485B     0491B  \
1994-01-10        NaN  23.809524  0.729927    NaN -6.521739  3.539823   
1994-01-17  -3.571429 -11.538462 -0.724638    NaN  1.162791 -1.709402   
1994-01-24  -3.703704   4.347826  3.649635    NaN  1.724138  3.478261   
1994-01-31        NaN  -4.166667  1.408451    NaN  1.694915 -3.361345   
1994-02-07  11.538462   4.347826  2.083333    NaN  2.777778  6.521739   

tic            0517B     2185B      3186B     3187B  ...       ZOLT    ZOOM.1  \
1994-01-10  0.129730 -0.961538   2.941176  6.735751  ... -12.000000  5.000000   
1994-01-17  1.081081 -4.854369 -14.285714  0.485437  ...   2.272727 -5.714286   
1994-01-24  3.208556 -4.081633  13.333333  3.381643  ...   6.666667  1.010101   
1994-01-31  4.145078  2.1276

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    13
prccd     13
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000627
prccd     0.000627
trfd      0.000000
dtype: float64
Year: 1995
tic            0099A      0183B     0223B      0230B      0485B         0491B  \
1995-01-09  3.000000   9.011628  1.694915  -1.562500   2.352941  2.347418e+00   
1995-01-16 -0.970874  16.666667       NaN  14.285714 -18.390805 -1.834862e+00   
1995-01-23 -8.823529        NaN       NaN  -2.777778   4.225352  2.336449e+00   
1995-01-30 -5.376344        NaN  5.000000  19.285714  -4.054054 -2.220446e-14   
1995-02-06  1.136364 -21.371429 -3.968254  -0.598802  -7.746479  3.196347e+00   

tic                0517B     2185B      3186B     3187B  ...     ZOOM.1  \
1995-01-09 -1.018182e+00 -2.941176  -2.500000  5.376344  ...   4.838710   
1995-01-16 -2.220446e-14  6.060606   7.692308  1.530612  ...  13.846154   
1995-01-23 -3.067485e+00 -1.428571  38.095238 -2.512563  ...  -4.054054   
1

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    31
prccd     31
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.001311
prccd     0.001311
trfd      0.000000
dtype: float64
Year: 1996
tic         0081A     0099A      0183B     0223B     0230B      0485B  \
1995-12-04    NaN -1.219512  -3.199174       NaN -1.818182   2.564103   
1995-12-11    NaN  6.172840        NaN  0.671141 -1.851852  -2.500000   
1995-12-18    NaN  0.351649 -13.326226  0.666667  5.660377  -5.769231   
1995-12-25    NaN       NaN  23.001230 -0.662252  4.464286   0.680272   
1996-01-01    NaN  2.325581  -3.100000  2.000000  4.273504 -12.837838   

tic            0491B         0517B     2185B      3186B  ...    ZOOM.1  \
1995-12-04 -1.003344 -3.144654e+00 -0.403252 -12.572128  ...  3.921569   
1995-12-11  2.364865  6.493506e-01       NaN        NaN  ...  1.886792   
1995-12-18 -2.640264 -1.110223e-14       NaN        NaN  ... -6.790123   
1995-12-25  8.474577 -3.225806e+00       NaN -10.0000

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    7
prccd     7
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000284
prccd     0.000284
trfd      0.000000
dtype: float64
Year: 1997
tic            0081A     0099A     0183B     0223B     0230B     0485B  \
1996-12-09       NaN -3.571429 -3.100000  3.921569 -0.613497  1.886792   
1996-12-16       NaN  0.299065       NaN  5.660377  4.938272       NaN   
1996-12-23 -1.139394       NaN       NaN       NaN -0.588235  2.469136   
1996-12-30 -0.890960  2.777778 -2.425181 -2.678571  7.692308  0.602410   
1997-01-06  3.092784       NaN  2.485457 -2.293578  4.395604  1.197605   

tic            0491B         0517B     3213B      3219B  ...     ZQKSQ  \
1996-12-09 -5.921053 -3.061224e+00  5.797101   2.013423  ... -4.790419   
1996-12-16 -4.195804 -1.052632e+00  5.479452  31.578947  ...  5.660377   
1996-12-23 -0.729927  2.220446e-14 -1.298701   0.220000  ...       NaN   
1996-12-30  1.470588  3.191489e+00  5.263158  -0.5

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    5
prccd     5
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000204
prccd     0.000204
trfd      0.000000
dtype: float64
Year: 1998
tic            0081A      0183B     0223B      0230B         0485B     0491B  \
1997-12-08       NaN  -7.018529  4.566210  -5.729167  2.507463e-01 -4.690447   
1997-12-15  0.380065  -7.548309  5.240175  -8.839779  8.186438e+00 -2.259574   
1997-12-22       NaN -10.189419  5.603320   0.606061  2.220446e-14 -0.272106   
1997-12-29       NaN   9.090909  3.929211  10.544578  1.834862e+00 -0.137516   
1998-01-05       NaN  -2.066667 -2.268397  -2.454442 -7.430631e+00 -4.918033   

tic                0517B         3565B      3614B      3615B  ...      ZQKSQ  \
1997-12-08  1.024257e+01 -9.615385e+00  -9.338338  -5.141282  ...  -5.990783   
1997-12-15           NaN -3.333333e+01   1.084274  -0.273165  ...  -0.980392   
1997-12-22  2.043791e-01           NaN  -4.875651  11.413043  ...  -0.

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    5
prccd     5
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000215
prccd     0.000215
trfd      0.000000
dtype: float64
Year: 1999
tic             0183B     0223B      0230B         0485B     0491B    3AATTZ  \
1998-12-07 -12.500000 -3.612479 -11.111111  2.220446e-14 -6.451613  1.913876   
1998-12-14   3.571429  1.533220  -1.470588 -6.603774e+00 -1.724138 -1.276995   
1998-12-21  -3.448276  0.671141   5.223881 -3.030303e+00  1.315789 -0.478469   
1998-12-28 -14.285714  1.166667  14.184397  4.687500e+00  3.896104  0.961538   
1999-01-04  10.416667 -3.459638  -6.211180  6.965174e+00 -9.375000  0.952381   

tic             3ABHG      3ABHH     3ABNKQ      3ABTE  ...     ZOOM.1  ZOOXQ  \
1998-12-07  23.076923 -10.833333 -21.428571  -8.928571  ...   2.247191    NaN   
1998-12-14        NaN  34.579439   4.545455  -4.901961  ...  -6.593407    NaN   
1998-12-21 -12.500000  -7.291667  -4.347826  -4.123711  ... -20.000

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    4
prccd     4
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000192
prccd     0.000192
trfd      0.000000
dtype: float64
Year: 2000
tic             0183B     0223B      0230B     0491B    3AATTZ      3ABTE  \
2000-01-10  18.604651  1.363636  11.764706  0.411523       NaN   2.985075   
2000-01-17 -13.725490 -0.149477  -3.643725  3.688525 -0.020942   9.420290   
2000-01-24  13.636364 -1.946108 -19.327731 -5.138340       NaN -15.231788   
2000-01-31  96.000000  1.984733   7.812500  2.916667       NaN -23.437500   
2000-02-07 -18.367347  2.694611  -0.483092 -2.429150       NaN        NaN   

tic                3ACEC      3ACKH     3ACMI.     3ACNAQ  ...     ZOOM.1  \
2000-01-10  2.723735e+00 -20.772059  -2.127660   0.970874  ...   6.766917   
2000-01-17 -1.515152e+00  -6.496520  -4.347826 -15.384615  ...  -9.154930   
2000-01-24 -2.220446e-14 -10.669975  18.181818  -2.272727  ...  36.434109   
2000-01-31 -1.538462

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    4
prccd     4
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.00021
prccd     0.00021
trfd      0.00000
dtype: float64
Year: 2001
tic                0183B      0491B    3AATTZ      3ACEC      3ACKH  \
2001-01-08 -1.110223e-14  10.526316       NaN -11.111111  42.857143   
2001-01-15  8.695652e+00   1.831502  0.458334  10.000000  18.333333   
2001-01-22  3.600000e+01   4.316547  0.520833 -15.909091  -2.816901   
2001-01-29 -1.764706e+01  -5.875862 -0.103627  -5.405405  -8.637681   
2001-02-05 -1.607143e+01   2.168816       NaN  -2.857143  -2.284264   

tic            3ACNAQ        3ACTRQ     3ADLTQ      3AEGN      3AEMI  ...  \
2001-01-08 -16.406250  1.597938e+01 -11.475410  11.038961  33.333333  ...   
2001-01-15  -4.672897  2.533333e+01  41.666667  -8.771930        NaN  ...   
2001-01-22  -5.882353  2.220446e-14 -16.339869 -12.820513  50.000000  ...   
2001-01-29   5.208333  2.163121e+01 -10.937500 -20.588235 -

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    8
prccd     8
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000411
prccd     0.000411
trfd      0.000000
dtype: float64
Year: 2002
tic             0183B     0491B  2100B    3AATTZ         3ACEC      3ACKH  \
2001-12-10  -1.023891  4.109048    NaN -0.166251 -2.220446e-14   8.196721   
2001-12-17 -10.344828  0.417457    NaN  0.417432 -2.500000e+00  40.530303   
2001-12-24   2.307692  1.095994    NaN  0.332779 -2.564103e+00  -5.121294   
2001-12-31  19.548872 -1.719626    NaN -0.414594  9.649123e+00  -7.102273   
2002-01-07   6.918239 -1.103081    NaN  0.749376  2.560000e+01  -4.892966   

tic            3ACNAQ     3ACTRQ     3ADLTQ      3AEGN  ...       ZONS  \
2001-12-10  14.953271 -12.500000 -11.764706  26.068376  ... -14.000000   
2001-12-17   1.219512  11.111111 -19.333333   1.694915  ...  -5.813953   
2001-12-24  -8.032129   8.285714   9.917355  10.000000  ...  -1.234568   
2001-12-31  -3.056769   0.527704

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    18
prccd     18
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000978
prccd     0.000978
trfd      0.000000
dtype: float64
Year: 2003
tic             0491B     2100B        3AATTZ     3ACEC     3ACTRQ     3ADLTQ  \
2002-12-09  -0.309598  3.624009  4.440892e-14 -0.909091 -10.740741  -8.333333   
2002-12-16  12.594893  2.732240  2.887789e-01 -0.917431 -12.863071 -30.303030   
2002-12-23   0.030647  1.356383           NaN -6.481481 -14.285714  21.739130   
2002-12-30  -0.337010 -0.106383           NaN  2.970297   5.555556  14.285714   
2003-01-06   0.215186  0.106496           NaN  8.653846  15.263158 -28.125000   

tic            3AEGN      3AEMI     3AICX     3ALGXQ  ...       ZOLT  \
2002-12-09  2.439024        NaN  3.397341 -15.306122  ... -10.984848   
2002-12-16 -4.761905  20.000000 -0.476190 -12.048193  ... -12.340426   
2002-12-23 -2.500000  -8.333333  2.870813  -5.616438  ...   2.912621   
2002-12-30 -7

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    12
prccd     12
trfd       0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000664
prccd     0.000664
trfd      0.000000
dtype: float64
Year: 2004
tic            2100B      3ACEC      3AEGN      3AEMI      3AICX  \
2003-12-08  0.270636 -16.816063  -9.252669   0.441176  -4.347826   
2003-12-15  5.870445  12.068966 -10.549020  22.532943  -6.818182   
2003-12-22  6.692161   3.076923  -1.359053   7.539730   0.081301   
2003-12-29 -2.110372  -4.850746  -2.222222  -7.777778  22.664500   
2004-01-05 -3.327172   0.784314   5.045455  12.409639  19.205298   

tic               3ANDN.      3AXSO     3BPOM      3CPTH      3CSOF  ...  \
2003-12-08 -3.333333e+01  -5.405405  4.273504 -10.837438  -7.600000  ...   
2003-12-15  1.500000e+02   0.571429 -2.377049  -6.187845  -4.761905  ...   
2003-12-22  4.500000e+01   1.136364  0.755668 -24.028269  -9.090909  ...   
2003-12-29 -2.413793e+01   5.617978  9.166667   4.651163   4.000000  ...   

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    3
prccd     3
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000181
prccd     0.000181
trfd      0.000000
dtype: float64
Year: 2005
tic            2100B      3ACEC     3AEGN     3AEMI      3AICX  3AURDQ  \
2005-01-10  3.233831   1.310044 -7.206704  3.869833  54.545455     NaN   
2005-01-17 -0.674699  -2.586207  4.154124  2.286198 -18.627451     NaN   
2005-01-24  2.548277  15.929204 -4.624277 -1.614238 -15.662651     NaN   
2005-01-31  5.202864  17.938931  9.090909  5.384939        NaN     NaN   
2005-02-07  2.994555  14.886731 -2.777778 -0.598802 -10.000000     NaN   

tic            3AXSO      3BPOM      3CPTH         3CSOF  ...       ZOLT  \
2005-01-10 -2.522936 -12.592593   3.947368  5.102041e+00  ...  10.105263   
2005-01-17  5.882353  -2.542373  -7.594937 -1.553398e+01  ...   7.641810   
2005-01-24  3.703704  -3.478261 -15.753425  2.220446e-14  ...  -6.211143   
2005-01-31 -0.357143  20.720721  -7.317073

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    4
prccd     4
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.000245
prccd     0.000245
trfd      0.000000
dtype: float64
Year: 2006
tic            2100B     3ACEC      3AEMI     3AICX    3AURDQ     3AXSO  \
2006-01-09  0.250627 -0.573066  13.972603 -2.631579 -1.235521  7.520666   
2006-01-16  1.875000  1.152738   5.889423 -4.859459  0.390930 -0.537634   
2006-01-23  2.862986 -3.703704  -0.454030       NaN  1.246106  8.648649   
2006-01-30 -3.055475  5.029586   0.228050       NaN -5.461538  8.457711   
2006-02-06 -4.054613 -0.408451   3.299204       NaN -2.278275  5.045872   

tic             3BPOM    3CTDBQ  3CYRV     3FDTR  ...       ZONS    ZOOM.1  \
2006-01-09  -4.347826 -3.171091    NaN  0.482703  ...   7.199710 -5.333333   
2006-01-16        NaN -1.751714    NaN -1.521217  ...   5.825188 -0.704225   
2006-01-23   5.454545 -5.658915    NaN  0.081301  ...  -0.529317  2.127660   
2006-01-30 -17.780273 -1.725

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    0
prccd     0
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.0
prccd     0.0
trfd      0.0
dtype: float64
Year: 2007
tic             3ACEC      3AEMI     3AURDQ     3AXSO    3CTDBQ     3CYRV  \
2007-01-08   4.651163   8.421053   1.083032  1.250000  3.726708  1.841903   
2007-01-15  -8.148148   9.223301   1.339286 -4.938272  1.696607  3.617182   
2007-01-22 -16.935484  41.333333  -0.969163  2.597403  3.336605 -0.181818   
2007-01-29  -4.854369  -9.748428   2.402135 -2.531646  2.279202  2.513661   
2007-02-05   2.040816  -1.428571  18.071242 -5.194805  1.193142  0.035537   

tic            3FDTR  3GLOI.    3GMRRQ     3NHRX  ...       ZONS     ZOOM.1  \
2007-01-08  0.490196     NaN -0.896991  5.590062  ...   5.284016  13.888889   
2007-01-15 -0.390244     NaN  1.284672 -3.529412  ...   2.132999   7.951220   
2007-01-22 -2.056807     NaN  4.641107 -1.219512  ...   0.859951   7.696942   
2007-01-29  1.201000     Na

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    0
prccd     0
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.0
prccd     0.0
trfd      0.0
dtype: float64
Year: 2008
tic             3AEMI     3AURDQ     3CTDBQ     3CYRV      3FDTR     3GLOI.  \
2007-12-10 -40.677966  -7.911803   3.535354 -6.812500 -10.071942   0.753769   
2007-12-17  65.714286  16.901408   3.902439  9.926224  -4.000000 -13.541147   
2007-12-24 -15.689655   0.843373  -2.347418 -7.835823 -14.583333  11.623882   
2007-12-31  18.609407 -16.009558 -17.307692 -6.509695  -7.317073  -9.560724   
2008-01-07        NaN -17.211949  -9.302326 -4.000000 -10.526316 -12.857143   

tic           3GMRRQ      3NHRX     3SIXFQ      3TLON  ...      ZOLT  \
2007-12-10 -4.433140 -13.043478  -0.389105  -2.608696  ... -6.149013   
2007-12-17 -2.585551  32.500000  -9.765625  -8.928571  ...  0.860832   
2007-12-24 -4.371585 -24.528302 -10.822511   7.843137  ...  7.088668   
2007-12-31 -3.061224  -7.500000  -7.766990 

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    0
prccd     0
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.0
prccd     0.0
trfd      0.0
dtype: float64
Year: 2009
tic            3CTDBQ     3GLOI.     3GMRRQ  3HUTC    3SIXFQ      3TLON  \
2008-12-08  -5.555556  13.609467  11.881978    NaN  9.375000  14.285714   
2008-12-15  35.294118 -11.979167   7.925873    NaN -5.714286 -21.875000   
2008-12-22 -17.391304  44.970414 -12.212389    NaN -3.030303 -20.000000   
2008-12-29        NaN -12.244898   8.467742    NaN  3.125000  20.000000   
2009-01-05  21.052632 -18.604651   1.672862    NaN  9.090909  29.166667   

tic            7131B          A       AA.3       AABA  ...      ZQKSQ  \
2008-12-08  0.144928   0.753187  23.680982  12.778731  ...  27.500000   
2008-12-15  1.591896  -6.382979  -3.769841  -0.912548  ...  24.836601   
2008-12-22  3.561254  -5.098280   0.824742  -5.295472  ... -12.565445   
2008-12-29  2.957359   5.113269  23.824131   4.132901  ...  23.

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    0
prccd     0
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.0
prccd     0.0
trfd      0.0
dtype: float64
Year: 2010
tic            3GLOI.     3GMRRQ     7131B         A       AA.3      AABA  \
2009-12-07  -0.613497  -4.830287  0.462250 -0.736772  12.471132  3.620803   
2009-12-14 -10.493827  -2.606310  2.530675 -0.809717  -0.205339  2.541296   
2009-12-21  13.793103   0.845070 -0.972326  3.163265  12.071331  3.593556   
2009-12-28        NaN  -2.374302 -0.981873  2.439829  -1.346389  0.358852   
2010-01-04   6.060606  17.167382 -0.457666 -0.869005   5.583127 -0.476758   

tic              AAC.1      AACC      AACG  AACPF  ...      ZOLT      ZQKSQ  \
2009-12-07   -5.882353 -0.331675  3.441296    NaN  ... -2.076125  -8.556150   
2009-12-14  -10.416667 -0.166389 -3.522505    NaN  ...  8.833922  15.789474   
2009-12-21  104.651163  4.333333 -7.302231    NaN  ...  3.246753  13.131313   
2009-12-28  -17.034091  8.3

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    0
prccd     0
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.0
prccd     0.0
trfd      0.0
dtype: float64
Year: 2011
tic               3GLOI.    3GMRRQ     7131B         A      AA.3      AABA  \
2011-01-10  3.952569e-01 -9.340659 -3.389831  3.940413 -2.740560 -0.547256   
2011-01-17  3.937008e-01 -9.696970  7.508772 -2.658345 -1.127113 -5.026770   
2011-01-24 -1.110223e-14  8.053691 -0.130548 -2.683448  2.153262 -0.845600   
2011-01-31  1.964706e+00 -3.416149  0.392157  4.904832  6.446857  6.032849   
2011-02-07  3.769086e-01 -9.003215  3.387734  4.722028  1.341890  0.387251   

tic             AACC      AACG     AACPF       AAI  ...       ZOLT      ZQKSQ  \
2011-01-10  6.239737  3.376623  0.515464  0.267380  ...   4.197531  -4.770992   
2011-01-17 -6.800618  0.452261 -0.820513 -0.266667  ...  -8.767773 -11.623246   
2011-01-24  0.663350  1.050525 -0.620476 -1.470588  ...  -3.636364  -2.040816   
2011-01-31  0

  df['trfd'] = df['trfd'].fillna(method='ffill')  # Forward fill


Missing data summary after filling:
 ajexdi    0
prccd     0
trfd      0
dtype: int64
Missing data percentage after filling:
 ajexdi    0.0
prccd     0.0
trfd      0.0
dtype: float64
Year: 2012
tic         3PBCP     7131B         A      AA.3      AABA      AACC  \
2012-01-09    NaN  0.681115  4.438503  6.986900 -0.225588  4.166667   
2012-01-16    NaN  0.430504  4.966718  3.775510  3.100775  0.705882   
2012-01-23    NaN -0.183711  4.097561  2.556539 -1.378446  8.411215   
2012-01-30    NaN  3.558282  6.091846  3.467373  1.143583  1.293103   
2012-02-06    NaN -6.481964 -2.517668 -4.368030  1.381910 -4.255319   

tic              AACG     AACPF      AAIC      AAME  ...       ZOLT  \
2012-01-09   3.265306 -0.505051  5.802708 -0.505051  ...   8.690330   
2012-01-16 -18.181818       NaN  2.650823  1.015228  ...  -3.941441   
2012-01-23   1.449275       NaN  1.202137  3.015075  ...   3.399766   
2012-01-30  11.111111       NaN  4.267488       NaN  ...  48.639456   
2012-02-06        NaN   

In [11]:
# Location of the merged weekly-returns file written by the previous step
output_file_path = '/Users/balmeru/Desktop/merged_weekly_pivot_1984_2012.csv'

# Read the file back with pandas' default settings (first line used as header)
df = pd.read_csv(output_file_path)

# Row and column counts of the loaded frame
num_rows = df.shape[0]
num_columns = df.shape[1]

# Report the dimensions
print(f"The CSV file has {num_rows} rows and {num_columns} columns.")


The CSV file has 1571 rows and 20700 columns.


In [17]:
import pandas as pd

# Read the merged file with no header row: the first line (the ticker names)
# is kept as ordinary data in row 0 and columns are labelled by position.
df = pd.read_csv(
    '/Users/balmeru/Desktop/merged_weekly_pivot_1984_2012.csv',
    header=None,
    low_memory=False,
)

# Parse column 0 (the dates) as datetimes
df[0] = pd.to_datetime(df[0])

# For every repeated date keep only its last occurrence, then renumber the
# rows from 0 so the index is contiguous again.
df_unique = df.drop_duplicates(subset=0, keep='last').reset_index(drop=True)

# Write the de-duplicated rows without the positional index or column labels
df_unique.to_csv('/Users/balmeru/Desktop/merged_WR.csv', index=False, header=False)

# Show the resulting frame
print(df_unique)

          0                     1      2                    3      \
0           NaT                 0146A  0153A                0223B   
1    1984-01-02                   NaN    NaN   0.9803921568627638   
2    1984-01-09  -0.35087719298246833    NaN   0.4854368932038833   
3    1984-01-16    0.7042253521126751    NaN  0.48309178743961567   
4    1984-01-23   0.34965034965035446    NaN                  NaN   
...         ...                   ...    ...                  ...   
1499 2012-12-03                   NaN    NaN                  NaN   
1500 2012-12-10                   NaN    NaN                  NaN   
1501 2012-12-17                   NaN    NaN                  NaN   
1502 2012-12-24                   NaN    NaN                  NaN   
1503 2012-12-31                   NaN    NaN                  NaN   

                    4                    5                    6      \
0                   0485B                0491B                3234B   
1      13.004088519663505   -

In [18]:
# Location of the de-duplicated weekly-returns file
output_file_path = '/Users/balmeru/Desktop/merged_WR.csv'

# Read the file back with pandas' default settings (first line used as header)
df = pd.read_csv(output_file_path)

# Row and column counts of the loaded frame
num_rows = df.shape[0]
num_columns = df.shape[1]

# Report the dimensions
print(f"The CSV file has {num_rows} rows and {num_columns} columns.")


The CSV file has 1503 rows and 20700 columns.
