# Import packages 

In [1]:
import statistics

import numpy as np
import pandas as pd
from scipy.stats import kurtosis, skew

# Create list of monthly dates

In [2]:
def list_monthly_dates(first_year, last_year):
    """Return every month between two years as "mm/yyyy" strings.

    The result runs chronologically from January of ``first_year`` through
    December of ``last_year`` (both years inclusive). Months are zero-padded
    to two digits, e.g. "01/1990".
    """
    return [
        f"{month_number:02d}/{calendar_year}"
        for calendar_year in range(first_year, last_year + 1)
        for month_number in range(1, 13)
    ]

# Extract Monthly Features From Daily Series

In [3]:
def extract_features(first_year, last_year, file_names):
    """Summarise daily series into one row of statistics per month.

    For each name in ``file_names`` this reads
    ``Generated series/Stationary only/Daily/<name>.csv``, which must have a
    ``DATE`` column and a value column named ``<name>``. For every month
    produced by ``list_monthly_dates`` it computes six statistics of that
    month's values: mean, sample standard deviation, kurtosis, skew, and the
    25th and 75th percentiles.

    Args:
        first_year: First calendar year to cover (inclusive).
        last_year: Last calendar year to cover (inclusive).
        file_names: Series names; each is used both as the CSV file stem and
            as the name of the value column inside that CSV.

    Returns:
        A DataFrame indexed by "mm/yyyy" with six columns per series, named
        "<statistic> (<name>)". Cells for months with no observations are NaN,
        as is the standard deviation of a month with a single observation.

    Side effects:
        Prints each series name as it is processed and writes the result to
        ``All features/Monthly Extracted Features.csv``.
    """
    # Order here fixes the order of the six columns written for each series.
    stat_labels = (
        "Mean",
        "Standard dev.",
        "Kurtosis",
        "Skew",
        "25th percentile",
        "75th percentile",
    )
    n_stats = len(stat_labels)

    # List of all months as "mm/yyyy" strings
    dates = list_monthly_dates(first_year, last_year)

    # Start from NaN so a month without data is visibly missing rather than 0.
    all_results_array = np.full((len(dates), len(file_names) * n_stats), np.nan)

    for file_index, file in enumerate(file_names):
        df = pd.read_csv("Generated series/Stationary only/Daily/" + file + ".csv")
        print(file)
        column = file_index * n_stats
        date_strings = df["DATE"]
        for row, mm_yyyy in enumerate(dates):
            # Plain substring match; rows with a missing DATE never match.
            # NOTE(review): assumes DATE text contains "mm/yyyy" contiguously
            # (e.g. dd/mm/yyyy) - confirm against the CSV date format.
            in_month = date_strings.str.contains(mm_yyyy, regex=False, na=False)
            daily_data_list = df.loc[in_month, file].tolist()

            if not daily_data_list:
                # No observations this month: leave the row as NaN.
                continue

            all_results_array[row, column] = statistics.mean(daily_data_list)
            # Sample standard deviation is undefined for a single observation.
            if len(daily_data_list) > 1:
                all_results_array[row, column + 1] = statistics.stdev(daily_data_list)
            all_results_array[row, column + 2] = kurtosis(daily_data_list)
            all_results_array[row, column + 3] = skew(daily_data_list)
            all_results_array[row, column + 4] = np.percentile(daily_data_list, 25)
            all_results_array[row, column + 5] = np.percentile(daily_data_list, 75)

    # Create column names, in the same series-major order as the array columns
    column_names = [
        f"{label} ({file})" for file in file_names for label in stat_labels
    ]

    all_results = pd.DataFrame(all_results_array, columns=column_names, index=dates)
    all_results.to_csv("All features/Monthly Extracted Features.csv", index=True)
    return all_results

# Call function

In [None]:
# Daily series to process; each name is passed to extract_features, which uses
# it as both the CSV file stem and the value column name.
file_names = [
    "S&P500",
    "BAMLCC0A1AAATRIV",
    "USD-GBP",
    "3M T-Bill",
    "DGS2",
    "DGS5",
    "DGS10",
    "TEDRATE",
    "T10Y3M",
    "Crude Oil",
]
extract_features(first_year=1990, last_year=2021, file_names=file_names)