# libraries

In [68]:
# standard
import pandas as pd
import numpy as np
from tqdm import tqdm
from datetime import datetime, date
from collections import defaultdict, Counter
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

# graphs
import matplotlib.pyplot as plt
import seaborn as sns

# load data

In [69]:
market_cap_df = pd.read_csv('../data/market_cap.csv').T
closing_prices_df = pd.read_csv('../data/closing_prices.csv').T
relative_changes_df = pd.read_csv('../data/relative_pricechanges.csv')
# rename columns
market_cap_df.columns = market_cap_df.loc['Quarter']
closing_prices_df.columns = closing_prices_df.loc['Unnamed: 0']
# Drop the 'Quarter' row
market_cap_df = market_cap_df.drop('Quarter').sort_index()
closing_prices_df = closing_prices_df.drop('Unnamed: 0').sort_index()

relative_changes_df.index = relative_changes_df['Unnamed: 0']
relative_changes_df = relative_changes_df.drop('Unnamed: 0', axis=1).sort_index()

# add market_cap to closing prices df
closing_prices_df['market_cap'] = relative_changes_df['market_cap']

In [70]:
test = closing_prices_df.drop('market_cap', axis=1).iloc[:,::-1].ffill().pct_change(periods=-1, axis = 1).iloc[:,::-1]

In [71]:
test.head()

Unnamed: 0,2010-01-04,2010-01-05,2010-01-06,2010-01-07,2010-01-08,2010-01-11,2010-01-12,2010-01-13,2010-01-14,2010-01-15,...,2023-10-16,2023-10-17,2023-10-18,2023-10-19,2023-10-20,2023-10-23,2023-10-24,2023-10-25,2023-10-26,2023-10-27
1COV,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.026784,0.005462,-0.021521,0.01687,0.00567,0.025266,0.00835,-0.01535,-0.001846,-0.033087
2GB,,0.030744,0.045087,-0.031195,0.067595,0.080214,-0.012871,-0.057172,0.042553,-0.05,...,-0.01559,0.015837,-0.011136,-0.009009,-0.034091,0.0,0.014118,-0.025522,0.014286,-0.00939
2HRA,,0.0,0.003272,-0.011742,-0.039274,-0.010649,-0.044097,0.014166,0.058739,-0.030108,...,-0.002083,0.025052,-0.01222,0.010309,-0.006122,0.002053,-0.002049,0.002053,-0.006148,-0.006186
4DS,,0.006608,0.056455,0.026926,-0.006454,-0.009338,-0.002049,-0.014374,-0.01875,-0.014862,...,-0.004914,-0.002469,0.002475,-0.012346,0.0275,-0.026764,0.0,-0.04,-0.005208,-0.010471
5UH,,0.006608,0.056455,0.026926,-0.006454,-0.009338,-0.002049,-0.014374,-0.01875,-0.014862,...,0.016552,-0.015604,0.006203,-0.058904,-0.008734,-0.012482,0.04684,-0.012074,0.010784,-0.036273


# size effect

In [72]:
def relative_prices_daily(data, interval_size):
    # Step 1: Calculate Daily Relative Price Changes
    df = data.drop('market_cap', axis=1).iloc[:,::-1].ffill().pct_change(periods=-1, axis = 1).iloc[:,::-1]

    # Step 2: Identify Outliers
    std_dev = df.std()
    mean = df.mean()
    cutoff = std_dev * interval_size

    lower_bound = mean - cutoff
    upper_bound = mean + cutoff

    # Step 3: Interpolate Outliers
    for column in df.columns:
        outliers = (df[column] < lower_bound[column]) | (df[column] > upper_bound[column])
        df[column][outliers] = np.nan
        # Ensure the data type is numeric for interpolation
        df[column] = pd.to_numeric(df[column], errors='coerce')
        df[column].interpolate(method='linear', inplace=True)

    # Avoid DataFrame fragmentation by creating a new DataFrame
    df = pd.concat([df, data[['market_cap']]], axis=1)

    return df 

relative_changes_daily_df = relative_prices_daily(closing_prices_df, 3)

In [8]:
relevant_quarters = ['2015-03-31', '2020-09-30', '2021-03-31', '2023-03-31']
plot_data = []

def plot_relevant_quarters(data, quarters):
    for quarter in quarters:
        # Find the index of the quarter and get the indices of the last three months
        quarter_index = data.columns.get_loc(quarter)
        start_index = max(0, quarter_index - 21)  # Ensure it doesn't go below 0

        # Slicing the DataFrame for the last three months
        sliced_df = data.iloc[:, start_index:quarter_index + 1]

        for market_cap_category in ['Small-Cap', 'Micro-Cap', 'Large-Cap']:
            category_df = sliced_df[sliced_df['market_cap'] == market_cap_category]

            # Calculate average, min, and max price change
            avg_change = category_df.drop(columns=['market_cap']).mean(axis=1)
            min_change = category_df.drop(columns=['market_cap']).min(axis=1).min()
            max_change = category_df.drop(columns=['market_cap']).max(axis=1).max()

            plot_data.append({'Quarter': quarter, 'Market_Cap': market_cap_category, 
                          'Average_Change': avg_change, 'Min_Change': min_change, 'Max_Change': max_change})

    # Create a DataFrame for plotting
    plot_df = pd.DataFrame(plot_data)

    # Plotting
    plt.figure(figsize=(10, 6))
    sns.pointplot(data=plot_df, x='Quarter', y='Average_Change', hue='Market_Cap', 
              palette='deep', markers=['o', 's', 'x'], join=False)

    # Adding error bars for bounds
    for i, row in plot_df.iterrows():
        plt.errorbar(x=row['Quarter'], y=row['Average_Change'], 
                    yerr=[[row['Average_Change'] - row['Min_Change']], [row['Max_Change'] - row['Average_Change']]], 
                    fmt='none', capsize=5, color='gray')

    plt.title('Average Price Changes with Bounds by Market Cap Category')
    plt.xlabel('Quarter')
    plt.ylabel('Average Price Change')
    plt.xticks(rotation=45)
    plt.tight_layout()

    # Save the figure
    plt.savefig('../graphs/average_price_changes.png', dpi=300)

    plt.show()

plot_relevant_quarters(closing_prices_changes_daily_df, relevant_quarters)


KeyError: 'market_cap'

# Test Area

In [121]:
quarters = ['2015-03-31', '2020-09-30', '2021-03-31', '2023-03-31']
data = relative_changes_daily_df.copy()

for quarter in quarters:
        # Find the index of the quarter and get the indices of the last three months
    quarter_index = data.columns.get_loc(quarter)
    start_index = max(0, quarter_index - 63)  # Ensure it doesn't go below 0

        # Slicing the DataFrame for the last three months
    sliced_df = data.iloc[:, np.r_[start_index:quarter_index + 1, -1]]
    plot_data = pd.DataFrame(columns=sliced_df.columns)

    for market_cap_category in ['Small-Cap', 'Micro-Cap', 'Large-Cap']:
            category_df = sliced_df[sliced_df['market_cap'] == market_cap_category]

            # Calculate average, min, and max price change
            avg_company = category_df.drop(columns=['market_cap']).mean(axis=0)
            total_returns = (category_df.iloc[:,:-1] + 1).prod(axis = 1) - 1
            max_company = category_df.loc[total_returns.idxmax(),][:-1]
            min_company = category_df.loc[total_returns.idxmin(),][:-1]

            plot_df = pd.DataFrame(columns=sliced_df.columns)
            plot_df.loc[f"{market_cap_category}_avg"] = avg_company
            plot_df.loc[f"{total_returns.idxmax()}"] = max_company
            plot_df.loc[f"{total_returns.idxmin()}"] = min_company
            plot_df.loc[:,'market_cap'] = market_cap_category

            plot_data = pd.concat([plot_data, plot_df])

  plot_data = pd.concat([plot_data, plot_df])
  plot_data = pd.concat([plot_data, plot_df])
  plot_data = pd.concat([plot_data, plot_df])
  plot_data = pd.concat([plot_data, plot_df])


In [122]:
plot_data

Unnamed: 0,2023-01-03,2023-01-04,2023-01-05,2023-01-06,2023-01-09,2023-01-10,2023-01-11,2023-01-12,2023-01-13,2023-01-16,...,2023-03-21,2023-03-22,2023-03-23,2023-03-24,2023-03-27,2023-03-28,2023-03-29,2023-03-30,2023-03-31,market_cap
Small-Cap_avg,0.006181,0.014755,0.001127,0.008531,0.013296,-0.003386,0.005298,0.006684,0.00905,0.005769,...,0.009413,-0.008404,0.007726,-0.015642,0.006382,-0.006178,0.006906,0.012441,0.004792,Small-Cap
DEZ,-0.000487,0.019006,0.02439,0.004202,0.035332,0.010328,0.0,0.009778,0.002201,0.004831,...,0.045863,-0.003439,0.008628,-0.035928,0.01331,0.0,0.014886,0.031061,0.0,Small-Cap
GTY,0.009804,0.024272,0.004739,-0.023585,-0.019324,-0.073892,-0.015957,-0.032432,-0.01676,-0.011364,...,-0.01875,0.012739,0.006289,-0.0125,-0.012658,-0.025641,-0.013158,0.073333,0.012422,Small-Cap
Micro-Cap_avg,0.006066,0.008691,-0.001102,0.005748,0.010498,-0.000408,0.007607,0.002328,0.002729,0.004049,...,0.008297,0.001821,-0.002971,-0.007059,0.003002,-0.003392,0.003606,0.006014,0.000909,Micro-Cap
ELG,0.00565,0.016854,0.003683,0.056881,0.045139,0.0299,-0.032258,-0.008333,0.030252,-0.013051,...,0.012151,0.046819,0.005734,-0.017104,0.025522,-0.040724,0.021226,0.018476,0.014739,Micro-Cap
AAQ1,0.0,0.0,0.0,-0.043478,0.0,-0.045455,0.027408,0.003435,-0.015038,-0.003485,...,0.019022,-0.024096,-0.111111,-0.034722,0.007194,0.0,-0.007143,0.0,0.007194,Micro-Cap
Large-Cap_avg,0.010373,0.022585,0.003953,0.007939,0.009297,-0.00394,0.015514,0.007504,-0.002235,0.008089,...,0.01928,0.002403,-0.002496,-0.019507,0.014875,0.001056,0.012425,0.013885,0.007672,Large-Cap
LHA,0.010165,0.030692,0.01733,-0.001799,0.012138,-0.006768,0.005499,0.016169,0.024921,0.005251,...,0.020977,-0.005136,-0.003543,-0.048359,0.013024,-0.005058,0.02182,0.031409,0.031156,Large-Cap
VNA,0.017904,0.054054,-0.017501,0.01367,0.036371,-0.005915,0.06545,0.008935,-0.001845,0.036599,...,-0.023152,-0.046054,-0.011293,-0.047116,-0.001498,-0.060324,0.056531,0.050181,-0.001727,Large-Cap
