# Stocks Number Analysis

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
%matplotlib inline

import plotly.express as px

from pandas.tseries.holiday import USFederalHolidayCalendar

import numpy as np
import pandas as pd
import seaborn as sns
from pathlib import Path

# Relative path, presumably to the project root (TODO confirm); it is not
# referenced by any of the cells shown in this notebook section.
ROOT = Path('../../stabilizing_volatility')
# Directory containing the raw per-market CSV exports; read_raw_data() loads
# '<market>.csv' from here. Absolute, machine-specific path.
DATA = Path('G:/UNIPA/ECONOFISICA/DATA/BLOOMBERG')

In [None]:
def read_raw_data(market):
    """Load the raw CSV export of one market into a DataFrame.

    The file ``DATA / '<market>.csv'`` is read as a semicolon-separated table
    with decimal commas. Its first column becomes the (date-parsed) index,
    named ``'Day'``; the second physical line of the file is skipped and
    ``'#N/D'`` cells are treated as missing values.

    Parameters
    ----------
    market : str
        Market code. It is used as the file stem and to strip the
        ``' <market> Equity'`` suffix from every column name.

    Returns
    -------
    pandas.DataFrame
        The parsed table with columns ordered from the fewest to the most
        missing values.
    """
    raw_data = DATA / f'{market}.csv'
    # ``infer_datetime_format`` is deliberately not passed: it is deprecated
    # since pandas 2.0 (where it has no effect and only emits a warning).
    df = pd.read_csv(raw_data,
                     index_col=0,
                     skiprows=[1],
                     sep=';',
                     decimal=',',
                     parse_dates=True,
                     na_values=['#N/D'])
    # Keep only the ticker part of each column name.
    df = df.rename(columns=lambda x: x.replace(f' {market} Equity', ''))
    df.index.name = 'Day'
    # Order columns by number of missing values (ascending).
    ordered_columns_list = df.isnull().sum().sort_values().index
    return df.loc[:, ordered_columns_list]

def manipulate_dataframe(df, simple_index=False):
    """Order the columns by completeness and keep only business-day rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Table indexed by dates (one row per day).
    simple_index : bool, default False
        When true, the index of the result is replaced by the year of each
        date formatted as a string (``"%Y"``), for easier visualization.

    Returns
    -------
    pandas.DataFrame
        A new frame whose columns are sorted from the fewest to the most
        missing values and whose rows are those of ``df`` falling on a
        business day between the first and the last index entry.
    """
    # Order columns by number of missing values (ascending).
    ordered_columns_list = df.isnull().sum().sort_values().index
    df = df.loc[:, ordered_columns_list]
    # Nothing to filter on an empty frame; df.index[0] would raise IndexError.
    if df.index.empty:
        return df
    # Select business days. A boolean mask is used instead of a label lookup
    # so that a business day absent from the data (e.g. a market holiday)
    # does not raise KeyError.
    business_days = pd.bdate_range(df.index[0], df.index[-1])
    df = df.loc[df.index.isin(business_days)]
    # Convert index format for easier visualization.
    if simple_index:
        df.index = pd.to_datetime(df.index).strftime("%Y")
    return df

def make_plot(df, market):
    """Draw a heatmap of the missing values of ``df`` and return its Axes.

    The transposed null mask of ``df`` is plotted, so each heatmap row is one
    column of ``df`` and the horizontal axis follows the index of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose missing entries are visualized.
    market : str
        Market name inserted in the plot title.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the heatmap was drawn on.
    """
    label_size = 24
    _, ax = plt.subplots(figsize=(16, 8))
    missing_mask = df.isnull().T
    # One x tick label every 365 columns, no y tick labels, no colour bar.
    heatmap_options = {
        'yticklabels': False,
        'xticklabels': 365,
        'cmap': 'gray',
        'cbar': False,
        'ax': ax,
    }
    sns.heatmap(missing_mask, **heatmap_options)
    ax.set_title(f'Missing data in {market} market', fontsize=label_size)
    ax.set_ylabel('Stocks', fontsize=label_size)
    ax.set_xlabel('Days', fontsize=label_size)
    ax.grid(visible=True, ls='--')
    return ax

def make_miniplot(df, height=4):
    """Draw a compact missing-data heatmap labelled by year.

    Works on a copy of ``df`` whose index is replaced by the year of each
    date (``"%Y"`` strings), then plots the transposed null mask.

    Parameters
    ----------
    df : pandas.DataFrame
        Data indexed by values that ``pandas.to_datetime`` can convert.
    height : int or float, default 4
        Figure height; the figure width is fixed at 16.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the heatmap was drawn on.
    """
    yearly = df.copy()
    year_labels = pd.to_datetime(yearly.index).strftime("%Y")
    yearly.index = year_labels
    _, ax = plt.subplots(figsize=(16, height))
    missing_mask = yearly.isnull().T
    # One x tick label every 365 columns, no colour bar.
    sns.heatmap(missing_mask, xticklabels=365, cmap='gray', cbar=False, ax=ax)
    ax.grid(visible=True, ls='--')
    return ax

def normalize_data(df):
    """Return ``df`` with every column divided by that column's maximum."""
    column_peaks = df.max()
    scaled = df / column_peaks
    return scaled

In [None]:
# Load the 'GF' market, keep business days only, and show the first index
# entry, the last index entry and the shape of the resulting table.
market = 'GF'
df = manipulate_dataframe(read_raw_data(market))
for summary in (df.index[0], df.index[-1], df.shape):
    display(summary)

# Overview of the missing data across the whole table.
make_plot(manipulate_dataframe(df), market)

In [None]:
# Working subset: rows -10000 to -400 (exclusive) of the first 100 columns,
# each column scaled by its own maximum.
dummy = normalize_data(df.iloc[-10000:-400].iloc[:, :100])
make_miniplot(dummy)

## Create a Ten-Year-Long Window

In [None]:
# The window starts at the first row of `dummy` holding at least one value
# and spans ten calendar years.
window_start = dummy.first_valid_index()
window_delta = pd.DateOffset(years=10)
window_end = window_start + window_delta
window = pd.Interval(left=window_start, right=window_end)
window

In [None]:
# Overlay a translucent red rectangle on the missing-data heatmap.
# NOTE(review): the anchor (4000, -10) and the 3200 x 115 size are hard-coded,
# presumably picked to illustrate one window position - confirm.
ax = make_miniplot(dummy, height=8)
highlight = Rectangle(xy=(4000, -10),
                      width=3200,
                      height=115,
                      edgecolor='red',
                      facecolor='red',
                      fill=True,
                      alpha=0.2,
                      lw=2)
ax.add_patch(highlight)

## Slide the Window and Count Stocks

In [None]:
# Slide the window forward in 30-day steps and, each time the number of
# well-covered stocks changes, record that number together with the tickers.
slide = window
shift = pd.Timedelta('30d')
stock_count = {}
count = 0
# The end of the data does not change inside the loop; compute it once.
data_end = dummy.last_valid_index()
# Shift only while the shifted window still lies inside the data. Testing the
# un-shifted window instead would let the last window overhang the end of the
# data, so it would no longer cover a full ten years.
# NOTE(review): the shift is applied before slicing, so the initial `window`
# position itself is never evaluated - confirm this is intended.
while slide.right + shift <= data_end:
    slide = slide + shift
    sliced_stocks = dummy.loc[slide.left : slide.right]
    # Fraction of non-missing rows per stock inside the current window.
    prc_data = sliced_stocks.count() / len(sliced_stocks)
    # Select and count stocks with more than 90% of data in slide
    selected_stocks = prc_data[prc_data > 0.9].index
    if len(selected_stocks) != count:
        count = len(selected_stocks)
        stock_count[slide] = count, selected_stocks.values
stock_count = pd.DataFrame.from_dict(stock_count, orient='index', columns=['Count', 'Stocks'])
stock_count

In [None]:
# For the last five recorded windows, show the missing-data matrix with the
# window shaded in red and the selected stocks highlighted in yellow.
missing_matrix = dummy.isnull().T.astype(int)  # identical for every figure
for interval, stocks in stock_count.iloc[-5:].iterrows():
    span = {'x0': interval.left, 'x1': interval.right}
    fig = px.imshow(missing_matrix,
                    aspect="auto",
                    color_continuous_scale='PuBu_r',
                    title=f'N = {stocks["Count"]}')
    fig.add_vrect(**span, fillcolor="red", opacity=0.5, line_width=0)
    for column in stocks['Stocks']:
        # Row position of the stock in the transposed matrix.
        y = dummy.columns.get_loc(column)
        fig.add_shape(type="rect",
                      **span,
                      y0=y - 0.5, y1=y + 0.5,
                      fillcolor="yellow", opacity=0.5,
                      line_width=0)
    fig.show()

In [None]:
# Slide the window forward in 30-day steps and select the stocks whose first
# and last valid observations inside the window both lie within `delta` of
# the window edges; record the selection each time its size changes.
slide = window
shift = pd.Timedelta('30d')
stock_count = {}
count = 0
delta = pd.Timedelta('5d')
# The end of the data does not change inside the loop; compute it once.
data_end = dummy.last_valid_index()
# Shift only while the shifted window still lies inside the data. If the
# window overhangs the end of the data, `right_distance` is at least the
# overhang for every stock, so an overhang of `delta` or more rejects all
# stocks and records a spurious final row.
# NOTE(review): the shift is applied before slicing, so the initial `window`
# position itself is never evaluated - confirm this is intended.
while slide.right + shift <= data_end:
    slide = slide + shift
    sliced_stocks = dummy.loc[slide.left : slide.right]
    # Gap between the window start and each stock's first valid observation.
    left_distance = sliced_stocks.apply(pd.Series.first_valid_index) - slide.left
    # Gap between each stock's last valid observation and the window end.
    right_distance = slide.right - sliced_stocks.apply(pd.Series.last_valid_index)
    selected_stocks = sliced_stocks.loc[:, (left_distance < delta) & (right_distance < delta)].columns
    if len(selected_stocks) != count:
        count = len(selected_stocks)
        stock_count[slide] = count, selected_stocks.values
stock_count = pd.DataFrame.from_dict(stock_count, orient='index', columns=['Count', 'Stocks'])
stock_count

In [None]:
# For the recorded windows from row 138 onwards, show the missing-data matrix
# with the window shaded in red and the selected stocks highlighted in yellow.
# NOTE(review): 138 is a hard-coded row offset into `stock_count`, presumably
# tuned to this dataset - confirm.
missing_matrix = dummy.isnull().T.astype(int)  # identical for every figure
for interval, stocks in stock_count.iloc[138:].iterrows():
    span = {'x0': interval.left, 'x1': interval.right}
    fig = px.imshow(missing_matrix,
                    aspect="auto",
                    color_continuous_scale='PuBu_r',
                    title=f'N = {stocks["Count"]}')
    fig.add_vrect(**span, fillcolor="red", opacity=0.5, line_width=0)
    for column in stocks['Stocks']:
        # Row position of the stock in the transposed matrix.
        y = dummy.columns.get_loc(column)
        fig.add_shape(type="rect",
                      **span,
                      y0=y - 0.5, y1=y + 0.5,
                      fillcolor="yellow", opacity=0.5,
                      line_width=0)
    fig.show()