In [None]:
import pandas as pd
import numpy as np

def get_most_volatile(prices):
    """Return the ticker symbol for the most volatile stock.

    Volatility is the sample standard deviation of each ticker's log returns.
    Log returns are computed within each ticker, so the first row of a ticker
    is never compared against the last price of a different ticker.

    Parameters
    ----------
    prices : pandas.DataFrame
        a pandas.DataFrame object with columns: ['ticker', 'date', 'price'].
        Rows are used in the order given, so each ticker's rows are assumed
        to already be in chronological order. The frame is not modified.

    Returns
    -------
    ticker : string
        ticker symbol for the most volatile stock, or None when no ticker has
        a strictly positive volatility (for example when every ticker has
        fewer than three prices, or all prices are constant).
    """
    # Previous price *within the same ticker*; the first row of each ticker
    # gets NaN instead of borrowing a price from a different ticker.
    previous_price = prices.groupby('ticker', sort=False)['price'].shift(1)
    log_returns = np.log(prices['price'] / previous_price)

    # Group by the raw ticker values (not a new column) so the caller's
    # DataFrame is left untouched. sort=False keeps first-appearance order,
    # which makes ties resolve to the ticker seen first.
    volatility = log_returns.groupby(prices['ticker'].to_numpy(), sort=False).std()

    # Drop NaN and zero volatilities; comparisons against NaN are False.
    volatility = volatility[volatility > 0]
    if volatility.empty:
        return None

    return volatility.idxmax()

In [1]:
#rolling_windows
def calculate_simple_moving_average(rolling_window, close):
    """
    Compute the simple moving average of close prices.

    Parameters
    ----------
    rolling_window: int
        Number of consecutive rows averaged together
    close : DataFrame
        Close prices for each ticker and date

    Returns
    -------
    simple_moving_average : DataFrame
        Rolling mean of `close`, same shape as the input. Rows that do not
        yet have a full window behind them are NaN (pandas' default
        `min_periods` for an integer window).
    """
    windows = close.rolling(window=rolling_window)
    simple_moving_average = windows.mean()
    return simple_moving_average

In [2]:


def estimate_volatility(prices, l):
    """Create an exponential moving average model of the volatility of a stock
    price, and return the most recent (last) volatility estimate.

    The model is the exponentially weighted mean of squared log returns with
    smoothing factor ``alpha = 1 - l``; the returned volatility is the square
    root of the last value of that series.

    Parameters
    ----------
    prices : pandas.Series
        A series of adjusted closing prices for a stock. Any index type is
        accepted; the last element is selected by position.

    l : float
        The 'lambda' parameter of the exponential moving average model. Making
        this value smaller will cause the model to weight older terms less
        relative to more recent terms.

    Returns
    -------
    last_vol : float
        The last element of your exponential moving average volatility model
        series.

    """
    alpha = 1 - l
    sq_log_returns = (np.log(prices) - np.log(prices.shift(1)))**2
    ewma_variance = sq_log_returns.ewm(alpha=alpha).mean()
    # Use .iloc for positional access: plain [-1] is a label lookup that
    # fails on integer indexes and is deprecated for other index types.
    return np.sqrt(ewma_variance.iloc[-1])

In [None]:
#GARCH AND ARCH MODELS
#ARMA ARIMA MODELS
#VVIX
#MEAN Reverting Strategy
#USMV
#SPLV

#Normalize momentum signal by some volatility metric

#turtle trading based on volatility

In [None]:
#Engle Granger test
#Get Hedge Ratio from linear Regression
#Calculate Spread and check if spread is stationary
#If spread is stationary, two series are cointegrated
#Use Augmented Dickey Fuller Test to test for stationarity

#short the spread, go short the asset that has increased relative to spread
#go long the spread, go long asset that has decreased relative to spread

In [3]:
def is_spread_stationary(spread, p_level=0.05):
    """
    Decide whether a spread looks stationary by running `adfuller` on it.

    Parameters
    ----------

    spread: obtained from linear combination of two series with a hedge ratio

    p_level: level of significance required to reject null hypothesis of non-stationarity

    returns:
        True if the p-value is at or below `p_level` (spread can be
        considered stationary)
        False otherwise

    The p-value and the decision are also printed.
    """
    # The p-value is read from the second element of the adfuller result.
    pvalue = adfuller(spread)[1]
    print(f"pvalue {pvalue:.4f}")

    if pvalue > p_level:
        print(f"pvalue is > {p_level}, assume spread is not stationary")
        return False

    print(f"pvalue is <= {p_level}, assume spread is stationary")
    return True

In [4]:
def get_high_lows_lookback(high, low, lookback_days):
    """
    Get the highest high and lowest low over a trailing lookback window.

    The window for a given row covers the `lookback_days` rows before it and
    excludes the row itself, because the inputs are shifted down one row
    before the rolling window is applied.

    Parameters
    ----------
    high : DataFrame
        High price for each ticker and date
    low : DataFrame
        Low price for each ticker and date
    lookback_days : int
        The number of days to look back

    Returns
    -------
    lookback_high : DataFrame
        Lookback high price for each ticker and date
    lookback_low : DataFrame
        Lookback low price for each ticker and date
    """
    # Shift first so the current row never contributes to its own window.
    previous_highs = high.shift(1)
    previous_lows = low.shift(1)

    lookback_high = previous_highs.rolling(window=lookback_days).max()
    lookback_low = previous_lows.rolling(window=lookback_days).min()

    return lookback_high, lookback_low

In [5]:
def get_long_short(close, lookback_high, lookback_low):
    """
    Generate the signals long, short, and do nothing.

    A long signal (1) is produced where the close is above the lookback high,
    a short signal (-1) where the close is below the lookback low, and 0
    otherwise. Comparisons involving NaN are False, so rows without a full
    lookback window yield 0.

    Parameters
    ----------
    close : DataFrame
        Close price for each ticker and date
    lookback_high : DataFrame
        Lookback high price for each ticker and date
    lookback_low : DataFrame
        Lookback low price for each ticker and date

    Returns
    -------
    long_short : DataFrame
        The long, short, and do nothing signals for each ticker and date,
        as integers in {-1, 0, 1}
    """
    # Build the signal from boolean masks instead of overwriting a copy of the
    # prices: a close that happens to equal 1 or -1 must not be mistaken for
    # a signal, and `np.int` no longer exists in current NumPy releases.
    long_mask = close > lookback_high
    short_mask = close < lookback_low

    return long_mask.astype(int) - short_mask.astype(int)

In [None]:
#johansen test 
#coefficient1*stock1 + coefficient2*stock2 = spread

In [6]:
def clear_signals(signals, window_size):
    """
    Clear out signals in a Series of just long or short signals.

    Remove the number of signals down to 1 within the window size time period:
    once a signal is kept, the following `window_size` entries are forced
    to 0 before another signal may be kept.

    Parameters
    ----------
    signals : Pandas Series
        The long, short, or do nothing signals
    window_size : int
        The number of days to have a single signal

    Returns
    -------
    signals : Pandas Series
        Integer signals with the repeated signals inside each window removed,
        carrying the same index as the input
    """
    clean_signals = []
    # Number of upcoming entries that must still be suppressed because a
    # signal was kept within the last `window_size` entries. This is
    # equivalent to re-scanning the previous window at every step, but
    # takes a single pass.
    cooldown = 0

    for current_signal in signals:
        if cooldown > 0:
            clean_signals.append(0)
            cooldown -= 1
        elif current_signal:
            clean_signals.append(current_signal)
            cooldown = window_size
        else:
            clean_signals.append(0)

    # Return the signals as a Series of ints (`np.int` was removed from NumPy,
    # the builtin `int` is the supported spelling).
    return pd.Series(np.array(clean_signals).astype(int), signals.index)


def filter_signals(signal, lookahead_days):
    """
    Filter out repeated signals in a DataFrame.

    Long and short signals are thinned independently, column by column, with
    `clear_signals`, and then recombined into a single frame.

    Parameters
    ----------
    signal : DataFrame
        The long, short, and do nothing signals for each ticker and date
    lookahead_days : int
        The number of days to look ahead

    Returns
    -------
    filtered_signal : DataFrame
        The filtered long, short, and do nothing signals for each ticker and date
    """
    # 1 where a long / short signal is present, 0 elsewhere.
    long_flags = (signal > 0).astype(int)
    short_flags = (signal < 0).astype(int)

    kept_longs = long_flags.apply(clear_signals, args=(lookahead_days,))
    kept_shorts = short_flags.apply(clear_signals, args=(lookahead_days,))

    # Shorts were cleaned as positive flags, so subtract to restore -1.
    return kept_longs - kept_shorts

def filter_signals_alternative1(signal, lookahead_days):
    """
    Filter out repeated signals in a DataFrame, one ticker at a time.

    Parameters
    ----------
    signal : DataFrame
        The long, short, and do nothing signals for each ticker and date
    lookahead_days : int
        The number of days to look ahead

    Returns
    -------
    filtered_signal : DataFrame
        The filtered long, short, and do nothing signals for each ticker and date
    """
    filtered_signal = signal.copy()

    # Rows of the transposed frame are the per-ticker signal series.
    for ticker, ticker_signals in signal.T.iterrows():
        # Keep only the longs (zero out -1) and only the shorts (zero out 1).
        longs_only = ticker_signals.where(ticker_signals != -1, 0)
        shorts_only = ticker_signals.where(ticker_signals != 1, 0)

        cleared_longs = clear_signals(longs_only, lookahead_days)
        cleared_shorts = clear_signals(shorts_only, lookahead_days)
        filtered_signal[ticker] = cleared_longs + cleared_shorts

    return filtered_signal

def filter_signals_alternative2(signal, lookahead_days):
    """
    Filter out repeated signals in a DataFrame using `replace` to split
    longs from shorts.

    Parameters
    ----------
    signal : DataFrame
        The long, short, and do nothing signals for each ticker and date
    lookahead_days : int
        The number of days to look ahead

    Returns
    -------
    filtered_signal : DataFrame
        The filtered long, short, and do nothing signals for each ticker and date
    """
    def _clear_column(column):
        # Thin the signals of a single ticker column.
        return clear_signals(column, lookahead_days)

    # Replacing -1 with 0 leaves only longs; replacing 1 with 0 leaves only shorts.
    longs_only = signal.replace(-1, 0)
    shorts_only = signal.replace(1, 0)

    kept_longs = longs_only.apply(_clear_column, axis=0)
    kept_shorts = shorts_only.apply(_clear_column, axis=0)

    return kept_longs + kept_shorts

def filter_signals_alternative3(signal, lookahead_days):
    """
    Filter out repeated signals in a DataFrame using boolean masks.

    Parameters
    ----------
    signal : DataFrame
        The long, short, and do nothing signals for each ticker and date
    lookahead_days : int
        The number of days to look ahead

    Returns
    -------
    filtered_signal : DataFrame
        The filtered long, short, and do nothing signals for each ticker and date
    """
    # Convert the boolean masks with an explicit integer cast rather than
    # `replace({True: ..., False: ...})`, which depends on the implicit
    # downcasting in `replace` that recent pandas versions deprecate.
    long_signals = (signal == 1).astype(int)
    short_signals = (signal == -1).astype(int) * -1

    return (
        long_signals.apply(clear_signals, args=(lookahead_days,))
        + short_signals.apply(clear_signals, args=(lookahead_days,))
    )

In [7]:
def get_lookahead_prices(close, lookahead_days):
    """
    Get the close price `lookahead_days` rows ahead of each row.

    Parameters
    ----------
    close : DataFrame
        Close price for each ticker and date
    lookahead_days : int
        The number of days to look ahead

    Returns
    -------
    lookahead_prices : DataFrame
        The lookahead prices for each ticker and date; the final
        `lookahead_days` rows have no future price and are NaN
    """
    # A negative shift pulls later rows up to the current row.
    periods = -lookahead_days
    lookahead_prices = close.shift(periods)
    return lookahead_prices

In [8]:
def get_return_lookahead(close, lookahead_prices):
    """
    Calculate the log returns from the signal day to the lookahead day.

    Parameters
    ----------
    close : DataFrame
        Close price for each ticker and date
    lookahead_prices : DataFrame
        The lookahead prices for each ticker and date

    Returns
    -------
    lookahead_returns : DataFrame
        The lookahead log returns for each ticker and date, i.e.
        log(lookahead price) - log(close)
    """
    log_lookahead = np.log(lookahead_prices)
    log_close = np.log(close)
    lookahead_returns = log_lookahead - log_close
    return lookahead_returns

In [9]:
def get_signal_return(signal, lookahead_returns):
    """
    Compute the signal returns.

    Each lookahead return is multiplied element-wise by its signal, so a
    signal of 0 gives a zero return and a signal of -1 flips the sign.

    Parameters
    ----------
    signal : DataFrame
        The long, short, and do nothing signals for each ticker and date
    lookahead_returns : DataFrame
        The lookahead log returns for each ticker and date

    Returns
    -------
    signal_return : DataFrame
        Signal returns for each ticker and date
    """
    signal_return = signal * lookahead_returns
    return signal_return

In [10]:
from scipy.stats import kstest


def calculate_kstest(long_short_signal_returns):
    """
    Calculate the KS-Test against the signal returns with a long or short signal.

    Each ticker's signal returns are compared against a normal distribution
    whose mean and (population, ddof=0) standard deviation are taken from the
    signal returns of all tickers pooled together.

    Parameters
    ----------
    long_short_signal_returns : DataFrame
        The signal returns which have a signal.
        This DataFrame contains two columns, "ticker" and "signal_return"

    Returns
    -------
    ks_values : Pandas Series
        KS statistic for all the tickers
    p_values : Pandas Series
        P value for all the tickers
    """
    # Mean and std of the pooled returns define the reference distribution.
    signal_returns = long_short_signal_returns['signal_return']
    mu = signal_returns.mean()
    sigma = signal_returns.std(ddof=0)

    # Collect results in dicts and build each Series once, with an explicit
    # dtype, instead of growing a dtype-less empty Series label by label.
    ks_by_ticker = {}
    p_by_ticker = {}
    # For each ticker, test where its returns fall within the distribution.
    for name, group in long_short_signal_returns.groupby('ticker'):
        sample = group['signal_return'].values
        test_stat, p_value = kstest(sample, 'norm', (mu, sigma))
        ks_by_ticker[name] = test_stat
        p_by_ticker[name] = p_value

    ks_values = pd.Series(ks_by_ticker, dtype=float)
    p_values = pd.Series(p_by_ticker, dtype=float)

    return ks_values, p_values

def calculate_kstest_alternative1(long_short_signal_returns):
    """
    Calculate the KS-Test against the signal returns with a long or short signal.

    Each ticker's signal returns are compared against a normal distribution
    parameterised by the mean and population (ddof=0) standard deviation of
    the pooled "signal_return" column.

    Parameters
    ----------
    long_short_signal_returns : DataFrame
        The signal returns which have a signal.
        This DataFrame contains two columns, "ticker" and "signal_return"

    Returns
    -------
    ks_values : Pandas Series
        KS statistic for all the tickers (Series named 'k', indexed by ticker)
    p_values : Pandas Series
        P value for all the tickers (Series named 'p', indexed by ticker)
    """
    # Take the statistics from the numeric column only: aggregating the whole
    # frame would include the string "ticker" column and would not give
    # scalar parameters for the reference distribution.
    signal_returns = long_short_signal_returns['signal_return']
    g_mu = signal_returns.mean()
    g_std = signal_returns.std(ddof=0)

    # One list of returns per ticker.
    samples = long_short_signal_returns.groupby('ticker')['signal_return'].apply(list)
    results = [kstest(sample, 'norm', args=(g_mu, g_std)) for sample in samples]

    ks_values = pd.Series([r[0] for r in results], index=samples.index, name='k', dtype=float)
    p_values = pd.Series([r[1] for r in results], index=samples.index, name='p', dtype=float)

    return ks_values, p_values


In [12]:
def find_outliers(ks_values, p_values, ks_threshold, pvalue_threshold=0.05):
    """
    Find outlying symbols using KS values and P-values

    A symbol is an outlier when its KS statistic is above `ks_threshold` and
    its p-value is below `pvalue_threshold`. The two Series are paired by
    position, and symbols are taken from the index of `ks_values`.

    Parameters
    ----------
    ks_values : Pandas Series
        KS statistic for all the tickers
    p_values : Pandas Series
        P value for all the tickers
    ks_threshold : float
        The threshold for the KS statistic
    pvalue_threshold : float
        The threshold for the p-value

    Returns
    -------
    outliers : set of str
        Symbols that are outliers
    """
    # Compare the underlying arrays so the pairing is strictly positional;
    # indexing a label-indexed Series with a bare integer is not reliable.
    exceeds_ks = ks_values.to_numpy() > ks_threshold
    below_pvalue = p_values.to_numpy() < pvalue_threshold

    return set(ks_values.index[exceeds_ks & below_pvalue])

def find_outliers_alternative1(ks_values, p_values, ks_threshold, pvalue_threshold=0.05):
    """
    Find outlying symbols using KS values and P-values

    Parameters
    ----------
    ks_values : Pandas Series
        KS statistic for all the tickers
    p_values : Pandas Series
        P value for all the tickers
    ks_threshold : float
        The threshold for the KS statistic
    pvalue_threshold : float
        The threshold for the p-value

    Returns
    -------
    outliers : set of str
        Symbols whose KS statistic is above `ks_threshold` and whose p-value
        is below `pvalue_threshold`
    """
    # Labels passing each condition on its own...
    high_ks_symbols = set(ks_values[ks_values > ks_threshold].index)
    low_pvalue_symbols = set(p_values[p_values < pvalue_threshold].index)

    # ...and the outliers are the labels passing both.
    return high_ks_symbols & low_pvalue_symbols

