In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
import itertools

# Useful tools

In [None]:
def get_stock(ticker, start='2021-1-1', end='2023-1-1', interval='1d'):
    """Download the price history of ``ticker`` from Yahoo Finance.

    Parameters
    ----------
    ticker : str
        Ticker symbol handed to ``yf.Ticker`` (e.g. ``'MSFT'``).
    start, end : str
        Bounds of the download window, forwarded to ``Ticker.history``.
        The defaults are the window that used to be hard-coded here.
    interval : str
        Bar size forwarded to ``Ticker.history``; ``'1d'`` requests daily bars.

    Returns
    -------
    pandas.DataFrame
        The frame returned by ``Ticker.history`` with its index moved into a
        ``Date`` column that holds plain ``datetime.date`` values, so it can be
        joined with the news dates.
    """
    tickerData = yf.Ticker(ticker)

    # The window is given by start/end, so no `period` is passed; the bar size
    # belongs in `interval` (the original passed it as `period='1d'`).
    stocks = tickerData.history(interval=interval, start=start, end=end)

    stocks = stocks.reset_index()
    stocks['Date'] = pd.to_datetime(stocks['Date']).dt.date
    return stocks

In [None]:
# Download the MSFT price history; `prepare_dataset` reads this module-level `stocks` frame.
stocks = get_stock('MSFT')

In [None]:
def cumulative_reward_long(df, func):
    """Sum the profit (in price units) of the long trades produced by ``func``.

    Parameters
    ----------
    df : object
        Passed unchanged to ``func``.
    func : callable
        ``func(df)`` returns a pair ``(buy_signal, sell_signal)``: two sequences
        holding the traded price where a trade happens and NaN elsewhere.

    Returns
    -------
    numpy.float64
        Sum of ``sell - buy`` over the k-th buy / k-th sell pairs; ``0.0`` when
        there is no completed round trip.
    """
    signals = func(df)

    # pd.isna recognises every NaN (and None), not only the np.nan singleton
    # that an identity test (`x is not np.nan`) would catch.
    buy_prices = [price for price in signals[0] if not pd.isna(price)]
    sell_prices = [price for price in signals[1] if not pd.isna(price)]

    # Assume we buy and sell at the close of the day.
    # zip stops at the shorter list, so an entry without a matching exit
    # (position still open at the end) contributes nothing.
    stock_Return = [sell - buy for buy, sell in zip(buy_prices, sell_prices)]

    return np.sum(stock_Return)

In [None]:
def cumulative_reward_short(df, func):
    """Sum the profit (in price units) of the short trades produced by ``func``.

    Parameters
    ----------
    df : object
        Passed unchanged to ``func``.
    func : callable
        ``func(df)`` returns a pair ``(short_signal, sell_signal)``: the prices
        at which a short is opened and the prices at which it is closed, with
        NaN on the days without a trade.

    Returns
    -------
    numpy.float64
        Sum of ``short - close`` over the k-th open / k-th close pairs; ``0.0``
        when there is no completed round trip.
    """
    signals = func(df)

    # pd.isna recognises every NaN (and None), not only the np.nan singleton
    # that an identity test (`x is not np.nan`) would catch.
    short_prices = [price for price in signals[0] if not pd.isna(price)]
    cover_prices = [price for price in signals[1] if not pd.isna(price)]

    # Assume we sell and then buy back at the close of the day.
    # zip stops at the shorter list, so a short that is never closed is ignored.
    stock_Return = [opened - closed for opened, closed in zip(short_prices, cover_prices)]

    return np.sum(stock_Return)

In [None]:
def split_into_array(s):
    """Split the text form of a flat list (e.g. ``"[0, 2, 1]"``) into its items.

    The first and last characters (the square brackets) are dropped and the
    remainder is split on ``", "``. The items are returned as strings; the
    caller converts them to numbers.

    An empty list (``"[]"``) gives ``[]`` rather than ``['']``, so a later
    numeric conversion does not fail on an empty string.
    """
    # Drop surrounding whitespace, then the leading and trailing square brackets
    inner = s.strip()[1:-1]

    if not inner.strip():
        return []

    # Split the remaining text into its individual items
    return inner.split(", ")

def merge_arrays(series):
    """Flatten a Series of sequences into one list, keeping the original order."""
    return [item for chunk in series.tolist() for item in chunk]

def prepare_dataset(path, columns, stock_prices=None):
    """Build the daily news table joined with the closing price.

    Parameters
    ----------
    path : str
        CSV file with a ``published`` column and the list-valued columns
        ``Predictions``, ``Positive``, ``Negative`` and ``Neutral`` stored as
        text such as ``"[0, 2, 1]"``.
    columns : list of str
        The list-valued columns to keep in the result.
    stock_prices : pandas.DataFrame, optional
        Price table with ``Date`` and ``Close`` columns. When omitted, the
        module-level ``stocks`` frame is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        One row per date: ``Date``, each requested column holding the
        concatenation of that day's per-row lists, and ``Close``. Dates without
        a ``Close`` in the price table are dropped; the index is reset.
    """
    # `pd` is the alias imported by the notebook (the bare name `pandas` is not defined).
    df = pd.read_csv(path)

    # Parse the text-encoded lists into numeric arrays.
    parsed_dtypes = (('Predictions', int), ('Positive', float), ('Negative', float), ('Neutral', float))
    for name, dtype in parsed_dtypes:
        df[name] = df[name].apply(
            lambda text, dtype=dtype: np.array(split_into_array(text)).astype(dtype)
        )
    df['published'] = pd.to_datetime(df['published']).dt.date

    # Keep the publication date plus the requested columns; the date is
    # renamed so it matches the price table's key.
    daily_input = df[['published'] + list(columns)].rename(columns={'published': 'Date'})

    prices = stocks if stock_prices is None else stock_prices

    # One row per day, each cell being the merged list of that day's values.
    grouped_df = daily_input.groupby('Date')[columns].agg(merge_arrays)
    grouped_df = pd.merge(grouped_df, prices[['Date', 'Close']], on='Date', how='left')

    # Days without a closing price cannot be traded.
    grouped_df = grouped_df.dropna()
    grouped_df = grouped_df.reset_index(drop=True)

    return grouped_df

# Simple positive/negative strategies

In [None]:
def buy_sell_sentiment(signal):
    """Long-only signals driven by the daily sentiment class.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``Predictions`` (0 = positive, 1 = negative, anything
        else is ignored) and ``Close``. Rows are read by position, so any index
        is accepted.

    Returns
    -------
    tuple of list
        ``(Buy, Sell)``, each as long as ``signal``: the close price on the rows
        where the trade happens, NaN elsewhere. A buy is only emitted while
        flat and a sell only while holding, so the trades alternate and start
        with a buy.
    """
    n_rows = len(signal)
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Buy = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    holding = False

    for i in range(n_rows):
        if predictions[i] == 0:  # Positive sentiment
            if not holding:
                Buy[i] = closes[i]
                holding = True
        elif predictions[i] == 1:  # Negative sentiment
            if holding:
                Sell[i] = closes[i]
                holding = False
    return (Buy, Sell)

In [None]:
def short_sell_sentiment(signal):
    """Short-only signals driven by the daily sentiment class.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``Predictions`` (0 = positive, 1 = negative, anything
        else is ignored) and ``Close``. Rows are read by position, so any index
        is accepted.

    Returns
    -------
    tuple of list
        ``(Short, Sell)``, each as long as ``signal``: the close price on the
        rows where the short is opened / closed, NaN elsewhere. A short is only
        opened while flat and only closed while open.
    """
    n_rows = len(signal)
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Short = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    is_short = False

    for i in range(n_rows):
        if predictions[i] == 1:  # Negative sentiment
            if not is_short:
                Short[i] = closes[i]
                is_short = True
        elif predictions[i] == 0:  # Positive sentiment
            if is_short:
                Sell[i] = closes[i]
                is_short = False
    return (Short, Sell)

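A simple first implementation: we look at the sentiment of the news; if it is positive we buy, and if it is negative we sell. The sentences published on the same day are aggregated into a single daily sentiment (the most frequent predicted class), and that daily sentiment drives the decision. Note: the intent is to use the previous day's news, but the code below joins the news with the close of the same date, without a one-day lag.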

In [None]:
# Daily table: per date, the merged list of per-sentence predicted classes plus that date's Close.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
simple_news_dataset = prepare_dataset("/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv", ['Predictions'])

In [None]:
# Display the prepared daily dataset (before the per-day predictions are collapsed).
simple_news_dataset

In [None]:
from collections import Counter

def take_max_count_prediction(lst):
    """Return the majority class of ``lst`` (0 = positive, 1 = negative, 2 = neutral).

    A single most frequent class wins outright. Ties are resolved as follows:
    all three classes tied, or positive tied with negative -> 2 (neutral);
    neutral tied with exactly one polar class -> that polar class;
    anything else -> 2.
    """
    tally = Counter(lst)
    top_count = max(tally.values())
    winners = [label for label, count in tally.items() if count == top_count]

    # Clear majority: nothing to arbitrate.
    if len(winners) == 1:
        return winners[0]

    positive_tied = 0 in winners
    negative_tied = 1 in winners
    neutral_tied = 2 in winners

    # Three-way tie or conflicting polar classes cancel out to neutral.
    if len(winners) == 3 or (positive_tied and negative_tied):
        return 2
    # Neutral tied with a single polar class: the polar class wins.
    if positive_tied and neutral_tied:
        return 0
    if negative_tied and neutral_tied:
        return 1
    return 2

# Collapse each day's list of per-sentence classes into one daily class (majority vote with tie rules).
# NOTE(review): the column is overwritten in place, so re-running this cell presumably fails on the already-collapsed integers — confirm.
simple_news_dataset['Predictions'] = simple_news_dataset['Predictions'].apply(take_max_count_prediction)

In [None]:
# Preview the dataset now that `Predictions` holds a single class per day.
simple_news_dataset.head()

In [None]:
# Backtest the majority-vote sentiment: summed price differences of the long and of the short trades.
print("Long Cumulative Return : ", cumulative_reward_long(simple_news_dataset, buy_sell_sentiment))
print("Short Cumulative Return : ", cumulative_reward_short(simple_news_dataset, short_sell_sentiment))

Now we sum the positive, negative and neutral scores of each day's sentences and take the class with the largest total.

In [None]:
# Daily table holding, per date, the merged lists of per-sentence Positive / Negative / Neutral scores plus Close.
max_sum_score = prepare_dataset("/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv", ['Positive', 'Negative', 'Neutral'])
max_sum_score.head()

In [None]:
# Replace each day's list of scores by its total, one class at a time.
# Per-column Series.apply is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases.
for score_column in ['Positive', 'Negative', 'Neutral']:
    max_sum_score[score_column] = max_sum_score[score_column].apply(lambda scores: sum(scores))
max_sum_score.head()

In [None]:
# Python
def take_max_count_prediction(row):
    """Return the class code of the largest of ``row``'s three sentiment values.

    Codes: 0 for ``'Positive'``, 1 for ``'Negative'``, 2 for ``'Neutral'``.
    On a tie the first class in that order wins. This definition replaces the
    list-based function of the same name defined earlier in the notebook.
    """
    class_codes = (('Positive', 0), ('Negative', 1), ('Neutral', 2))
    # max() keeps the first maximal entry, which gives the tie order above.
    _, best_code = max(class_codes, key=lambda entry: row[entry[0]])
    return best_code

# Daily class = the sentiment column with the largest value in each row.
max_sum_score['Predictions'] = max_sum_score.apply(take_max_count_prediction, axis=1)

In [None]:
# Preview the table with the new `Predictions` column.
max_sum_score.head()

In [None]:
# Backtest the summed-score sentiment with the same long and short rules as before.
print("Long Cumulative Return : ", cumulative_reward_long(max_sum_score, buy_sell_sentiment))
print("Short Cumulative Return : ", cumulative_reward_short(max_sum_score, short_sell_sentiment))

This technique performs worse than the previous one.

We now apply a threshold: only scores greater than or equal to the threshold are taken into account, and for each day we count how many sentences pass it for each class.

In [None]:
# Fresh copy of the daily score lists for the threshold experiment.
threshold_input = prepare_dataset("/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv", ['Positive', 'Negative', 'Neutral'])
# Minimum score a sentence needs in a class to be counted for that class.
THRESHOLD = 0.5

In [None]:
def replace_below_threshold(arr, threshold):
    """Return the elements of ``arr`` that are ``>= threshold``, in order.

    Despite the name, smaller values are removed rather than replaced.
    """
    kept = []
    for value in arr:
        if value >= threshold:
            kept.append(value)
    return kept

# Score columns that get filtered.
columns = ['Positive', 'Negative', 'Neutral']

# Keep, per day and class, only the sentence scores that reach THRESHOLD (read from the notebook globals).
for col in columns:
    threshold_input[col] = threshold_input[col].apply(lambda arr: replace_below_threshold(arr, threshold=THRESHOLD))

In [None]:
# Preview the filtered score lists.
threshold_input.head()

In [None]:
# Turn each filtered list into the number of sentences that passed the threshold.
# Per-column Series.apply is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases.
for count_column in ['Positive', 'Negative', 'Neutral']:
    threshold_input[count_column] = threshold_input[count_column].apply(len)

In [None]:
# Preview the per-class counts.
threshold_input.head()

In [None]:
# Daily class = the sentiment column with the largest count in each row.
threshold_input['Predictions'] = threshold_input.apply(take_max_count_prediction, axis=1)

In [None]:
# Preview the table with the new `Predictions` column.
threshold_input.head()

In [None]:
# Backtest the thresholded sentiment with the same long and short rules as before.
print("Long Cumulative Return : ", cumulative_reward_long(threshold_input, buy_sell_sentiment))
print("Short Cumulative Return : ", cumulative_reward_short(threshold_input, short_sell_sentiment))

In [None]:
def threshold_test(thresholds=None, path="/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv"):
    """Print the long and short cumulative return for a range of thresholds.

    Parameters
    ----------
    thresholds : iterable of float, optional
        Score thresholds to evaluate. Defaults to the grid originally
        hard-coded in this function.
    path : str
        CSV file handed to ``prepare_dataset``; defaults to the file used in
        the rest of the notebook.

    For every threshold, each day's sentences whose score reaches the threshold
    are counted per class, the class with the largest count becomes the daily
    prediction, and both strategies are backtested. Each printed line names the
    threshold it belongs to. Nothing is returned.
    """
    if thresholds is None:
        thresholds = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.85, 0.9, 0.95, 0.99]

    columns = ['Positive', 'Negative', 'Neutral']

    # The parsed dataset does not depend on the threshold: read it once.
    daily_scores = prepare_dataset(path, columns)

    for THRESHOLD in thresholds:
        threshold_input = daily_scores.copy()

        # Count the scores that reach the threshold; per-column Series.apply
        # avoids DataFrame.applymap, deprecated in recent pandas releases.
        for col in columns:
            threshold_input[col] = threshold_input[col].apply(
                lambda arr: len(replace_below_threshold(arr, threshold=THRESHOLD))
            )

        threshold_input['Predictions'] = threshold_input.apply(take_max_count_prediction, axis=1)

        print("Threshold : ", THRESHOLD, " Long Cumulative Return : ", cumulative_reward_long(threshold_input, buy_sell_sentiment))
        print("Threshold : ", THRESHOLD, " Short Cumulative Return : ", cumulative_reward_short(threshold_input, short_sell_sentiment))

In [None]:
# Run the threshold sweep; the results are printed.
threshold_test()

Surprisingly, the stricter the filter, the worse the result.

# Momentum × News trading strategy

We are going to use the best threshold found so far: 0.7.

## With MACD

## AND: both signals must agree

In [None]:
def buy_sell_momentum_sentiment(signal):
    """Long-only signals requiring MACD momentum AND sentiment to agree.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``MACD``, ``Signal Line``, ``Predictions``
        (0 = positive, 1 = negative) and ``Close``. Rows are read by position,
        so any index is accepted.

    Returns
    -------
    tuple of list
        ``(Buy, Sell)``, each as long as ``signal``: the close price on the rows
        where the trade happens, NaN elsewhere. Buy while flat when MACD is
        above its signal line and the sentiment is positive; sell while holding
        when MACD is below its signal line and the sentiment is negative.
    """
    n_rows = len(signal)
    macd = signal['MACD'].to_numpy()
    signal_line = signal['Signal Line'].to_numpy()
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Buy = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    holding = False

    for i in range(n_rows):
        if macd[i] > signal_line[i] and predictions[i] == 0:  # Positive momentum and sentiment
            if not holding:
                Buy[i] = closes[i]
                holding = True
        elif macd[i] < signal_line[i] and predictions[i] == 1:  # Negative momentum and sentiment
            if holding:
                Sell[i] = closes[i]
                holding = False

    return (Buy, Sell)

In [None]:
def short_sell_momentum_sentiment(signal):
    """Short-only signals requiring MACD momentum AND sentiment to agree.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``MACD``, ``Signal Line``, ``Predictions``
        (0 = positive, 1 = negative) and ``Close``. Rows are read by position,
        so any index is accepted.

    Returns
    -------
    tuple of list
        ``(Short, Sell)``, each as long as ``signal``: the close price on the
        rows where the short is opened / closed, NaN elsewhere. Open a short
        while flat when MACD is below its signal line and the sentiment is
        negative; close it when MACD is above its signal line and the sentiment
        is positive.
    """
    n_rows = len(signal)
    macd = signal['MACD'].to_numpy()
    signal_line = signal['Signal Line'].to_numpy()
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Short = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    is_short = False

    for i in range(n_rows):
        if macd[i] < signal_line[i] and predictions[i] == 1:  # Negative momentum and sentiment
            if not is_short:
                Short[i] = closes[i]
                is_short = True
        elif macd[i] > signal_line[i] and predictions[i] == 0:  # Positive momentum and sentiment
            if is_short:
                Sell[i] = closes[i]
                is_short = False

    return (Short, Sell)

In [None]:
# Re-download the MSFT prices so the indicator columns are added to a fresh frame.
stocks = get_stock('MSFT')

In [None]:
# MACD indicator computed on the closing price.
# Short-term exponential moving average (span of 12 rows)
ShortEMA = stocks.Close.ewm(span=12, adjust=False).mean() 
# Long-term exponential moving average (span of 26 rows)
LongEMA = stocks.Close.ewm(span=26, adjust=False).mean() 
# MACD line: short-term EMA minus long-term EMA
MACD = ShortEMA - LongEMA
# Signal line: exponential moving average of the MACD line (span of 9 rows)
signal = MACD.ewm(span=9, adjust=False).mean()

In [None]:
# Store both indicator series on the price table so they can be joined to the news by Date.
stocks['MACD'] = MACD
stocks['Signal Line'] = signal

In [None]:
THRESHOLD = 0.7
columns = ['Positive', 'Negative', 'Neutral']

# Daily sentence scores plus Close, with the MACD indicators attached by date.
MACD_news_input = prepare_dataset("/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv", columns)
MACD_news_input = pd.merge(MACD_news_input, stocks[['Date', 'MACD', 'Signal Line']], on='Date', how='left')

# Per day and class, count the sentences whose score reaches THRESHOLD.
# Per-column Series.apply is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases.
for col in columns:
    MACD_news_input[col] = MACD_news_input[col].apply(
        lambda arr: len(replace_below_threshold(arr, threshold=THRESHOLD))
    )

# Daily class = the sentiment column with the largest count.
MACD_news_input['Predictions'] = MACD_news_input.apply(take_max_count_prediction, axis=1)

print("Threshold : ", THRESHOLD, " Long Cumulative Return : ", cumulative_reward_long(MACD_news_input, buy_sell_momentum_sentiment))
print("Threshold : ", THRESHOLD, " Short Cumulative Return : ", cumulative_reward_short(MACD_news_input, short_sell_momentum_sentiment))


## OR: one of the two signals is enough

In [None]:
def buy_sell_momentum_sentiment(signal):
    """Long-only signals where MACD momentum OR sentiment is enough.

    Replaces the AND variant of the same name defined earlier in the notebook.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``MACD``, ``Signal Line``, ``Predictions``
        (0 = positive, 1 = negative) and ``Close``. Rows are read by position,
        so any index is accepted.

    Returns
    -------
    tuple of list
        ``(Buy, Sell)``, each as long as ``signal``: the close price on the rows
        where the trade happens, NaN elsewhere. The bullish test runs first, so
        a row that is bullish on one signal and bearish on the other counts as
        bullish.
    """
    n_rows = len(signal)
    macd = signal['MACD'].to_numpy()
    signal_line = signal['Signal Line'].to_numpy()
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Buy = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    holding = False

    for i in range(n_rows):
        if macd[i] > signal_line[i] or predictions[i] == 0:  # Positive momentum or sentiment
            if not holding:
                Buy[i] = closes[i]
                holding = True
        elif macd[i] < signal_line[i] or predictions[i] == 1:  # Negative momentum or sentiment
            if holding:
                Sell[i] = closes[i]
                holding = False

    return (Buy, Sell)

In [None]:
def short_sell_momentum_sentiment(signal):
    """Short-only signals where MACD momentum OR sentiment is enough.

    Replaces the AND variant of the same name defined earlier in the notebook.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``MACD``, ``Signal Line``, ``Predictions``
        (0 = positive, 1 = negative) and ``Close``. Rows are read by position,
        so any index is accepted.

    Returns
    -------
    tuple of list
        ``(Short, Sell)``, each as long as ``signal``: the close price on the
        rows where the short is opened / closed, NaN elsewhere. The bearish
        test runs first, so a row that is bearish on one signal and bullish on
        the other counts as bearish.
    """
    n_rows = len(signal)
    macd = signal['MACD'].to_numpy()
    signal_line = signal['Signal Line'].to_numpy()
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Short = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    is_short = False

    for i in range(n_rows):
        if macd[i] < signal_line[i] or predictions[i] == 1:  # Negative momentum or sentiment
            if not is_short:
                Short[i] = closes[i]
                is_short = True
        elif macd[i] > signal_line[i] or predictions[i] == 0:  # Positive momentum or sentiment
            if is_short:
                Sell[i] = closes[i]
                is_short = False

    return (Short, Sell)

In [None]:
# Re-download the MSFT prices so the indicator columns are added to a fresh frame.
stocks = get_stock('MSFT')

In [None]:
# MACD indicator computed on the closing price (same computation as in the AND section).
# Short-term exponential moving average (span of 12 rows)
ShortEMA = stocks.Close.ewm(span=12, adjust=False).mean() 
# Long-term exponential moving average (span of 26 rows)
LongEMA = stocks.Close.ewm(span=26, adjust=False).mean() 
# MACD line: short-term EMA minus long-term EMA
MACD = ShortEMA - LongEMA
# Signal line: exponential moving average of the MACD line (span of 9 rows)
signal = MACD.ewm(span=9, adjust=False).mean()

In [None]:
# Store both indicator series on the price table so they can be joined to the news by Date.
stocks['MACD'] = MACD
stocks['Signal Line'] = signal

In [None]:
THRESHOLD = 0.7
columns = ['Positive', 'Negative', 'Neutral']

# Daily sentence scores plus Close, with the MACD indicators attached by date.
MACD_news_input = prepare_dataset("/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv", columns)
MACD_news_input = pd.merge(MACD_news_input, stocks[['Date', 'MACD', 'Signal Line']], on='Date', how='left')

# Per day and class, count the sentences whose score reaches THRESHOLD.
# Per-column Series.apply is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases.
for col in columns:
    MACD_news_input[col] = MACD_news_input[col].apply(
        lambda arr: len(replace_below_threshold(arr, threshold=THRESHOLD))
    )

# Daily class = the sentiment column with the largest count.
MACD_news_input['Predictions'] = MACD_news_input.apply(take_max_count_prediction, axis=1)

# Uses the OR variants of the momentum/sentiment signal functions defined just above.
print("Threshold : ", THRESHOLD, " Long Cumulative Return : ", cumulative_reward_long(MACD_news_input, buy_sell_momentum_sentiment))
print("Threshold : ", THRESHOLD, " Short Cumulative Return : ", cumulative_reward_short(MACD_news_input, short_sell_momentum_sentiment))

## With RSI

## AND: both signals must agree

In [None]:
def buy_sell_RSI_sentiment(signal, upper=70, lower=30):
    """Long-only signals requiring RSI AND sentiment to agree.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``RSI``, ``Predictions`` (0 = positive, 1 = negative)
        and ``Close``. Rows are read by position, so any index is accepted.
    upper, lower : float
        RSI bounds: a buy needs ``RSI < lower``, a sell needs ``RSI > upper``.

    Returns
    -------
    tuple of list
        ``(Buy, Sell)``, each as long as ``signal``: the close price on the rows
        where the trade happens, NaN elsewhere. Buy while flat when RSI is
        below ``lower`` and the sentiment is positive; sell while holding when
        RSI is above ``upper`` and the sentiment is negative.
    """
    n_rows = len(signal)
    rsi = signal['RSI'].to_numpy()
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Buy = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    holding = False

    for i in range(n_rows):
        if rsi[i] < lower and predictions[i] == 0:
            if not holding:
                Buy[i] = closes[i]
                holding = True
        elif rsi[i] > upper and predictions[i] == 1:
            if holding:
                Sell[i] = closes[i]
                holding = False

    return (Buy, Sell)

In [None]:
def short_sell_RSI_sentiment(signal, upper=70, lower=30):
    """Short-only signals requiring RSI AND sentiment to agree.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``RSI``, ``Predictions`` (0 = positive, 1 = negative)
        and ``Close``. Rows are read by position, so any index is accepted.
    upper, lower : float
        RSI bounds: opening a short needs ``RSI > upper``, closing it needs
        ``RSI < lower``.

    Returns
    -------
    tuple of list
        ``(Short, Sell)``, each as long as ``signal``: the close price on the
        rows where the short is opened / closed, NaN elsewhere.
    """
    n_rows = len(signal)
    rsi = signal['RSI'].to_numpy()
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Short = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    is_short = False

    for i in range(n_rows):
        if rsi[i] > upper and predictions[i] == 1:
            if not is_short:
                Short[i] = closes[i]
                is_short = True
        elif rsi[i] < lower and predictions[i] == 0:
            if is_short:
                Sell[i] = closes[i]
                is_short = False

    return (Short, Sell)

In [None]:
def calculate_RSI(data, time_window):
    """Relative Strength Index of ``data`` over ``time_window`` observations.

    ``data`` is a pandas Series (here: closing prices). The result is indexed
    like the one-step differences of ``data`` (the first observation is
    dropped) and is NaN until ``time_window`` differences are available.
    """
    delta = data.diff(1).dropna()  # change from one row to the next

    # Zero-filled series shaped like `delta`, with the positive (resp.
    # negative) changes written in; every other position stays zero.
    gains = (0 * delta).mask(delta > 0, delta)
    losses = (0 * delta).mask(delta < 0, delta)

    # com = time_window - 1 gives a decay of alpha = 1 / time_window
    smoothing = {'com': time_window - 1, 'min_periods': time_window}
    average_gain = gains.ewm(**smoothing).mean()
    average_loss = losses.ewm(**smoothing).mean()

    relative_strength = (average_gain / average_loss).abs()
    return 100 - 100 / (1 + relative_strength)

# Re-download the MSFT prices so the RSI column is added to a fresh frame.
stocks = get_stock('MSFT')

# RSI of the closing price over a 14-row window; assigned by index, so the first rows stay NaN.
stocks['RSI'] = calculate_RSI(stocks['Close'], 14)

In [None]:
THRESHOLD = 0.7
columns = ['Positive', 'Negative', 'Neutral']

# Daily sentence scores plus Close, with the RSI attached by date.
RSI_news_input = prepare_dataset("/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv", columns)
RSI_news_input = pd.merge(RSI_news_input, stocks[['Date', 'RSI']], on='Date', how='left')

# Per day and class, count the sentences whose score reaches THRESHOLD.
# Per-column Series.apply is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases.
for col in columns:
    RSI_news_input[col] = RSI_news_input[col].apply(
        lambda arr: len(replace_below_threshold(arr, threshold=THRESHOLD))
    )

# Daily class = the sentiment column with the largest count.
RSI_news_input['Predictions'] = RSI_news_input.apply(take_max_count_prediction, axis=1)

print("Threshold : ", THRESHOLD, " Long Cumulative Return : ", cumulative_reward_long(RSI_news_input, buy_sell_RSI_sentiment))
print("Threshold : ", THRESHOLD, " Short Cumulative Return : ", cumulative_reward_short(RSI_news_input, short_sell_RSI_sentiment))

## OR: one of the two signals is enough

In [None]:
def buy_sell_RSI_sentiment(signal, upper=70, lower=30):
    """Long-only signals where RSI OR sentiment is enough.

    Replaces the AND variant of the same name defined earlier in the notebook.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``RSI``, ``Predictions`` (0 = positive, 1 = negative)
        and ``Close``. Rows are read by position, so any index is accepted.
    upper, lower : float
        RSI bounds: ``RSI < lower`` is a buy condition, ``RSI > upper`` a sell
        condition.

    Returns
    -------
    tuple of list
        ``(Buy, Sell)``, each as long as ``signal``: the close price on the rows
        where the trade happens, NaN elsewhere. The buy test runs first, so a
        row matching both a buy and a sell condition counts as a buy.
    """
    n_rows = len(signal)
    rsi = signal['RSI'].to_numpy()
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Buy = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    # Start flat, like every other signal function in the notebook. Starting
    # from a third state (-1) allowed a sell before the first buy, which
    # breaks the k-th buy / k-th sell pairing done by cumulative_reward_long.
    holding = False

    for i in range(n_rows):
        if rsi[i] < lower or predictions[i] == 0:
            if not holding:
                Buy[i] = closes[i]
                holding = True
        elif rsi[i] > upper or predictions[i] == 1:
            if holding:
                Sell[i] = closes[i]
                holding = False

    return (Buy, Sell)

In [None]:
def short_sell_RSI_sentiment(signal, upper=70, lower=30):
    """Short-only signals where RSI OR sentiment is enough.

    Replaces the AND variant of the same name defined earlier in the notebook.

    Parameters
    ----------
    signal : pandas.DataFrame
        Needs the columns ``RSI``, ``Predictions`` (0 = positive, 1 = negative)
        and ``Close``. Rows are read by position, so any index is accepted.
    upper, lower : float
        RSI bounds: ``RSI > upper`` is a condition to open a short,
        ``RSI < lower`` a condition to close it.

    Returns
    -------
    tuple of list
        ``(Short, Sell)``, each as long as ``signal``: the close price on the
        rows where the short is opened / closed, NaN elsewhere. The opening
        test runs first, so a row matching both conditions counts as bearish.
    """
    n_rows = len(signal)
    rsi = signal['RSI'].to_numpy()
    predictions = signal['Predictions'].to_numpy()
    closes = signal['Close'].to_numpy()

    Short = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    is_short = False

    for i in range(n_rows):
        if rsi[i] > upper or predictions[i] == 1:
            if not is_short:
                Short[i] = closes[i]
                is_short = True
        elif rsi[i] < lower or predictions[i] == 0:
            if is_short:
                Sell[i] = closes[i]
                is_short = False

    return (Short, Sell)

In [None]:
THRESHOLD = 0.7
columns = ['Positive', 'Negative', 'Neutral']

# Daily sentence scores plus Close, with the RSI attached by date.
RSI_news_input = prepare_dataset("/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv", columns)
RSI_news_input = pd.merge(RSI_news_input, stocks[['Date', 'RSI']], on='Date', how='left')

# Per day and class, count the sentences whose score reaches THRESHOLD.
# Per-column Series.apply is used instead of DataFrame.applymap, which is
# deprecated in recent pandas releases.
for col in columns:
    RSI_news_input[col] = RSI_news_input[col].apply(
        lambda arr: len(replace_below_threshold(arr, threshold=THRESHOLD))
    )

# Daily class = the sentiment column with the largest count.
RSI_news_input['Predictions'] = RSI_news_input.apply(take_max_count_prediction, axis=1)

# Uses the OR variants of the RSI/sentiment signal functions defined just above.
print("Threshold : ", THRESHOLD, " Long Cumulative Return : ", cumulative_reward_long(RSI_news_input, buy_sell_RSI_sentiment))
print("Threshold : ", THRESHOLD, " Short Cumulative Return : ", cumulative_reward_short(RSI_news_input, short_sell_RSI_sentiment))