In [314]:
import itertools

import numpy as np
import pandas
import pandas as pd
import yfinance as yf


# Useful tool

In [315]:
# Ticker whose daily prices are joined to the news sentiment further down.
tickerSymbol = 'MSFT'

# Handle used to query Yahoo Finance for this ticker.
tickerData = yf.Ticker(tickerSymbol)

# Daily bars for the study window. The bar size is selected with `interval`;
# `period` is a look-back length and is not meaningful next to an explicit
# start/end window.
# NOTE(review): some yfinance releases reject period+start+end together - confirm
# against the installed version.
stocks = tickerData.history(interval='1d', start='2021-1-1', end='2023-1-1').reset_index()

# Keep plain calendar dates so the frame can be merged with the news frame on 'Date'.
stocks['Date'] = pandas.to_datetime(stocks['Date']).dt.date
stocks.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2021-01-04,216.710025,217.167734,209.19193,211.996613,37130100,0.0,0.0
1,2021-01-05,211.577865,212.804921,210.058667,212.201126,23823000,0.0,0.0
2,2021-01-06,206.621004,210.828028,206.397024,206.698914,35930700,0.0,0.0
3,2021-01-07,208.442055,213.603444,208.120699,212.580902,27694500,0.0,0.0
4,2021-01-08,212.960726,214.811044,211.353886,213.876144,22956200,0.0,0.0


In [316]:
def cumulative_reward(df, func):
    """Sum the profit of every completed buy -> sell round trip of a strategy.

    Parameters
    ----------
    df : object
        Passed unchanged to ``func``.
    func : callable
        Strategy returning ``(buy, sell)``: two sequences aligned position by
        position (one entry per day) that hold the execution price on signal
        days and NaN on every other day.

    Returns
    -------
    numpy scalar
        Sum of ``sell - buy`` over all completed trades (``0.0`` if there are
        none). The value is expressed in price units, not as a percentage.

    Notes
    -----
    Trades are matched chronologically: a position is opened by the first buy
    seen while flat and closed by the first sell that follows it. Sells that
    occur while flat, buys that occur while already holding, and a position
    still open at the end contribute nothing.
    """
    signals = func(df)
    buy_signal, sell_signal = signals[0], signals[1]

    # Assume we buy and sell at the close of the day
    stock_Return = []
    entry_price = None  # price of the open position, None while flat
    for buy_price, sell_price in zip(buy_signal, sell_signal):
        if entry_price is None:
            # Flat: only a buy can change the state.
            if not np.isnan(buy_price):
                entry_price = buy_price
        elif not np.isnan(sell_price):
            # Holding: the first following sell closes the trade.
            stock_Return.append(sell_price - entry_price)
            entry_price = None

    return np.sum(stock_Return)

In [317]:
def split_into_array(s):
    """Split the text form of a bracketed list into its items.

    ``"[1, 2, 3]"`` becomes ``['1', '2', '3']``. Items are returned as
    strings; converting them to numbers is left to the caller.

    Parameters
    ----------
    s : str
        Text that starts with an opening bracket and ends with a closing one,
        with items separated by commas.

    Returns
    -------
    list of str
        The items with surrounding whitespace removed; ``[]`` when the
        brackets enclose nothing.
    """
    # Drop the enclosing brackets (first and last character).
    inner = s.strip()[1:-1].strip()

    # "[]" must give an empty list, not [''] (which cannot be cast to a number).
    if not inner:
        return []

    # Tolerate "1,2" as well as "1, 2".
    return [item.strip() for item in inner.split(",")]

def merge_arrays(series):
    """Concatenate the sequences stored in the cells of ``series`` into one flat list."""
    flattened = []
    for chunk in series.tolist():
        # Each cell is itself iterable (list / array); append its items in order.
        flattened.extend(chunk)
    return flattened

def prepare_dataset(path, columns, prices=None):
    """Build one row per day with the merged news columns and the closing price.

    Parameters
    ----------
    path : str
        CSV file containing the columns 'Predictions', 'Positive', 'Negative',
        'Neutral' (each cell the text of a bracketed list) and 'published'.
    columns : list of str
        Which of the list columns to keep in the result.
    prices : DataFrame, optional
        Frame with 'Date' and 'Close' columns. Defaults to the notebook-level
        ``stocks`` frame, which must then already exist.

    Returns
    -------
    DataFrame
        Columns 'Date', the requested ``columns`` (all lists of one day
        concatenated) and 'Close'. Days without a closing price in ``prices``
        are dropped.
    """
    if prices is None:
        prices = stocks  # notebook-level price frame built in an earlier cell

    news = pandas.read_csv(path)

    # Every list column is stored as text; parse it and cast to its numeric type.
    list_dtypes = {'Predictions': int, 'Positive': float, 'Negative': float, 'Neutral': float}
    for name, dtype in list_dtypes.items():
        news[name] = news[name].apply(
            lambda raw, dtype=dtype: np.array(split_into_array(raw)).astype(dtype)
        )
    news['published'] = pandas.to_datetime(news['published']).dt.date

    wanted = list(columns)
    selected = news[['published'] + wanted].rename(columns={'published': 'Date'})

    # One row per day: concatenate the lists of all articles published that day.
    grouped_df = selected.groupby('Date')[wanted].agg(merge_arrays)
    grouped_df = pandas.merge(grouped_df, prices[['Date', 'Close']], on='Date', how='left')

    # The left join leaves NaN closes on days missing from `prices`; drop them.
    return grouped_df.dropna().reset_index(drop=True)

# Simple positive/negative strategies

In [318]:
def buy_sell_sentiment(signal):
    """Turn daily sentiment labels into alternating buy / sell price signals.

    Parameters
    ----------
    signal : DataFrame
        Must provide 'Predictions' (0 = positive, 1 = negative; any other
        label produces no signal) and 'Close'. Rows are processed in their
        stored order, whatever the index labels are.

    Returns
    -------
    (Buy, Sell) : tuple of list
        Two lists with one entry per row: the close on signal days, ``np.nan``
        otherwise. A buy is only emitted if the previous signal was not a buy,
        and likewise for sells.

    Notes
    -----
    ``flag`` starts at -1, so a negative day that occurs before any positive
    day emits a sell that has no preceding buy.
    """
    n_rows = len(signal)
    # Every placeholder is the np.nan object itself, so identity-based and
    # isnan-based NaN filters both work on the result.
    Buy = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    flag = -1  # -1: nothing emitted yet, 1: last signal was a buy, 0: last was a sell

    # Positional arrays: `signal[col][i]` would be a *label* lookup and fails
    # (or misorders rows) when the index is not 0..n-1.
    labels = np.asarray(signal['Predictions'])
    closes = np.asarray(signal['Close'])

    for i in range(n_rows):
        if labels[i] == 0:  # Positive sentiment
            if flag != 1:
                Buy[i] = closes[i]
                flag = 1
        elif labels[i] == 1:  # Negative sentiment
            if flag != 0:
                Sell[i] = closes[i]
                flag = 0
    return (Buy, Sell)

A simple first implementation: we look at the sentiment of the news and buy if it is positive and sell if it is negative. We use the sentiment of the previous day's news to make our decision, aggregated over all the sentences published on the same day.

In [319]:
# Per-day frame holding the merged 'Predictions' lists and the matching close.
# NOTE(review): absolute, machine-specific path - this cell only runs where that file exists.
simple_news_dataset = prepare_dataset("/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv", ['Predictions'])

In [320]:
# Inspect the result: one row per date with that day's label list and the close.
simple_news_dataset

Unnamed: 0,Date,Predictions,Close
0,2021-01-04,"[2, 0, 2, 2, 2, 0, 2]",211.996613
1,2021-01-05,"[2, 2, 1, 2, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, ...",212.201126
2,2021-01-06,"[0, 0, 2, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2]",206.698914
3,2021-01-07,"[2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, ...",212.580902
4,2021-01-08,"[2, 2, 2, 2, 2, 2, 2, 2]",213.876144
...,...,...,...
498,2022-12-23,"[1, 2, 2, 2, 1, 0, 0, 2, 1, 0, 0, 0, 2, 2, 1, ...",236.631805
499,2022-12-27,"[2, 2, 0, 0, 2, 0]",234.877365
500,2022-12-28,"[0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 1, 1, 2, 2, 2, ...",232.468719
501,2022-12-29,"[2, 0, 2, 2, 2, 0, 1, 2, 2, 2, 2, 1, 2, 1, 1, ...",238.891769


In [321]:
# Python
from collections import Counter

def take_max_count_prediction(lst):
    """Reduce a list of class labels to a single label by majority vote.

    Labels 0 and 1 are the positive and negative classes used by the trading
    rules; 2 is presumably the neutral class (TODO confirm against the model's
    label mapping).

    Parameters
    ----------
    lst : iterable of int
        Labels of one day.

    Returns
    -------
    int
        The most frequent label. Ties are resolved as follows:
        all three labels tied -> 2; 0 tied with 1 -> 2; 0 tied with 2 -> 0;
        1 tied with 2 -> 1; anything else -> 2.
        An empty input also yields the fallback label 2.
    """
    counts = Counter(lst)
    if not counts:
        # Nothing to vote on: use the same fallback as the unresolved-tie case
        # instead of letting max() raise ValueError.
        return 2

    max_count = max(counts.values())
    modes = [k for k, v in counts.items() if v == max_count]

    if len(modes) == 1:
        return modes[0]
    if len(modes) == 3:
        return 2
    if 0 in modes and 1 in modes:
        # Positive and negative cancel each other out.
        return 2
    if 0 in modes and 2 in modes:
        return 0
    if 1 in modes and 2 in modes:
        return 1
    return 2

# Collapse each day's list of labels into a single label.
# NOTE: this overwrites 'Predictions' in place, so the cell is not idempotent -
# running it a second time passes an already-collapsed scalar to take_max_count_prediction.
simple_news_dataset['Predictions'] = simple_news_dataset['Predictions'].apply(take_max_count_prediction)

In [322]:
# Check that 'Predictions' now holds one label per day.
simple_news_dataset.head()

Unnamed: 0,Date,Predictions,Close
0,2021-01-04,2,211.996613
1,2021-01-05,2,212.201126
2,2021-01-06,2,206.698914
3,2021-01-07,2,212.580902
4,2021-01-08,2,213.876144


In [323]:
# Back-test the daily-label strategy. The figure is a sum of (sell - buy)
# price differences, i.e. it is in price units, not a percentage return.
print("Cumulative Return : ", cumulative_reward(simple_news_dataset, buy_sell_sentiment))

Cumulative Return :  52.32212829589844


Now, for each day, we take the maximum between the positive and negative scores across that day's sentences.

In [324]:
# Per-day frame holding the merged 'Positive' / 'Negative' / 'Neutral' score lists and the close.
# NOTE(review): absolute, machine-specific path - this cell only runs where that file exists.
max_sum_score = prepare_dataset("/Users/tony/Desktop/Antoine/Semestre/9.Financial ML/ProjectFinancialNews/data/msft_roberta_output.csv", ['Positive', 'Negative', 'Neutral'])

In [325]:
# Preview the per-day score lists next to the close.
max_sum_score.head()

Unnamed: 0,Date,Positive,Negative,Neutral,Close
0,2021-01-04,"[5.761160355177708e-05, 0.7729113101959229, 8....","[8.459905802737921e-05, 0.0005494784563779831,...","[0.9998577833175659, 0.2265392392873764, 0.999...",211.996613
1,2021-01-05,"[6.80618395563215e-05, 7.606166764162481e-05, ...","[6.462157034548e-05, 6.527246296172962e-05, 0....","[0.99986732006073, 0.9998586177825928, 0.00241...",212.201126
2,2021-01-06,"[0.9927652478218079, 0.9984225034713745, 5.471...","[0.00025216813082806766, 0.0001982791873160749...","[0.006982621271163225, 0.0013791749952360988, ...",206.698914
3,2021-01-07,"[7.9099896538537e-05, 5.7219538575736806e-05, ...","[0.0005839336663484573, 0.00021778183872811496...","[0.9993368983268738, 0.9997250437736511, 0.999...",212.580902
4,2021-01-08,"[4.7886129323160276e-05, 4.737492781714536e-05...","[6.148114334791899e-05, 6.220681098056957e-05,...","[0.9998905658721924, 0.9998904466629028, 0.999...",213.876144


We set a threshold and only take the values greater than it into account, then take their mean over the day.

# Momentum X News trading strategy

## With MACD

In [5]:
def buy_sell_momentum_sentiment(signal):
    """Emit buy / sell price signals when MACD momentum and sentiment agree.

    Parameters
    ----------
    signal : DataFrame
        Must provide 'MACD', 'Signal Line', 'Sentiment' (0 = positive,
        1 = negative) and 'Close'. Rows are processed in their stored order,
        whatever the index labels are.

    Returns
    -------
    (Buy, Sell) : tuple of list
        Two lists with one entry per row: the close on signal days, ``np.nan``
        otherwise. A buy needs MACD above the signal line together with
        positive sentiment; a sell needs MACD below the signal line together
        with negative sentiment. Consecutive signals of the same side are
        suppressed.

    Notes
    -----
    ``flag`` starts at -1, so a sell can be emitted before any buy.
    """
    n_rows = len(signal)
    # Every placeholder is the np.nan object itself, so identity-based and
    # isnan-based NaN filters both work on the result.
    Buy = [np.nan] * n_rows
    Sell = [np.nan] * n_rows
    flag = -1  # -1: nothing emitted yet, 1: last signal was a buy, 0: last was a sell

    # Positional arrays: `signal[col][i]` would be a *label* lookup and fails
    # (or misorders rows) when the index is not 0..n-1.
    macd = np.asarray(signal['MACD'])
    signal_line = np.asarray(signal['Signal Line'])
    sentiment = np.asarray(signal['Sentiment'])
    closes = np.asarray(signal['Close'])

    for i in range(n_rows):
        if macd[i] > signal_line[i] and sentiment[i] == 0:  # Positive momentum and sentiment
            if flag != 1:
                Buy[i] = closes[i]
                flag = 1
        elif macd[i] < signal_line[i] and sentiment[i] == 1:  # Negative momentum and sentiment
            if flag != 0:
                Sell[i] = closes[i]
                flag = 0
        # Neutral or conflicting momentum and sentiment: both stay NaN.

    return (Buy, Sell)

## With RSI

In [None]:
def buy_sell_RSI_sentiment(signal, upper=70, lower=30):
    """Emit buy / sell price signals when RSI extremes and sentiment agree.

    Parameters
    ----------
    signal : DataFrame
        Must provide 'RSI', 'Sentiment' (0 = positive, 1 = negative) and
        'Close'. Rows are processed in their stored order, whatever the index
        labels are.
    upper : number, default 70
        RSI strictly above this value counts as overbought.
    lower : number, default 30
        RSI strictly below this value counts as oversold.

    Returns
    -------
    (Buy, Sell) : tuple of list
        Two lists with one entry per row: the close on signal days, ``np.nan``
        otherwise. Overbought + negative sentiment gives a sell; oversold +
        positive sentiment gives a buy.

    Notes
    -----
    No position state is tracked here, so several buys (or sells) in a row
    are all emitted.
    """
    n_rows = len(signal)
    # Every placeholder is the np.nan object itself, so identity-based and
    # isnan-based NaN filters both work on the result.
    Buy = [np.nan] * n_rows
    Sell = [np.nan] * n_rows

    # Positional arrays: `signal[col][i]` would be a *label* lookup and fails
    # (or misorders rows) when the index is not 0..n-1.
    rsi = np.asarray(signal['RSI'])
    sentiment = np.asarray(signal['Sentiment'])
    closes = np.asarray(signal['Close'])

    for i in range(n_rows):
        if rsi[i] > upper and sentiment[i] == 1:  # Overbought and negative sentiment
            Sell[i] = closes[i]
        elif rsi[i] < lower and sentiment[i] == 0:  # Oversold and positive sentiment
            Buy[i] = closes[i]
        # Neutral or conflicting RSI and sentiment: both stay NaN.

    return (Buy, Sell)