IBEX35 Forecasting

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor as RFR

"""
Initialize datasets
"""

df_market = pd.read_csv('datasets/train.csv')
df_tweets = pd.read_csv('datasets/tweets_from2015_#Ibex35.csv')
df_test = pd.read_csv('datasets/test_x.csv')

In [2]:
df_market.shape

(6554, 8)

In [3]:
df_market.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Target
0,1994-01-03,3615.199951,3654.699951,3581.0,3654.5,3654.496338,0.0,0
1,1994-01-04,3654.5,3675.5,3625.100098,3630.300049,3630.296387,0.0,1
2,1994-01-05,3625.199951,3625.199951,3583.399902,3621.199951,3621.196289,0.0,1
3,1994-01-06,,,,,,,0
4,1994-01-07,3621.199951,3644.399902,3598.699951,3636.399902,3636.39624,0.0,1


In [4]:
df_tweets.shape

(9801, 3)

In [5]:
df_tweets.head()

Unnamed: 0,tweetDate,handle,text
0,Sat Apr 09 14:47:45 +0000 2022,abelac62,He hecho el repaso de todos los componentes de...
1,Thu Apr 07 19:14:36 +0000 2022,LluisPerarnau,Els projectes que han presentat les empreses d...
2,Mon Apr 04 16:48:45 +0000 2022,Pegaso121080,"Por si no lo has visto, o no lo encuentras en ..."
3,Tue Apr 05 07:23:16 +0000 2022,zonavalue,📈 #BOLSA: El #Ibex35 abre en 🟢 \n\n🇪🇸 #Ibex35 ...
4,Thu Mar 31 16:07:43 +0000 2022,EPeconomia,"El #Ibex35 retrocede un 0,4% en marzo y un 3,0..."


In [6]:
print(df_market.isnull().sum()) # amount of days without register

Date           0
Open         133
High         133
Low          133
Close        133
Adj Close    133
Volume       133
Target         0
dtype: int64


In [7]:
null_map = df_market.isnull()

def get_pos_no_null_forward(null_map, column, i, pos_forward):
    while null_map[column][i + pos_forward] == True:
        pos_forward += 1
    return pos_forward

def get_pos_no_null_backward(null_map, column, i, pos_backward):
    while null_map[column][i - pos_backward] == True:
        pos_backward += 1
    return pos_backward

columns = df_market.columns

for column in columns:
    if column != 'Date' and column != 'Target':
        for i in range(0, len(null_map)):
            pos_backward = 1
            pos_forward = 1
            if null_map[column][i] == True:
                pos_backward = get_pos_no_null_backward(null_map, column, i, pos_backward)
                pos_forward = get_pos_no_null_forward(null_map, column, i, pos_forward)
                df_market.at[i, column] = (df_market[column][i - pos_backward] + df_market[column][i + pos_forward]) / 2

In [8]:
print(df_market.isnull().sum())

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
Target       0
dtype: int64


In [9]:
x = pd.DataFrame()

x['HL'] = df_market['High'] - df_market['Low']
x['OC'] = df_market['Open'] - df_market['Close']
x['7MA'] = df_market['Close'].rolling(window=7, min_periods=1).mean()
x['14MA'] = df_market['Close'].rolling(window=14, min_periods=1).mean()
x['21MA'] = df_market['Close'].rolling(window=21,min_periods=1).mean()
x['STDV'] = df_market['Close'].rolling(window=7, min_periods=1).std()
x.at[0, 'STDV'] = x['STDV'][1]

In [10]:
model = RFR(oob_score = True)

df_test = pd.read_csv('datasets/test_x.csv')

print (df_test)

model.fit(x, df_market['Close'])

x_test = pd.DataFrame()

x_test['HL'] = df_test['High'] - df_test['Low']
x_test['OC'] = df_test['Open'] - df_test['Close']
x_test['7MA'] = df_test['Close'].rolling(window=7, min_periods=1).mean()
x_test['14MA'] = df_test['Close'].rolling(window=14, min_periods=1).mean()
x_test['21MA'] = df_test['Close'].rolling(window=21,min_periods=1).mean()
x_test['STDV'] = df_test['Close'].rolling(window=7, min_periods=1).std()
x_test.at[0, 'STDV'] = x_test['STDV'][1]


     test_index        Date         Open         High          Low  \
0          6557  2019-06-05  9136.799805  9173.400391  9095.000000   
1          6558  2019-06-06  9169.200195  9246.200195  9136.700195   
2          6559  2019-06-07  9186.700195  9261.400391  9185.700195   
3          6560  2019-06-10  9284.200195  9302.200195  9248.099609   
4          6561  2019-06-11  9288.599609  9332.500000  9273.400391   
..          ...         ...          ...          ...          ...   
721        7278  2022-03-25  8314.099609  8363.200195  8286.500000   
722        7279  2022-03-28  8354.400391  8485.700195  8354.400391   
723        7280  2022-03-29  8451.000000  8621.000000  8419.700195   
724        7281  2022-03-30  8583.299805  8597.400391  8508.900391   
725        7282  2022-03-31  8562.599609  8588.299805  8445.099609   

           Close    Adj Close       Volume  
0    9150.500000  9150.500000  158753000.0  
1    9169.200195  9169.200195  212720900.0  
2    9236.099609  9236.0

In [11]:
y_prediction = model.predict(x_test)

In [21]:
result_csv = pd.DataFrame()

result_csv['test_index'] = df_test['test_index']

In [22]:
def get_target_values(df_test, predicted_y):
    result = []
    for i in range(0, len(df_test)):
        if df_test['Close'][i] > predicted_y[i]:
            result.append(1)
        else:
            result.append(0)
    return result

In [23]:
result_csv['Target'] = get_target_values(df_test, y_prediction)

In [28]:
result_csv.to_csv('predictions.csv', index=False)

     test_index  Target
0          6557       0
1          6558       0
2          6559       1
3          6560       1
4          6561       1
..          ...     ...
721        7278       0
722        7279       1
723        7280       1
724        7281       1
725        7282       1

[726 rows x 2 columns]


In [43]:
result_json = pd.DataFrame(result_csv['Target'])
result_json.set_index(result_csv['test_index'], drop=True, append=False, inplace=False, verify_integrity=False)
result_json.to_json('predictions.json')