In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
sp500 = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/S&P500Index.xlsx')

In [5]:
sp500['Dates'] = pd.to_datetime(sp500['Dates'], dayfirst=True)

In [6]:
start_date = pd.to_datetime('1980-01-01', dayfirst=True)
end_date = pd.to_datetime('2023-12-31', dayfirst=True)
sp500_filtered = sp500[(sp500['Dates'] >= start_date) & (sp500['Dates'] <= end_date)]

In [7]:
sp500_filtered['Pct Change'] = sp500_filtered['PX_LAST'].pct_change() * 100

In [8]:
sp500_filtered

Unnamed: 0,Dates,PX_LAST,Pct Change
0,1980-05-19,107.67,
1,1980-05-20,107.62,-0.046438
2,1980-05-21,107.72,0.092920
3,1980-05-22,109.01,1.197549
4,1980-05-23,110.62,1.476929
...,...,...,...
10858,2023-06-12,4338.93,0.932108
10859,2023-06-13,4369.01,0.693258
10860,2023-06-14,4372.59,0.081941
10861,2023-06-15,4425.84,1.217814


In [9]:
sp500_filtered['Pct Change']

0             NaN
1       -0.046438
2        0.092920
3        1.197549
4        1.476929
           ...   
10858    0.932108
10859    0.693258
10860    0.081941
10861    1.217814
10862   -0.367162
Name: Pct Change, Length: 10863, dtype: float64

In [10]:
sp500_fill= sp500_filtered.fillna(sp500_filtered.mean())

  sp500_fill= sp500_filtered.fillna(sp500_filtered.mean())


In [11]:
sp500_fill

Unnamed: 0,Dates,PX_LAST,Pct Change
0,1980-05-19,107.67,0.040660
1,1980-05-20,107.62,-0.046438
2,1980-05-21,107.72,0.092920
3,1980-05-22,109.01,1.197549
4,1980-05-23,110.62,1.476929
...,...,...,...
10858,2023-06-12,4338.93,0.932108
10859,2023-06-13,4369.01,0.693258
10860,2023-06-14,4372.59,0.081941
10861,2023-06-15,4425.84,1.217814


In [12]:
#Time series

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor


sp500_filtered = sp500_filtered.fillna(sp500_filtered.mean())

# Convert the Date column to datetime format
sp500_filtered['Dates'] = pd.to_datetime(sp500_filtered['Dates'], dayfirst=True)
print(sp500_filtered)
# Extract the two columns of interest
features = sp500_filtered[['Dates','Pct Change']].copy()
print(features)
target = sp500_filtered['PX_LAST']

# Convert the Date column to numeric representation
features.loc[:, 'Dates'] = features['Dates'].astype(int) // 10**9

# Split the data into training and testing sets
split_index = int(len(features) * 0.8)
train_features, test_features = features.iloc[:split_index], features.iloc[split_index:]
train_target, test_target = target.iloc[:split_index], target.iloc[split_index:]


# Create and train the random forest regressor model
model = RandomForestRegressor(n_estimators=100, random_state=50)  # Adjust the parameters as desired
model.fit(train_features, train_target)

# Generate future predictions using Monte Carlo simulation
num_simulations = 5 # Number of Monte Carlo simulations
num_steps = 25  # Number of future steps to simulate

# Create an array of future dates based on the last date in the dataset
last_date = features['Dates'].iloc[-1]
future_dates = pd.date_range(start=last_date, periods=num_steps+1, inclusive='right').tolist()[1:]

# Convert the future dates to numeric representation
future_features = pd.DataFrame({'Dates': future_dates})
future_features.loc[:, 'Dates'] = future_features['Dates'].astype(int) // 10**9
future_features = future_features.values.reshape(-1, 1)



  sp500_filtered = sp500_filtered.fillna(sp500_filtered.mean())


           Dates  PX_LAST  Pct Change
0     1980-05-19   107.67    0.040660
1     1980-05-20   107.62   -0.046438
2     1980-05-21   107.72    0.092920
3     1980-05-22   109.01    1.197549
4     1980-05-23   110.62    1.476929
...          ...      ...         ...
10858 2023-06-12  4338.93    0.932108
10859 2023-06-13  4369.01    0.693258
10860 2023-06-14  4372.59    0.081941
10861 2023-06-15  4425.84    1.217814
10862 2023-06-16  4409.59   -0.367162

[10863 rows x 3 columns]
           Dates  Pct Change
0     1980-05-19    0.040660
1     1980-05-20   -0.046438
2     1980-05-21    0.092920
3     1980-05-22    1.197549
4     1980-05-23    1.476929
...          ...         ...
10858 2023-06-12    0.932108
10859 2023-06-13    0.693258
10860 2023-06-14    0.081941
10861 2023-06-15    1.217814
10862 2023-06-16   -0.367162

[10863 rows x 2 columns]


In [13]:
# Perform Monte Carlo simulation to generate multiple predictions
future_predictions = []
for i in range(num_simulations):
    # Sample a random noise for each simulation
    random_noise = np.random.normal(size=num_steps)

    # Convert the future dates to timestamps and then numeric representation
    future_dates_timestamp = pd.to_datetime(future_dates)
    future_dates_numeric = (future_dates_timestamp.astype(int) // 10**9).values

    # Ensure that both arrays have the same length
    num_samples = min(len(future_dates_numeric), len(random_noise))
    future_dates_numeric = future_dates_numeric[:num_samples]
    random_noise = random_noise[:num_samples]

    # Create a DataFrame with two columns: Dates and RandomNoise
    future_features_df = pd.DataFrame({'Dates': future_dates_numeric, 'Pct Change': random_noise})

    # Predict using the model for the shuffled future features
    prediction = model.predict(future_features_df)
    future_predictions.append(prediction)

# Now you have the future_predictions array containing Monte Carlo simulated future values
# You can convert it to a DataFrame if needed
future_predictions_df = pd.DataFrame(future_predictions)

print(future_predictions_df)


         0         1         2         3         4         5         6   \
0  107.7745  107.7973  108.3159  108.3220  107.8657  108.2920  107.7745   
1  108.2920  107.8823  107.8643  108.3220  107.7973  107.8823  107.7794   
2  108.3220  108.3220  108.3303  107.7973  107.8184  107.7973  107.7948   
3  107.8657  107.7423  107.7683  107.7745  107.7794  108.3143  107.8289   
4  108.3220  107.8823  107.7745  108.3220  107.7668  107.8692  107.8823   

         7         8         9   ...        14        15        16        17  \
0  108.2920  108.3220  107.7794  ...  107.7973  107.7745  107.8643  107.7745   
1  107.7973  107.7794  107.8643  ...  107.7423  107.7794  108.3303  107.7745   
2  108.2920  107.8823  107.8289  ...  107.7794  108.2920  108.3220  108.3143   
3  107.7794  107.7948  107.7794  ...  108.3143  107.7948  108.3220  107.7745   
4  107.7745  108.3159  107.7794  ...  107.8823  108.2920  108.3220  108.3303   

         18        19        20        21        22        23  
0  1