In [8]:
import pandas as pd

# Workbook holding the daily observations used throughout this notebook.
excel_file_path = '/content/sample_data/sample_coil.xlsx'

# Read the workbook into a DataFrame (pandas defaults: first sheet, first row as header).
df = pd.read_excel(io=excel_file_path)

# Show the column labels so the feature/target names used below can be checked.
print(df.columns)

Index(['date', 'forex', 'inflation', 'sp500', 'EPU'], dtype='object')


# Task
Load the data from "/content/sample_data/sample_coil.xlsx", print the data, and then build a Random Forest Regressor model to predict 'forex' using 'date', 'inflation', 'sp500', and 'EPU' as features. Split the data into training and testing sets and evaluate the model.

## Prepare the data

### Subtask:
Convert the 'date' column to a numerical representation suitable for the model and define the features (X) and target (Y).


**Reasoning**:
Convert the 'date' column to datetime objects, then encode each date as an integer day ordinal (via `toordinal()`), and define the feature matrix (X) and target variable (Y).



In [4]:
# Encode 'date' as a proleptic Gregorian ordinal (an integer day count) so the
# model receives a numeric feature.
#
# The conversion is guarded so this cell is safe to re-run. Once 'date' already
# holds integer ordinals, sending it through pd.to_datetime again would read
# those integers as nanosecond offsets from 1970-01-01, and every row would
# collapse to the same ordinal -- silently destroying the feature.
# NOTE(review): assumes the freshly loaded 'date' column is datetime-like or
# string, not raw integers -- confirm against the workbook.
if not pd.api.types.is_integer_dtype(df['date']):
    df['date'] = pd.to_datetime(df['date']).apply(lambda ts: ts.toordinal())

# Feature matrix (predictors) and regression target.
X = df[['date', 'inflation', 'sp500', 'EPU']]
Y = df['forex']

print("Features (X) shape:", X.shape)
print("Target (Y) shape:", Y.shape)

Features (X) shape: (365, 4)
Target (Y) shape: (365,)


## Split the data

### Subtask:
Split the data into training and testing sets.


**Reasoning**:
Split the data into training and testing sets using `train_test_split` and print the shapes of the resulting sets.



In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

# Report the size of every partition, one line per split.
print(
    *(
        f"{label} shape: {part.shape}"
        for label, part in (
            ("X_train", X_train),
            ("X_test", X_test),
            ("Y_train", Y_train),
            ("Y_test", Y_test),
        )
    ),
    sep="\n",
)

X_train shape: (292, 4)
X_test shape: (73, 4)
Y_train shape: (292,)
Y_test shape: (73,)


## Train the model

### Subtask:
Train a Random Forest Regressor model on the training data.


**Reasoning**:
Import RandomForestRegressor, instantiate the model, and train it using the training data.



In [6]:
from sklearn.ensemble import RandomForestRegressor

# Random forest with library-default hyperparameters; only the seed is pinned
# so repeated fits give the same forest.
model = RandomForestRegressor(random_state=42)

# Fit on the training partition. Left as the final expression so the notebook
# still displays the fitted estimator.
model.fit(X=X_train, y=Y_train)

## Evaluate the model

### Subtask:
Evaluate the performance of the trained model on the testing data.


**Reasoning**:
Evaluate the performance of the trained model on the testing data by calculating and printing the Mean Squared Error and R-squared score.



In [7]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict the target for the held-out rows.
Y_pred = model.predict(X_test)

# Mean squared error (in squared target units) and coefficient of determination.
mse, r2 = mean_squared_error(Y_test, Y_pred), r2_score(Y_test, Y_pred)

print("Mean Squared Error:", mse)
print("R-squared:", r2)

Mean Squared Error: 60279.348634246584
R-squared: 0.09517858581279781


In [9]:
import pandas as pd

# Per-feature importance scores exposed by the fitted forest.
feature_importances = model.feature_importances_

# Attach each score to the training column it belongs to.
feature_importance_series = pd.Series(data=feature_importances, index=X_train.columns)

# Rank the features from most to least important.
sorted_feature_importances = feature_importance_series.sort_values(ascending=False)

# Heading as plain text, then the ranked scores via the notebook's rich display.
print("Feature Importances:")
display(sorted_feature_importances)

Feature Importances:


Unnamed: 0,0
EPU,0.400737
date,0.27834
sp500,0.263532
inflation,0.057392
