In [None]:
# importing modules
import os
import pandas as pd
import numpy as np
import joblib as pkl
import seaborn as sns
import itertools
import concurrent.futures
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from statsmodels.tsa.api import VAR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from statsmodels.tsa.statespace.varmax import VARMAX
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential,load_model
from tensorflow.keras.layers import     Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.layers import LSTM

In [None]:
# Read the modelling table from disk; the bare `df` renders it as the cell output.
df = pd.read_csv('final_df.csv')
df

In [None]:
# Drop the stray 'Unnamed: 0' column (presumably an index written by an earlier to_csv).
# errors='ignore' makes the cell idempotent: re-running it after the column is
# already gone no longer raises KeyError.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')
df

In [None]:
# Convert every column to a numeric dtype; values that cannot be parsed become NaN.
df = df.apply(pd.to_numeric, errors='coerce')

In [None]:
# The 14 per-commodity percent-change columns are the prediction targets;
# every remaining column of `df` is used as a feature.
target_columns = [
    'NATURAL GAS_Percent_Change',
    'GOLD_Percent_Change',
    'WTI CRUDE_Percent_Change',
    'BRENT CRUDE_Percent_Change',
    'SOYBEANS_Percent_Change',
    'CORN_Percent_Change',
    'COPPER_Percent_Change',
    'ALUMINIUM_Percent_Change',
    'ZINC_Percent_Change',
    'NICKEL_Percent_Change',
    'WHEAT_Percent_Change',
    'SUGAR_Percent_Change',
    'COFFEE_Percent_Change',
    'COTTON_Percent_Change',
]

y = df[target_columns]
X = df.drop(columns=target_columns)


In [None]:
# Split the data into train and test sets.
# shuffle=False keeps the split chronological (first 80% train, last 20% test).
# The VAR/VARMAX cells below fit y_train as a time series and forecast
# len(y_test) steps ahead, which is only meaningful when the row order is kept;
# a shuffled split also lets rows from the future leak into training.
# NOTE(review): assumes the rows of final_df.csv are sorted by date - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Scale the features to [0, 1]; the scaler is fitted on the training rows only
# and then applied unchanged to the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Empty score table; each model cell below fills in one row.
metric_columns = [
    'Model',
    'Mean Squared Error',
    'Root Mean Squared Error',
    'Mean Absolute Error',
    'R-squared',
]
results_df = pd.DataFrame(columns=metric_columns)


In [None]:
# SVM model training: one independent SVR per target column
model = MultiOutputRegressor(SVR())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the test-set predictions
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # Calculate RMSE
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")
print(f"R-squared: {r2}")

# Persist the fitted model. The target directory is created first so a fresh
# checkout without ./models does not fail with FileNotFoundError.
os.makedirs("./models", exist_ok=True)
with open("./models/svm.pkl", "wb") as file:
    pkl.dump(model, file)

# Record this model's scores as row 0 of the results table
for column_name, score in {
    'Model': 'svm',
    'Mean Absolute Error': mae,
    'Mean Squared Error': mse,
    'Root Mean Squared Error': rmse,
    'R-squared': r2,
}.items():
    results_df.loc[0, column_name] = score

In [None]:
# DecisionTreeRegressor model training: one independent tree per target column
model = MultiOutputRegressor(DecisionTreeRegressor())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the test-set predictions
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # Calculate RMSE
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")
print(f"R-squared: {r2}")

# Persist the fitted model. The target directory is created first so a fresh
# checkout without ./models does not fail with FileNotFoundError.
os.makedirs("./models", exist_ok=True)
with open("./models/DecisionTreeRegressor.pkl", "wb") as file:
    pkl.dump(model, file)

# Record this model's scores as row 1 of the results table
for column_name, score in {
    'Model': 'DecisionTreeRegressor',
    'Mean Absolute Error': mae,
    'Mean Squared Error': mse,
    'Root Mean Squared Error': rmse,
    'R-squared': r2,
}.items():
    results_df.loc[1, column_name] = score

In [None]:
# KNeighborsRegressor model training: one independent regressor per target column
model = MultiOutputRegressor(KNeighborsRegressor())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the test-set predictions
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # Calculate RMSE
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")
print(f"R-squared: {r2}")

# Persist the fitted model. The target directory is created first so a fresh
# checkout without ./models does not fail with FileNotFoundError.
os.makedirs("./models", exist_ok=True)
with open("./models/KNeighborsRegressor.pkl", "wb") as file:
    pkl.dump(model, file)

# Record this model's scores as row 2 of the results table
for column_name, score in {
    'Model': 'KNeighborsRegressor',
    'Mean Absolute Error': mae,
    'Mean Squared Error': mse,
    'Root Mean Squared Error': rmse,
    'R-squared': r2,
}.items():
    results_df.loc[2, column_name] = score

In [None]:
# Write the results table to ML_model_results.csv (the row index is not written).
results_df.to_csv('ML_model_results.csv', index=False)

In [None]:
# Re-read the results table from the CSV written above.
results_df =pd.read_csv('ML_model_results.csv')

In [None]:
# LinearRegression model training: one independent linear model per target column
model = MultiOutputRegressor(LinearRegression())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the test-set predictions
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # Calculate RMSE
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"Mean Absolute Error: {mae}")
print(f"R-squared: {r2}")

# Persist the fitted model. The target directory is created first so a fresh
# checkout without ./models does not fail with FileNotFoundError.
os.makedirs("./models", exist_ok=True)
with open("./models/LinearRegression.pkl", "wb") as file:
    pkl.dump(model, file)

# Record this model's scores as row 3 of the results table
for column_name, score in {
    'Model': 'LinearRegression',
    'Mean Absolute Error': mae,
    'Mean Squared Error': mse,
    'Root Mean Squared Error': rmse,
    'R-squared': r2,
}.items():
    results_df.loc[3, column_name] = score

In [None]:
# Write the results table to ML_model_results.csv (the row index is not written).
results_df.to_csv('ML_model_results.csv', index=False)

In [None]:
# Display the results table collected so far.
results_df

In [None]:
# Re-read the results table from ML_model_results.csv.
results_df = pd.read_csv('ML_model_results.csv')

In [None]:
# Fit the VAR model using only y_train (the targets are modelled jointly as a
# multivariate time series; the X features are not used here).
# NOTE(review): VAR treats the row order of y_train as time order - confirm the
# train/test split preserves chronology.
model = VAR(y_train)
model_fitted = model.fit()

# Forecast len(y_test) steps ahead, seeded with the last k_ar training rows
forecasted_values = model_fitted.forecast(y_train.values[-model_fitted.k_ar:], steps=len(y_test))

# Calculate evaluation metrics
mse = mean_squared_error(y_test, forecasted_values)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, forecasted_values)
r2 = r2_score(y_test, forecasted_values)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R2): {r2}")

# Record this model's scores as row 4 of the results table
for column_name, score in {
    'Model': 'VAR',
    'Mean Squared Error': mse,
    'Root Mean Squared Error': rmse,
    'Mean Absolute Error': mae,
    'R-squared': r2,
}.items():
    results_df.loc[4, column_name] = score

# Persist the fitted results; create ./models first so a fresh checkout does
# not fail with FileNotFoundError.
os.makedirs('./models', exist_ok=True)
model_fitted.save('./models/var_model.pkl')

In [None]:
# Display the results table collected so far.
results_df

In [None]:
# Write the results table to ML_model_results.csv (the row index is not written).
results_df.to_csv('ML_model_results.csv', index=False)

In [None]:
# Re-read the results table from ML_model_results.csv and display it.
results_df = pd.read_csv('ML_model_results.csv')
results_df

In [None]:
p = 1
q = 1
# Kept for reference only. Per the statsmodels documentation, VARMAX takes
# order=(p, q) and has no differencing term, so `d` is deliberately not passed:
# a third tuple element would not difference the series.
# NOTE(review): if integration is really wanted, difference y_train explicitly
# before fitting (the targets are already percent changes).
d = 1

# Fit the model to the training targets
model = VARMAX(y_train, order=(p, q))
results = model.fit()

# Forecast len(y_test) steps past the end of the training sample
y_pred = results.forecast(steps=len(y_test))

# Calculate evaluation metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R2): {r2}")


In [None]:
# Record the VARIMA scores as row 5 of the results table
for column_name, score in {
    'Model': 'VARIMA',
    'Mean Squared Error': mse,
    'Root Mean Squared Error': rmse,
    'Mean Absolute Error': mae,
    'R-squared': r2,
}.items():
    results_df.loc[5, column_name] = score

# Persist the fitted VARMAX results (`results` from the fitting cell above).
# Saving `model_fitted` here would write the VAR fit under the VARIMA file name.
os.makedirs('./models', exist_ok=True)
results.save('./models/varima_model.pkl')

In [None]:
# Write the results table to ML_model_results.csv (the row index is not written).
results_df.to_csv('ML_model_results.csv', index=False)

In [None]:
# Re-read the results table from ML_model_results.csv and display it.
results_df = pd.read_csv('ML_model_results.csv')
results_df

In [None]:
# Define the VARMA order (p, q)
p, q = 1, 1

# Fit VARMA model on the training targets
model = VARMAX(y_train, order=(p, q))
result = model.fit()

# Forecast len(y_test) steps past the end of the training sample
forecast = result.forecast(steps=len(y_test))

# Calculate evaluation metrics
mse = mean_squared_error(y_test, forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, forecast)
r2 = r2_score(y_test, forecast)

print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R2): {r2}")

# Persist the VARMA fit produced in this cell (`result`). Saving `model_fitted`
# here would write the VAR fit under the VARMA file name.
os.makedirs('./models', exist_ok=True)
result.save('./models/varma_model.pkl')

In [None]:
# Re-read the results table from ML_model_results.csv and display it.
results_df = pd.read_csv('ML_model_results.csv')
results_df

In [None]:
# Record the VARMA scores as row 6 of the results table
varma_scores = {
    'Model': 'VARMA',
    'Mean Squared Error': mse,
    'Root Mean Squared Error': rmse,
    'Mean Absolute Error': mae,
    'R-squared': r2,
}
for column_name, score in varma_scores.items():
    results_df.loc[6, column_name] = score

# Write the updated table back to disk (the row index is not written)
results_df.to_csv('ML_model_results.csv', index=False)

In [None]:
# Scale the features.
# NOTE(review): X_train / X_test were already min-max scaled in the split cell,
# so this second scaler should be close to an identity transform; it also
# rebinds the global `scaler`.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reshape to (samples, timesteps=1, features), the 3-D layout the LSTM layers take
X_train_reshaped = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_reshaped = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

# Define the model: four stacked LSTM blocks of shrinking width, each followed
# by two Dropout(0.05) layers and BatchNormalization, then a linear output
# layer with one unit per target column.
lstm_units = [800, 400, 200, 100]
model = Sequential()
for position, units in enumerate(lstm_units):
    # Every LSTM except the last returns the full sequence for the next LSTM
    layer_kwargs = {'return_sequences': position < len(lstm_units) - 1}
    if position == 0:
        layer_kwargs['input_shape'] = (X_train_reshaped.shape[1], X_train_reshaped.shape[2])
    model.add(LSTM(units, **layer_kwargs))
    model.add(Dropout(0.05))
    model.add(Dropout(0.05))
    model.add(BatchNormalization())
model.add(Dense(units=y_train.shape[1], activation='linear'))  # Linear activation for regression

# 'accuracy' is a classification metric and carries no meaning for continuous
# targets; track mean absolute error next to the MSE loss instead.
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# Callbacks: halve the learning rate after 8 stagnant epochs, stop after 30 and
# restore the best weights seen.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=8, min_lr=1e-20, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=30, restore_best_weights=True)

# Fitting the model.
# NOTE(review): the test split doubles as validation data, so early stopping and
# the LR schedule are tuned on the same rows later used for the reported metrics.
history = model.fit(X_train_reshaped, y_train, epochs=1000, batch_size=4096, callbacks=[early_stopping, lr_scheduler],
                    validation_data=(X_test_reshaped, y_test), verbose=1)

# Save the entire model; create ./models first so a fresh checkout does not fail
os.makedirs("./models", exist_ok=True)
model.save("./models/lstm.h5")

In [None]:
# Predict the test targets with the trained network
forecast = model.predict(X_test_reshaped)

# Score the predictions against the true test targets
mae = mean_absolute_error(y_test, forecast)
r2 = r2_score(y_test, forecast)
mse = mean_squared_error(y_test, forecast)
rmse = np.sqrt(mse)

# Report the four metrics
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R2): {r2}")

Results

In [None]:
# Bar graph: one panel per metric, one bar per model
data = pd.read_csv('ML_model_results.csv')

# Model names go on the x-axis of every panel
models = data['Model']

# Metric columns to compare
mse = data['Mean Squared Error']
rmse = data['Root Mean Squared Error']
mae = data['Mean Absolute Error']
r_squared = data['R-squared']

# 2x2 grid, filled row by row
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(30, 24))
panels = [
    ('Mean Squared Error', mse, 'blue'),
    ('Root Mean Squared Error', rmse, 'green'),
    ('Mean Absolute Error', mae, 'orange'),
    ('R-squared', r_squared, 'red'),
]
for ax, (panel_title, panel_values, panel_color) in zip(axes.flat, panels):
    ax.bar(models, panel_values, color=panel_color)
    ax.set_title(panel_title)

# Adjust layout and render
plt.tight_layout()
plt.show()


In [None]:


data = pd.read_csv('ML_model_results.csv')

# Seaborn styling for the figures that follow
sns.set(style="whitegrid")

# Long format: one row per (model, metric) pair
melted_data = data.melt(id_vars=['Model'], var_name='Metric', value_name='Value')

# Grouped bars: models on the x-axis, one bar colour per metric
g = sns.catplot(data=melted_data, x='Model', y='Value', hue='Metric', kind='bar', height=6, aspect=5)

# Title and axis labels
g.fig.suptitle('Model Comparison Metrics', y=1)
g.set(xlabel='Model', ylabel='Value')

plt.show()


In [None]:
# Seaborn styling (seaborn is already imported as `sns` in the import cell)
sns.set(style="whitegrid")

# Long format: one row per (model, metric) pair
melted_data = data.melt(id_vars=['Model'], var_name='Metric', value_name='Value')

# Grouped boxplot: metrics on the x-axis, one box colour per model
plt.figure(figsize=(12, 8))
sns.boxplot(data=melted_data, x='Metric', y='Value', hue='Model', palette='Set3')
plt.title('Model Comparison Metrics')
plt.show()


In [None]:
# Radar chart (also known as a spider chart or star plot): one closed polygon
# per model, one spoke per metric.

# List of models
models = data['Model']

# Metrics for comparison
metrics = ['Mean Squared Error', 'Root Mean Squared Error', 'Mean Absolute Error', 'R-squared']

# Number of metrics
num_metrics = len(metrics)

# One angle per metric, plus the first angle again so each polygon closes
angles = np.linspace(0, 2 * np.pi, num_metrics, endpoint=False).tolist()
angles += angles[:1]

# Plotting
fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

# Plot each model. Rows are addressed by position so the loop does not depend
# on the frame carrying a 0..n-1 integer index.
for row_position in range(len(data)):
    values = data[metrics].iloc[row_position].values.tolist()
    values += values[:1]
    ax.plot(angles, values, label=models.iloc[row_position])

# Label each spoke with its metric; without this the axes cannot be told apart
ax.set_xticks(angles[:-1])
ax.set_xticklabels(metrics)

# Add legend
ax.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))

# Show the plot
plt.title('Model Comparison Metrics')
plt.show()

Actual vs Prediction

In [None]:
# The 14 percent-change target columns, in the order used throughout the notebook
actual_columns = [
    'NATURAL GAS_Percent_Change', 'GOLD_Percent_Change',
    'WTI CRUDE_Percent_Change', 'BRENT CRUDE_Percent_Change',
    'SOYBEANS_Percent_Change', 'CORN_Percent_Change',
    'COPPER_Percent_Change', 'ALUMINIUM_Percent_Change',
    'ZINC_Percent_Change', 'NICKEL_Percent_Change', 'WHEAT_Percent_Change',
    'SUGAR_Percent_Change', 'COFFEE_Percent_Change',
    'COTTON_Percent_Change',
]

# Reload the modelling table, coerce it to numeric and keep only the targets
df = (
    pd.read_csv('final_df.csv')
    .drop(columns=['Unnamed: 0'])
    .apply(pd.to_numeric, errors='coerce')
)
df = df[actual_columns]

# Disabled experiment, kept for reference: rebuild price levels from the
# percent changes.
#   df1.iloc[0:, :] = (1 + df.iloc[0:, :] / 100).cumprod() * first_row.values

# Save the actual target values (index included) and display them
df.to_csv('Actual.csv')
df


In [None]:
# Display name -> file stem under ./models. The SVM model is written as
# 'svm.pkl' (lower case) by its training cell, so looking up 'SVM.pkl' fails on
# case-sensitive file systems; the display name is still used for the output file.
model_files = {
    'LinearRegression': 'LinearRegression',
    'KNeighborsRegressor': 'KNeighborsRegressor',
    'DecisionTreeRegressor': 'DecisionTreeRegressor',
    'SVM': 'svm',
}
models = list(model_files)

# The scaled feature matrix is the same for every model, so build it once.
# NOTE(review): this scaler is fitted on all of X, whereas the models were
# trained with a scaler fitted on the training split only - confirm intended.
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

for mod in models:
    # Load the saved model.
    # NOTE: unpickling executes arbitrary code; only load files this notebook produced.
    model_path = f'./models/{model_files[mod]}.pkl'
    with open(model_path, 'rb') as file:
        model = pkl.load(file)

    # Predict on the entire dataset (historical data)
    y_pred = model.predict(X_scaled)

    # One output column per target, named after the commodity. A separate name
    # is used so the notebook-wide `df` is not overwritten.
    predictions_df = pd.DataFrame(
        {col.replace('_Percent_Change', ''): y_pred[:, i] for i, col in enumerate(y.columns)}
    )
    predictions_df.to_csv(f'{mod}_histrolic_price_prediction.csv')



In [None]:
import inspect
# Imported under its own name on purpose: aliasing it to `pkl` would shadow the
# joblib alias from the import cell for every cell that runs afterwards.
import pickle

# One entry per statsmodels results file saved by the VAR / VARIMA / VARMA cells
models = ['var_model', 'varima_model', 'varma_model']

# Reload the modelling table and keep only the 14 target columns
df = pd.read_csv('final_df.csv')
df = df.drop(columns=['Unnamed: 0'])
df = df.apply(pd.to_numeric, errors='coerce')

df = df[['NATURAL GAS_Percent_Change', 'GOLD_Percent_Change',
         'WTI CRUDE_Percent_Change', 'BRENT CRUDE_Percent_Change',
         'SOYBEANS_Percent_Change', 'CORN_Percent_Change',
         'COPPER_Percent_Change', 'ALUMINIUM_Percent_Change',
         'ZINC_Percent_Change', 'NICKEL_Percent_Change', 'WHEAT_Percent_Change',
         'SUGAR_Percent_Change', 'COFFEE_Percent_Change',
         'COTTON_Percent_Change']]

# Number of steps to forecast: one per row of the table (adjust as needed)
num_steps = len(df)

for mod in models:
    # Load the saved results object.
    # NOTE: unpickling executes arbitrary code; only load files this notebook produced.
    model_path = f'./models/{mod}.pkl'
    with open(model_path, 'rb') as file:
        model = pickle.load(file)

    # VAR results take the conditioning observations through `y`; state-space
    # results forecast from the end of their own training sample and expose no
    # such parameter, so only pass `y` when the method declares it.
    # NOTE(review): confirm against the installed statsmodels version.
    if 'y' in inspect.signature(model.forecast).parameters:
        y_pred = model.forecast(y=df.values, steps=num_steps)
    else:
        y_pred = model.forecast(steps=num_steps)

    # forecast() may return an ndarray or a DataFrame; normalise so the
    # positional column slicing below works for both.
    y_pred = np.asarray(y_pred)

    # One output column per target, named after the commodity
    df_result = pd.DataFrame({col.replace('_Percent_Change', ''): y_pred[:, i] for i, col in enumerate(df.columns)})
    df_result.to_csv(f'{mod}_historical_price_prediction.csv')


In [None]:
# Reload the trained LSTM from disk
model_path = './models/lstm.h5'
model = load_model(model_path)

# The 14 percent-change columns are the targets; all other columns are features
lstm_target_columns = [
    'NATURAL GAS_Percent_Change', 'GOLD_Percent_Change',
    'WTI CRUDE_Percent_Change', 'BRENT CRUDE_Percent_Change',
    'SOYBEANS_Percent_Change', 'CORN_Percent_Change',
    'COPPER_Percent_Change', 'ALUMINIUM_Percent_Change',
    'ZINC_Percent_Change', 'NICKEL_Percent_Change', 'WHEAT_Percent_Change',
    'SUGAR_Percent_Change', 'COFFEE_Percent_Change',
    'COTTON_Percent_Change',
]

# Read and preprocess the modelling table
df = (
    pd.read_csv('final_df.csv')
    .drop(columns=['Unnamed: 0'])
    .apply(pd.to_numeric, errors='coerce')
)
X = df.drop(columns=lstm_target_columns)
y = df[lstm_target_columns]

# Scale the features, then reshape to (samples, 1, features) for the LSTM
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
n_samples, n_features = X_scaled.shape
X_scaled = X_scaled.reshape(n_samples, 1, n_features)

# Predict on the entire dataset
y_pred = model.predict(X_scaled)

# One output column per target, named after the commodity
df = pd.DataFrame(
    {target.replace('_Percent_Change', ''): y_pred[:, position] for position, target in enumerate(y.columns)}
)
df.to_csv('LSTM_histrolic_price_prediction.csv')

Future Price Prediction

In [None]:
def calculate_rolling_stats(data, col, win):
    """Return rolling mean, rolling std and scaled volatility of ``data[col]``.

    Args:
        data: DataFrame holding the series.
        col: Name of the column to summarise.
        win: Rolling window length in rows.

    Returns:
        Tuple ``(rolling_mean, rolling_std, historical_volatility)`` of Series
        aligned with ``data``; the volatility is the rolling std multiplied by
        ``sqrt(252)``.
    """
    window = data[col].rolling(window=win)
    rolling_std = window.std()
    return window.mean(), rolling_std, rolling_std * (252 ** 0.5)


def feature_eng(df1, num_lags=[1, 3, 5, 7, 9, 20, 21, 100, 200], window_size=[5, 10, 20, 21, 100, 200]):
    """Build lag, rolling, momentum and cross-column features for every column.

    Args:
        df1: DataFrame of numeric series, one column per instrument. Not modified.
        num_lags: Lag depths; lags ``1..max(num_lags)`` are generated per column.
        window_size: Window lengths for the rolling mean/std and EMA features.

    Returns:
        A new DataFrame with the original columns followed, per column, by the
        lag, SMA/STD/HV/EMA, RSI, EMA12/EMA26/MACD and stochastic-oscillator
        features, and finally a spread and a ratio column for every pair of
        original columns.
    """
    # Columns are collected in an insertion-ordered dict and the frame is built
    # once at the end. Inserting thousands of columns one at a time fragments
    # the DataFrame (pandas warns about this). Re-assigning an existing key
    # keeps its position, exactly as DataFrame column assignment does.
    features = {col: df1[col] for col in df1.columns}

    # Every entry of num_lags requests lags 1..num, so the distinct set of lags
    # is simply 1..max(num_lags); each one is computed once.
    max_lag = max(num_lags, default=0)

    for col in df1.columns:
        series = df1[col]

        for lag in range(1, max_lag + 1):
            features[f'{col}_Lag{lag}'] = series.shift(lag)

        for win in window_size:
            sma, std, volatility = calculate_rolling_stats(df1, col, win)
            features[f'{col}_SMA{win}'] = sma
            features[f'{col}_STD{win}'] = std
            # The volatility name carries no window suffix, so each window
            # overwrites the previous one and only the last window survives.
            # Left as-is: adding a suffix would change the number of columns,
            # which presumably has to match what the saved models were trained on.
            features[f'{col}_HV'] = volatility

            # Moving Averages EMA
            features[f'{col}_EMA{win}'] = series.ewm(span=win, adjust=False).mean()

        # RSI-style indicator, computed element-wise on the series itself
        gain = series.where(series > 0, 0)
        loss = -series.where(series < 0, 0)
        rs = gain / loss
        features[f'{col}_RSI'] = 100 - (100 / (1 + rs))

        # MACD Calculation
        ema12 = series.ewm(span=12, adjust=False).mean()
        ema26 = series.ewm(span=26, adjust=False).mean()
        features[f'{col}_EMA12'] = ema12
        features[f'{col}_EMA26'] = ema26
        features[f'{col}_MACD'] = ema12 - ema26

        # Stochastic Oscillator: position within the series' overall min-max range
        min_val, max_val = series.min(), series.max()
        features[f'{col}_Stochastic_Oscillator'] = ((series - min_val) / (max_val - min_val)) * 100

    # Spread and ratio for every unordered pair of original columns
    for col1, col2 in itertools.combinations(df1.columns, 2):
        features[f'{col1}_{col2}_Spread'] = df1[col1] - df1[col2]
        features[f'{col1}_{col2}_Ratio'] = df1[col1] / df1[col2]

    df2 = pd.DataFrame(features, index=df1.index)
    df2.columns.name = df1.columns.name
    return df2

# Recursive 30-step forecast of daily percent changes with the saved
# LinearRegression model. Each predicted row is appended to the history so the
# next step's lag/rolling features are built on it.
import joblib  # own name, so loading still works if `pkl` no longer refers to joblib

df = pd.read_csv('df.csv')
df = df.drop(columns=['Date'])
df = df.dropna()
num_future_points = 30
# Row labels for the forecast horizon, continuing the integer index of the history
future_indices = np.arange(df.index[-1] + 1, df.index[-1] + num_future_points + 1)

scaler = MinMaxScaler()

# Load the saved model.
# NOTE: unpickling executes arbitrary code; only load files this notebook produced.
model_path = './models/LinearRegression.pkl'
model = joblib.load(model_path)

# Raw commodity columns, listed in the order of the model's target columns;
# they are dropped from the feature matrix and receive the predictions.
commodity_cols = ['NATURAL GAS', 'GOLD', 'WTI CRUDE', 'BRENT CRUDE',
                  'SOYBEANS', 'CORN', 'COPPER', 'ALUMINIUM',
                  'ZINC', 'NICKEL', 'WHEAT', 'SUGAR', 'COFFEE',
                  'COTTON']
macro_cols = ['Dollar_Index', 'BOND_10Y']

# Day-over-day percent change of every column; the first row has no
# predecessor and is dropped.
df1 = ((df - df.shift(1)) / df.shift(1)) * 100
df1 = df1.dropna()

for next_index in future_indices:
    # Features are rebuilt from the most recent 300 rows only
    df2 = feature_eng(df1.iloc[-300:])
    df2 = df2.dropna()
    df2 = df2.drop(columns=commodity_cols)

    # NOTE(review): the scaler fitted at training time is not persisted anywhere,
    # so it is re-fitted on this window; the scaling may differ from training.
    X_scaled_future = scaler.fit_transform(df2.values)

    # Predict the next row from the latest feature row
    y_pred_future = model.predict(X_scaled_future[-1].reshape(1, -1))

    # Assemble the next percent-change row.
    # NOTE(review): assumes df.csv holds exactly the 14 commodity columns plus
    # the two macro columns - confirm.
    next_row = pd.Series(np.nan, index=df1.columns, dtype=float)
    next_row[commodity_cols] = y_pred_future[0]
    # The model does not predict the macro series; fill them with the mean of
    # the predictions (placeholder heuristic - any other rule can be used).
    next_row[macro_cols] = np.mean(y_pred_future)

    # Feed the prediction back so the next iteration forecasts one step further
    df1.loc[next_index] = next_row

# Values are predicted percent changes, not price levels; compound them from the
# last observed prices if levels are needed.
future_data = df1.loc[future_indices, df.columns]
future_data.to_csv('LinearRegression.csv')

In [None]:
def calculate_rolling_stats(data, col, win):
    """Return rolling mean, rolling std and scaled volatility of ``data[col]``.

    Args:
        data: DataFrame holding the series.
        col: Name of the column to summarise.
        win: Rolling window length in rows.

    Returns:
        Tuple ``(rolling_mean, rolling_std, historical_volatility)`` of Series
        aligned with ``data``; the volatility is the rolling std multiplied by
        ``sqrt(252)``.
    """
    window = data[col].rolling(window=win)
    rolling_std = window.std()
    return window.mean(), rolling_std, rolling_std * (252 ** 0.5)


def feature_eng(df1, num_lags=[1, 3, 5, 7, 9, 20, 21, 100, 200], window_size=[5, 10, 20, 21, 100, 200]):
    """Build lag, rolling, momentum and cross-column features for every column.

    Args:
        df1: DataFrame of numeric series, one column per instrument. Not modified.
        num_lags: Lag depths; lags ``1..max(num_lags)`` are generated per column.
        window_size: Window lengths for the rolling mean/std and EMA features.

    Returns:
        A new DataFrame with the original columns followed, per column, by the
        lag, SMA/STD/HV/EMA, RSI, EMA12/EMA26/MACD and stochastic-oscillator
        features, and finally a spread and a ratio column for every pair of
        original columns.
    """
    # Columns are collected in an insertion-ordered dict and the frame is built
    # once at the end. Inserting thousands of columns one at a time fragments
    # the DataFrame (pandas warns about this). Re-assigning an existing key
    # keeps its position, exactly as DataFrame column assignment does.
    features = {col: df1[col] for col in df1.columns}

    # Every entry of num_lags requests lags 1..num, so the distinct set of lags
    # is simply 1..max(num_lags); each one is computed once.
    max_lag = max(num_lags, default=0)

    for col in df1.columns:
        series = df1[col]

        for lag in range(1, max_lag + 1):
            features[f'{col}_Lag{lag}'] = series.shift(lag)

        for win in window_size:
            sma, std, volatility = calculate_rolling_stats(df1, col, win)
            features[f'{col}_SMA{win}'] = sma
            features[f'{col}_STD{win}'] = std
            # The volatility name carries no window suffix, so each window
            # overwrites the previous one and only the last window survives.
            # Left as-is: adding a suffix would change the number of columns,
            # which presumably has to match what the saved models were trained on.
            features[f'{col}_HV'] = volatility

            # Moving Averages EMA
            features[f'{col}_EMA{win}'] = series.ewm(span=win, adjust=False).mean()

        # RSI-style indicator, computed element-wise on the series itself
        gain = series.where(series > 0, 0)
        loss = -series.where(series < 0, 0)
        rs = gain / loss
        features[f'{col}_RSI'] = 100 - (100 / (1 + rs))

        # MACD Calculation
        ema12 = series.ewm(span=12, adjust=False).mean()
        ema26 = series.ewm(span=26, adjust=False).mean()
        features[f'{col}_EMA12'] = ema12
        features[f'{col}_EMA26'] = ema26
        features[f'{col}_MACD'] = ema12 - ema26

        # Stochastic Oscillator: position within the series' overall min-max range
        min_val, max_val = series.min(), series.max()
        features[f'{col}_Stochastic_Oscillator'] = ((series - min_val) / (max_val - min_val)) * 100

    # Spread and ratio for every unordered pair of original columns
    for col1, col2 in itertools.combinations(df1.columns, 2):
        features[f'{col1}_{col2}_Spread'] = df1[col1] - df1[col2]
        features[f'{col1}_{col2}_Ratio'] = df1[col1] / df1[col2]

    df2 = pd.DataFrame(features, index=df1.index)
    df2.columns.name = df1.columns.name
    return df2

# Recursive 30-step forecast of daily percent changes with the saved
# KNeighborsRegressor model. Each predicted row is appended to the history so
# the next step's lag/rolling features are built on it.
import joblib  # own name, so loading still works if `pkl` no longer refers to joblib

df = pd.read_csv('df.csv')
df = df.drop(columns=['Date'])
df = df.dropna()
num_future_points = 30
# Row labels for the forecast horizon, continuing the integer index of the history
future_indices = np.arange(df.index[-1] + 1, df.index[-1] + num_future_points + 1)

scaler = MinMaxScaler()

# Load the saved model.
# NOTE: unpickling executes arbitrary code; only load files this notebook produced.
model_path = './models/KNeighborsRegressor.pkl'
model = joblib.load(model_path)

# Raw commodity columns, listed in the order of the model's target columns;
# they are dropped from the feature matrix and receive the predictions.
commodity_cols = ['NATURAL GAS', 'GOLD', 'WTI CRUDE', 'BRENT CRUDE',
                  'SOYBEANS', 'CORN', 'COPPER', 'ALUMINIUM',
                  'ZINC', 'NICKEL', 'WHEAT', 'SUGAR', 'COFFEE',
                  'COTTON']
macro_cols = ['Dollar_Index', 'BOND_10Y']

# Day-over-day percent change of every column; the first row has no
# predecessor and is dropped.
df1 = ((df - df.shift(1)) / df.shift(1)) * 100
df1 = df1.dropna()

for next_index in future_indices:
    # Features are rebuilt from the most recent 300 rows only
    df2 = feature_eng(df1.iloc[-300:])
    df2 = df2.dropna()
    df2 = df2.drop(columns=commodity_cols)

    # NOTE(review): the scaler fitted at training time is not persisted anywhere,
    # so it is re-fitted on this window; the scaling may differ from training.
    X_scaled_future = scaler.fit_transform(df2.values)

    # Predict the next row from the latest feature row
    y_pred_future = model.predict(X_scaled_future[-1].reshape(1, -1))

    # Assemble the next percent-change row.
    # NOTE(review): assumes df.csv holds exactly the 14 commodity columns plus
    # the two macro columns - confirm.
    next_row = pd.Series(np.nan, index=df1.columns, dtype=float)
    next_row[commodity_cols] = y_pred_future[0]
    # The model does not predict the macro series; fill them with the mean of
    # the predictions (placeholder heuristic - any other rule can be used).
    next_row[macro_cols] = np.mean(y_pred_future)

    # Feed the prediction back so the next iteration forecasts one step further
    df1.loc[next_index] = next_row

# Values are predicted percent changes, not price levels; compound them from the
# last observed prices if levels are needed.
future_data = df1.loc[future_indices, df.columns]
future_data.to_csv('knn.csv')

In [None]:
def calculate_rolling_stats(data, col, win):
    """Return rolling mean, rolling std and scaled volatility of ``data[col]``.

    Args:
        data: DataFrame holding the series.
        col: Name of the column to summarise.
        win: Rolling window length in rows.

    Returns:
        Tuple ``(rolling_mean, rolling_std, historical_volatility)`` of Series
        aligned with ``data``; the volatility is the rolling std multiplied by
        ``sqrt(252)``.
    """
    window = data[col].rolling(window=win)
    rolling_std = window.std()
    return window.mean(), rolling_std, rolling_std * (252 ** 0.5)


def feature_eng(df1, num_lags=[1, 3, 5, 7, 9, 20, 21, 100, 200], window_size=[5, 10, 20, 21, 100, 200]):
    """Build lag, rolling, momentum and cross-column features for every column.

    Args:
        df1: DataFrame of numeric series, one column per instrument. Not modified.
        num_lags: Lag depths; lags ``1..max(num_lags)`` are generated per column.
        window_size: Window lengths for the rolling mean/std and EMA features.

    Returns:
        A new DataFrame with the original columns followed, per column, by the
        lag, SMA/STD/HV/EMA, RSI, EMA12/EMA26/MACD and stochastic-oscillator
        features, and finally a spread and a ratio column for every pair of
        original columns.
    """
    # Columns are collected in an insertion-ordered dict and the frame is built
    # once at the end. Inserting thousands of columns one at a time fragments
    # the DataFrame (pandas warns about this). Re-assigning an existing key
    # keeps its position, exactly as DataFrame column assignment does.
    features = {col: df1[col] for col in df1.columns}

    # Every entry of num_lags requests lags 1..num, so the distinct set of lags
    # is simply 1..max(num_lags); each one is computed once.
    max_lag = max(num_lags, default=0)

    for col in df1.columns:
        series = df1[col]

        for lag in range(1, max_lag + 1):
            features[f'{col}_Lag{lag}'] = series.shift(lag)

        for win in window_size:
            sma, std, volatility = calculate_rolling_stats(df1, col, win)
            features[f'{col}_SMA{win}'] = sma
            features[f'{col}_STD{win}'] = std
            # The volatility name carries no window suffix, so each window
            # overwrites the previous one and only the last window survives.
            # Left as-is: adding a suffix would change the number of columns,
            # which presumably has to match what the saved models were trained on.
            features[f'{col}_HV'] = volatility

            # Moving Averages EMA
            features[f'{col}_EMA{win}'] = series.ewm(span=win, adjust=False).mean()

        # RSI-style indicator, computed element-wise on the series itself
        gain = series.where(series > 0, 0)
        loss = -series.where(series < 0, 0)
        rs = gain / loss
        features[f'{col}_RSI'] = 100 - (100 / (1 + rs))

        # MACD Calculation
        ema12 = series.ewm(span=12, adjust=False).mean()
        ema26 = series.ewm(span=26, adjust=False).mean()
        features[f'{col}_EMA12'] = ema12
        features[f'{col}_EMA26'] = ema26
        features[f'{col}_MACD'] = ema12 - ema26

        # Stochastic Oscillator: position within the series' overall min-max range
        min_val, max_val = series.min(), series.max()
        features[f'{col}_Stochastic_Oscillator'] = ((series - min_val) / (max_val - min_val)) * 100

    # Spread and ratio for every unordered pair of original columns
    for col1, col2 in itertools.combinations(df1.columns, 2):
        features[f'{col1}_{col2}_Spread'] = df1[col1] - df1[col2]
        features[f'{col1}_{col2}_Ratio'] = df1[col1] / df1[col2]

    df2 = pd.DataFrame(features, index=df1.index)
    df2.columns.name = df1.columns.name
    return df2

# Recursive 30-step forecast of daily percent changes with the saved SVM
# model. Each predicted row is appended to the history so the next step's
# lag/rolling features are built on it.
import joblib  # own name, so loading still works if `pkl` no longer refers to joblib

df = pd.read_csv('df.csv')
df = df.drop(columns=['Date'])
df = df.dropna()
num_future_points = 30
# Row labels for the forecast horizon, continuing the integer index of the history
future_indices = np.arange(df.index[-1] + 1, df.index[-1] + num_future_points + 1)

scaler = MinMaxScaler()

# Load the saved model.
# NOTE: unpickling executes arbitrary code; only load files this notebook produced.
model_path = './models/svm.pkl'
model = joblib.load(model_path)

# Raw commodity columns, listed in the order of the model's target columns;
# they are dropped from the feature matrix and receive the predictions.
commodity_cols = ['NATURAL GAS', 'GOLD', 'WTI CRUDE', 'BRENT CRUDE',
                  'SOYBEANS', 'CORN', 'COPPER', 'ALUMINIUM',
                  'ZINC', 'NICKEL', 'WHEAT', 'SUGAR', 'COFFEE',
                  'COTTON']
macro_cols = ['Dollar_Index', 'BOND_10Y']

# Day-over-day percent change of every column; the first row has no
# predecessor and is dropped.
df1 = ((df - df.shift(1)) / df.shift(1)) * 100
df1 = df1.dropna()

for next_index in future_indices:
    # Features are rebuilt from the most recent 300 rows only
    df2 = feature_eng(df1.iloc[-300:])
    df2 = df2.dropna()
    df2 = df2.drop(columns=commodity_cols)

    # NOTE(review): the scaler fitted at training time is not persisted anywhere,
    # so it is re-fitted on this window; the scaling may differ from training.
    X_scaled_future = scaler.fit_transform(df2.values)

    # Predict the next row from the latest feature row
    y_pred_future = model.predict(X_scaled_future[-1].reshape(1, -1))

    # Assemble the next percent-change row.
    # NOTE(review): assumes df.csv holds exactly the 14 commodity columns plus
    # the two macro columns - confirm.
    next_row = pd.Series(np.nan, index=df1.columns, dtype=float)
    next_row[commodity_cols] = y_pred_future[0]
    # The model does not predict the macro series; fill them with the mean of
    # the predictions (placeholder heuristic - any other rule can be used).
    next_row[macro_cols] = np.mean(y_pred_future)

    # Feed the prediction back so the next iteration forecasts one step further
    df1.loc[next_index] = next_row

# Values are predicted percent changes, not price levels; compound them from the
# last observed prices if levels are needed.
future_data = df1.loc[future_indices, df.columns]
future_data.to_csv('Svm.csv')

In [None]:
def calculate_rolling_stats(data, col, win):
    """Return rolling mean, rolling std and scaled volatility of ``data[col]``.

    Args:
        data: DataFrame holding the series.
        col: Name of the column to summarise.
        win: Rolling window length in rows.

    Returns:
        Tuple ``(rolling_mean, rolling_std, historical_volatility)`` of Series
        aligned with ``data``; the volatility is the rolling std multiplied by
        ``sqrt(252)``.
    """
    window = data[col].rolling(window=win)
    rolling_std = window.std()
    return window.mean(), rolling_std, rolling_std * (252 ** 0.5)


def feature_eng(df1, num_lags=[1, 3, 5, 7, 9, 20, 21, 100, 200], window_size=[5, 10, 20, 21, 100, 200]):
    """Build lag, rolling, momentum and cross-column features for every column.

    Args:
        df1: DataFrame of numeric series, one column per instrument. Not modified.
        num_lags: Lag depths; lags ``1..max(num_lags)`` are generated per column.
        window_size: Window lengths for the rolling mean/std and EMA features.

    Returns:
        A new DataFrame with the original columns followed, per column, by the
        lag, SMA/STD/HV/EMA, RSI, EMA12/EMA26/MACD and stochastic-oscillator
        features, and finally a spread and a ratio column for every pair of
        original columns.
    """
    # Columns are collected in an insertion-ordered dict and the frame is built
    # once at the end. Inserting thousands of columns one at a time fragments
    # the DataFrame (pandas warns about this). Re-assigning an existing key
    # keeps its position, exactly as DataFrame column assignment does.
    features = {col: df1[col] for col in df1.columns}

    # Every entry of num_lags requests lags 1..num, so the distinct set of lags
    # is simply 1..max(num_lags); each one is computed once.
    max_lag = max(num_lags, default=0)

    for col in df1.columns:
        series = df1[col]

        for lag in range(1, max_lag + 1):
            features[f'{col}_Lag{lag}'] = series.shift(lag)

        for win in window_size:
            sma, std, volatility = calculate_rolling_stats(df1, col, win)
            features[f'{col}_SMA{win}'] = sma
            features[f'{col}_STD{win}'] = std
            # The volatility name carries no window suffix, so each window
            # overwrites the previous one and only the last window survives.
            # Left as-is: adding a suffix would change the number of columns,
            # which presumably has to match what the saved models were trained on.
            features[f'{col}_HV'] = volatility

            # Moving Averages EMA
            features[f'{col}_EMA{win}'] = series.ewm(span=win, adjust=False).mean()

        # RSI-style indicator, computed element-wise on the series itself
        gain = series.where(series > 0, 0)
        loss = -series.where(series < 0, 0)
        rs = gain / loss
        features[f'{col}_RSI'] = 100 - (100 / (1 + rs))

        # MACD Calculation
        ema12 = series.ewm(span=12, adjust=False).mean()
        ema26 = series.ewm(span=26, adjust=False).mean()
        features[f'{col}_EMA12'] = ema12
        features[f'{col}_EMA26'] = ema26
        features[f'{col}_MACD'] = ema12 - ema26

        # Stochastic Oscillator: position within the series' overall min-max range
        min_val, max_val = series.min(), series.max()
        features[f'{col}_Stochastic_Oscillator'] = ((series - min_val) / (max_val - min_val)) * 100

    # Spread and ratio for every unordered pair of original columns
    for col1, col2 in itertools.combinations(df1.columns, 2):
        features[f'{col1}_{col2}_Spread'] = df1[col1] - df1[col2]
        features[f'{col1}_{col2}_Ratio'] = df1[col1] / df1[col2]

    df2 = pd.DataFrame(features, index=df1.index)
    df2.columns.name = df1.columns.name
    return df2

# Recursive 30-step forecast of daily percent changes with the saved
# DecisionTreeRegressor model. Each predicted row is appended to the history so
# the next step's lag/rolling features are built on it.
import joblib  # own name, so loading still works if `pkl` no longer refers to joblib

df = pd.read_csv('df.csv')
df = df.drop(columns=['Date'])
df = df.dropna()
num_future_points = 30
# Row labels for the forecast horizon, continuing the integer index of the history
future_indices = np.arange(df.index[-1] + 1, df.index[-1] + num_future_points + 1)

scaler = MinMaxScaler()

# Load the saved model.
# NOTE: unpickling executes arbitrary code; only load files this notebook produced.
model_path = './models/DecisionTreeRegressor.pkl'
model = joblib.load(model_path)

# Raw commodity columns, listed in the order of the model's target columns;
# they are dropped from the feature matrix and receive the predictions.
commodity_cols = ['NATURAL GAS', 'GOLD', 'WTI CRUDE', 'BRENT CRUDE',
                  'SOYBEANS', 'CORN', 'COPPER', 'ALUMINIUM',
                  'ZINC', 'NICKEL', 'WHEAT', 'SUGAR', 'COFFEE',
                  'COTTON']
macro_cols = ['Dollar_Index', 'BOND_10Y']

# Day-over-day percent change of every column; the first row has no
# predecessor and is dropped.
df1 = ((df - df.shift(1)) / df.shift(1)) * 100
df1 = df1.dropna()

for next_index in future_indices:
    # Features are rebuilt from the most recent 300 rows only
    df2 = feature_eng(df1.iloc[-300:])
    df2 = df2.dropna()
    df2 = df2.drop(columns=commodity_cols)

    # NOTE(review): the scaler fitted at training time is not persisted anywhere,
    # so it is re-fitted on this window; the scaling may differ from training.
    X_scaled_future = scaler.fit_transform(df2.values)

    # Predict the next row from the latest feature row
    y_pred_future = model.predict(X_scaled_future[-1].reshape(1, -1))

    # Assemble the next percent-change row.
    # NOTE(review): assumes df.csv holds exactly the 14 commodity columns plus
    # the two macro columns - confirm.
    next_row = pd.Series(np.nan, index=df1.columns, dtype=float)
    next_row[commodity_cols] = y_pred_future[0]
    # The model does not predict the macro series; fill them with the mean of
    # the predictions (placeholder heuristic - any other rule can be used).
    next_row[macro_cols] = np.mean(y_pred_future)

    # Feed the prediction back so the next iteration forecasts one step further
    df1.loc[next_index] = next_row

# Values are predicted percent changes, not price levels; compound them from the
# last observed prices if levels are needed.
future_data = df1.loc[future_indices, df.columns]
future_data.to_csv('DecisionTreeRegressor.csv')