In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the raw feature table from disk into the working frame.
df = pd.read_csv('features_raw.csv')

In [None]:
# Drop the "Unnamed: 32" column.
# Rebinding (instead of inplace=True) together with errors="ignore" makes this
# cell safe to re-run: a second execution no longer raises KeyError because the
# column is already gone.
df = df.drop(columns=["Unnamed: 32"], errors="ignore")

In [None]:
# Show the current contents of `df` as this cell's output.
df

In [None]:
# List the column labels of `df`.
df.columns

In [None]:
# Box plot of every column of the raw frame, to eyeball spread and outliers.
# The frame is passed with the explicit `data=` keyword: the meaning of the
# first positional argument of `sns.boxplot` has changed between seaborn
# releases, while the keyword is stable.
plt.figure(figsize=(10,10))
sns.boxplot(data=df)
# Render the figure explicitly so the cell shows the plot, not the Axes repr.
plt.show()


In [None]:

# Remove outlier rows using the 1.5 * IQR rule, column by column.
#
# Quartiles are computed with numeric_only=True so that a non-numeric column
# (if the CSV has one) does not make the quantile computation fail.
Q1 = df.quantile(0.25, numeric_only=True)
Q3 = df.quantile(0.75, numeric_only=True)
IQR = Q3 - Q1

# Per-column fences: values outside [lower_bound, upper_bound] count as outliers.
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Compare only the columns that actually have fences; this keeps the
# comparison aligned with Q1/Q3 and leaves any other column untouched.
numeric_values = df[Q1.index]
outlier_mask = (numeric_values < lower_bound) | (numeric_values > upper_bound)

# Keep a row only if none of its numeric values is an outlier.
# `df` itself is left unchanged; the result lives in `df_filtered`.
df_filtered = df[~outlier_mask.any(axis=1)]

# Visualize the filtered data (frame passed via `data=` for compatibility
# across seaborn releases).
plt.figure(figsize=(10,10))
sns.boxplot(data=df_filtered)
plt.show()


In [None]:
# Displays `df`, i.e. the frame as it was before the IQR filtering;
# the outlier-free rows are held in `df_filtered`.
df

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 split of the outlier-filtered rows; the fixed seed makes the
# partition repeatable from run to run.
train_data, test_data = train_test_split(
    df_filtered,
    test_size=0.2,
    random_state=42,
)

# Write each partition to its own CSV file (row index omitted) so that the
# following cells can reload them from disk.
for csv_name, split_frame in (('train_data.csv', train_data), ('test_data.csv', test_data)):
    split_frame.to_csv(csv_name, index=False)


In [None]:
import pandas as pd

# Reload the training partition that was written to disk.
train_data = pd.read_csv('train_data.csv')

# The "predicted attention" value is the plain average of the
# Fz, Pz, P3 and Cz columns.
channel_mean = (train_data['Fz'] + train_data['Pz'] + train_data['P3'] + train_data['Cz']) / 4

# Min-max rescale that average to the range 0..1 and store it as the target column.
mean_lo = channel_mean.min()
mean_hi = channel_mean.max()
train_data['predicted_attention_values'] = (channel_mean - mean_lo) / (mean_hi - mean_lo)

# Show the training frame including the new column.
print(train_data)


In [None]:
from sklearn.ensemble import RandomForestRegressor
# MinMaxScaler is not needed by this cell any more; the import is kept so that
# any later cell relying on this name keeps working.
from sklearn.preprocessing import MinMaxScaler

# Features are the four channel columns; the target is the normalised
# attention column built when the training data was loaded.
X_train = train_data[['Fz', 'Pz', 'P3', 'Cz']]
y_train = train_data['predicted_attention_values']

# Random Forest with a fixed seed so the fit is repeatable.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Store the model's predictions on the target's own scale.
# Previously a MinMaxScaler was re-fitted on the predictions themselves, which
# stretches them to their own min/max and therefore changes the very values
# that are later scored with MSE / R-squared. The target is already in [0, 1],
# so the predictions are only clipped to that range instead.
train_data['predicted_attention_values_rf'] = rf_model.predict(X_train).clip(0.0, 1.0)

# Show the training frame including the new prediction column.
print(train_data)


In [None]:
import xgboost as xgb
# MinMaxScaler is not needed by this cell any more; the import is kept so that
# any later cell relying on this name keeps working.
from sklearn.preprocessing import MinMaxScaler

# Features are the four channel columns; the target is the normalised
# attention column built when the training data was loaded.
X_train = train_data[['Fz', 'Pz', 'P3', 'Cz']]
y_train = train_data['predicted_attention_values']

# XGBoost regressor with a fixed seed so the fit is repeatable.
xgb_model = xgb.XGBRegressor(n_estimators=100, random_state=42)
xgb_model.fit(X_train, y_train)

# Store the model's predictions on the target's own scale.
# Previously a MinMaxScaler was re-fitted on the predictions themselves, which
# stretches them to their own min/max and therefore changes the very values
# that are later scored with MSE / R-squared. The target is already in [0, 1],
# so the predictions are only clipped to that range instead.
train_data['predicted_attention_values_xgb'] = xgb_model.predict(X_train).clip(0.0, 1.0)

# Show the training frame including the new prediction column.
print(train_data)


In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Training-set fit quality of the XGBoost predictions against the target column.
xgb_train_truth = train_data['predicted_attention_values']
xgb_train_pred = train_data['predicted_attention_values_xgb']

# Mean squared error between target and prediction.
mse = mean_squared_error(xgb_train_truth, xgb_train_pred)
print(f'Mean Squared Error: {mse}')

# Coefficient of determination for the same pair of columns.
r2 = r2_score(xgb_train_truth, xgb_train_pred)
print(f'R-squared: {r2}')


In [None]:
# Load the testing data
test_data = pd.read_csv('test_data.csv')

# Raw attention value = plain average of the four channel columns, computed
# the same way for both partitions.
train_channel_mean = (train_data['Fz'] + train_data['Pz'] + train_data['P3'] + train_data['Cz']) / 4
test_channel_mean = (test_data['Fz'] + test_data['Pz'] + test_data['P3'] + test_data['Cz']) / 4

# Normalise the test target with the TRAINING min/max.
# Using the test partition's own min/max (as before) defines a different
# feature -> target mapping for the test rows than the one the models were
# trained on, so test scores would not measure the same quantity.
target_min = train_channel_mean.min()
target_max = train_channel_mean.max()
test_data['predicted_attention_values'] = (test_channel_mean - target_min) / (target_max - target_min)

# Select the features and target variable for testing data
X_test = test_data[['Fz', 'Pz', 'P3', 'Cz']]
y_test = test_data['predicted_attention_values']

# Predict with the trained XGBoost model and keep the values on the target's
# scale. Re-fitting a MinMaxScaler on the predictions would stretch them to
# their own min/max and change the values being scored; they are only clipped
# to the [0, 1] range instead.
test_data['predicted_attention_values_xgb'] = xgb_model.predict(X_test).clip(0.0, 1.0)

# Calculate the mean squared error for testing data
mse_test = mean_squared_error(test_data['predicted_attention_values'], test_data['predicted_attention_values_xgb'])
print(f'Mean Squared Error (Test Data): {mse_test}')

# Calculate the R-squared score for testing data
r2_test = r2_score(test_data['predicted_attention_values'], test_data['predicted_attention_values_xgb'])
print(f'R-squared (Test Data): {r2_test}')

# Training-set figures come from `mse` / `r2`, computed in an earlier cell.
print(f'Mean Squared Error (Training Data): {mse}')
print(f'R-squared (Training Data): {r2}')


In [None]:

# Predict on the test features with the trained Random Forest model.
# The predictions are kept as the model produced them (only clipped to the
# [0, 1] range); re-fitting a MinMaxScaler on them would stretch them to their
# own min/max and change the values that are scored below.
test_data['predicted_attention_values_rf'] = rf_model.predict(X_test).clip(0.0, 1.0)

# Calculate the mean squared error for testing data (Random Forest)
mse_test_rf = mean_squared_error(test_data['predicted_attention_values'], test_data['predicted_attention_values_rf'])
print(f'Mean Squared Error (Test Data - Random Forest): {mse_test_rf}')

# Calculate the R-squared score for testing data (Random Forest)
r2_test_rf = r2_score(test_data['predicted_attention_values'], test_data['predicted_attention_values_rf'])
print(f'R-squared (Test Data - Random Forest): {r2_test_rf}')

# Calculate the mean squared error for training data (Random Forest)
mse_train_rf = mean_squared_error(train_data['predicted_attention_values'], train_data['predicted_attention_values_rf'])
print(f'Mean Squared Error (Training Data - Random Forest): {mse_train_rf}')

# Calculate the R-squared score for training data (Random Forest)
r2_train_rf = r2_score(train_data['predicted_attention_values'], train_data['predicted_attention_values_rf'])
print(f'R-squared (Training Data - Random Forest): {r2_train_rf}')


In [None]:

from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Decision Tree regressor with a fixed seed, trained on the same features and
# target (`X_train`, `y_train`) that the earlier model cells defined.
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)

# Training-set predictions, kept on the target's own scale.
# Re-fitting a MinMaxScaler on the predictions would stretch them to their own
# min/max and change the values being scored, so they are only clipped to the
# [0, 1] range instead.
train_data['predicted_attention_values_dt'] = dt_model.predict(X_train).clip(0.0, 1.0)

# Calculate the mean squared error for training data (Decision Tree)
mse_train_dt = mean_squared_error(train_data['predicted_attention_values'], train_data['predicted_attention_values_dt'])
print(f'Mean Squared Error (Training Data - Decision Tree): {mse_train_dt}')

# Calculate the R-squared score for training data (Decision Tree)
r2_train_dt = r2_score(train_data['predicted_attention_values'], train_data['predicted_attention_values_dt'])
print(f'R-squared (Training Data - Decision Tree): {r2_train_dt}')


# Test-set predictions from the same fitted tree, handled the same way as the
# training predictions (no re-fitted scaler, clip only).
test_data['predicted_attention_values_dt'] = dt_model.predict(X_test).clip(0.0, 1.0)

# Calculate the mean squared error for testing data (Decision Tree)
mse_test_dt = mean_squared_error(test_data['predicted_attention_values'], test_data['predicted_attention_values_dt'])
print(f'Mean Squared Error (Test Data - Decision Tree): {mse_test_dt}')

# Calculate the R-squared score for testing data (Decision Tree)
r2_test_dt = r2_score(test_data['predicted_attention_values'], test_data['predicted_attention_values_dt'])
print(f'R-squared (Test Data - Decision Tree): {r2_test_dt}')


In [None]:
# One row of metrics per model, in the order: name, train MSE, train R-squared,
# test MSE, test R-squared.
metric_rows = [
    ('Random Forest', mse_train_rf, r2_train_rf, mse_test_rf, r2_test_rf),
    ('XGBoost', mse, r2, mse_test, r2_test),
    ('Decision Tree', mse_train_dt, r2_train_dt, mse_test_dt, r2_test_dt),
]
metric_names = ['Model', 'MSE (Train)', 'R-squared (Train)', 'MSE (Test)', 'R-squared (Test)']

# Pivot the rows into a column-name -> list-of-values mapping.
data = {
    column: list(values)
    for column, values in zip(metric_names, zip(*metric_rows))
}

# Summary table with one row per model.
df_results = pd.DataFrame(data)

# Show the summary table.
print(df_results)


In [None]:
# For each model, take its single row from the summary table, index it by the
# model name and transpose it so the metrics run down the rows of the heatmap.
per_model_tables = {
    model_label: df_results[df_results['Model'] == model_label].set_index('Model').transpose()
    for model_label in ('Random Forest', 'XGBoost', 'Decision Tree')
}

# Keep the individual tables available under their own names.
rf_results = per_model_tables['Random Forest']
xgb_results = per_model_tables['XGBoost']
dt_results = per_model_tables['Decision Tree']

# One annotated heatmap per model, each drawn on its own figure.
for model_label, metric_table in per_model_tables.items():
    plt.figure(figsize=(8, 6))
    sns.heatmap(metric_table, annot=True, cmap='viridis', fmt=".4f")
    plt.title(f'{model_label} Model Performance')
    plt.show()
