In [54]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [141]:
# Read the energy dataset from disk and discard the leading CSV column
# (labelled "trajectory" by the original author), then report the shape
# and preview the first ten rows.
df = pd.read_csv("data/Bz-MCB-intra-inter-energy-1500.csv").iloc[:, 1:]
print('Shape: ', df.shape)
df.head(10)

Shape:  (1000, 3)


Unnamed: 0,inter,intra,dissociation time
0,17.387572,210.731937,0.8
1,32.141205,197.435697,2.47
2,22.877001,205.800929,1.41
3,20.956015,207.706662,1.32
4,13.70996,216.019285,0.61
5,19.780247,209.440958,2.04
6,10.664808,217.914553,0.76
7,20.821918,207.967448,0.65
8,13.733133,215.27624,1.48
9,20.026744,207.947948,0.79


In [142]:
# Min-max rescaling: map every column of df onto the [0, 1] interval and
# rebuild the frame under the same column labels.
# NOTE(review): the scaler is fitted on the whole frame (target column
# included) and before the train/test split performed later in this
# notebook -- confirm that is intended.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(df)
df = pd.DataFrame(scaler.transform(df), columns=df.columns)
df.head(10)

Unnamed: 0,inter,intra,dissociation time
0,0.33844,0.641236,0.027273
1,0.705182,0.31383,0.153788
2,0.474895,0.519815,0.073485
3,0.427144,0.566742,0.066667
4,0.247023,0.771431,0.012879
5,0.397917,0.609447,0.121212
6,0.171327,0.8181,0.024242
7,0.42381,0.573163,0.015909
8,0.247599,0.753134,0.078788
9,0.404044,0.572683,0.026515


In [155]:
# Assemble the modelling frame from the first three columns of df.
# No polynomial terms are created here: x1 is the first column unchanged,
# x2 is the second column reflected as (1 - value), and y is the third column.
# NOTE(review): the reflection only stays inside [0, 1] if df has already
# been min-max scaled -- confirm this cell always runs after that step.

x1, x2, y = (df.iloc[:, idx] for idx in range(3))

df_poly = pd.DataFrame({'x1': x1, 'x2': 1 - x2, 'y': y})

df_poly.head(10)

Unnamed: 0,x1,x2,y
0,0.33844,0.358764,0.027273
1,0.705182,0.68617,0.153788
2,0.474895,0.480185,0.073485
3,0.427144,0.433258,0.066667
4,0.247023,0.228569,0.012879
5,0.397917,0.390553,0.121212
6,0.171327,0.1819,0.024242
7,0.42381,0.426837,0.015909
8,0.247599,0.246866,0.078788
9,0.404044,0.427317,0.026515


In [156]:
# Separate predictors from the response: X holds every column of df_poly
# except the last one, y is that last column.
X, y = df_poly.iloc[:, :-1], df_poly.iloc[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a linear regression on the training portion only.
model = LinearRegression().fit(X_train, y_train)

# Report the fitted intercept followed by one coefficient per predictor column.
print("\nIntercept (β0):", model.intercept_)
for col, coef in zip(X.columns, model.coef_):
    print(f"Coefficient for {col}:", coef)

# Score the model on the held-out rows.
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print("\nR2:", r2)
print("RMSE:", rmse)
print("MAE:", mae)


Intercept (β0): 0.10512528496745582
Coefficient for x1: -0.1968764451221576
Coefficient for x2: 0.09831841662710991

R2: 0.006131998495457869
RMSE: 0.10541566488913151
MAE: 0.05542851013664448
