In [3]:
import pandas as pd
import plotly.express as px

# Path of the base-fee statistics CSV, relative to the notebook's working directory.
BASE_FEE_CSV = 'csv_inputs/base_fee_stats.csv'

# Read the CSV into a DataFrame; later cells select columns from it by name.
base_fee_data = pd.read_csv(BASE_FEE_CSV)

In [4]:
from sklearn.model_selection import train_test_split

# Columns used as model inputs and the column the model is asked to predict.
FEATURE_COLUMNS = ['num_user_txs', 'total_gas_used']
TARGET_COLUMN = 'avg_base_fee_gwei'

X = base_fee_data[FEATURE_COLUMNS]
y = base_fee_data[TARGET_COLUMN]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# shuffled split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Show the resulting (rows, columns) of each feature split as the cell output.
X_train.shape, X_test.shape


((60, 2), (16, 2))

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Build the linear regression and fit it on the training split in one step;
# fit() returns the fitted estimator, so `regressor` is the trained model.
regressor = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out test rows.
y_pred = regressor.predict(X_test)

# Compare predictions with the true test targets: mean squared error and R^2.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Display both metrics as the cell output.
mse, r2

(0.0003390565704256438, 0.4500222022879765)

In [6]:
# Read the fitted parameters off the trained model: the intercept, then one
# slope per feature in the column order of X (num_user_txs, total_gas_used).
beta_0 = regressor.intercept_
beta_1, beta_2 = regressor.coef_

# Display the three parameters as the cell output.
beta_0, beta_1, beta_2


(-0.07846279713421994, 1.574970727194004e-08, 5.522242698642936e-13)

$$
\widehat{\text{avg\_base\_fee\_gwei}} \approx -0.0785 + (1.575 \times 10^{-8}) \times \text{num\_user\_txs} + (5.522 \times 10^{-13}) \times \text{total\_gas\_used}
$$
