In [None]:
# Colab-only: make Google Drive visible under /content/drive so the CSV
# read further down can be opened with an ordinary file path.
from google.colab import drive
# force_remount=True asks Colab to mount again even if a mount already exists.
drive.mount('/content/drive', force_remount=True)

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Location of the input CSV inside the mounted Drive (requires the mount cell above).
file_path = '/content/drive/My Drive/output.csv'
# Load the whole file into a DataFrame; column names come from the CSV header row.
df = pd.read_csv(file_path)
# Show the first rows as a quick sanity check of what was loaded.
df.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
import tensorflow as tf

In [None]:
# Columns used as model inputs; 'avg_gas_price' is the regression target.
features = [
    'avg_transaction_value',
    'token_transfers_count',
    'gas_used',
    'transaction_count',
]

# Pull plain NumPy arrays out of the frame: X is 2-D (rows x len(features)), y is 1-D.
X = df.loc[:, features].values
y = df.loc[:, 'avg_gas_price'].values

In [None]:
#normalizing
# The target must be 2-D (n_samples, 1) for MinMaxScaler.
y_2d = y.reshape(-1, 1)

# Identify the training portion BEFORE fitting any scaler. With shuffle=False the
# split is purely positional (first 80% train, last 20% test), so fitting on this
# slice keeps min/max statistics of the held-out period out of the training data.
X_train_raw, _X_test_raw, y_train_raw, _y_test_raw = train_test_split(
    X, y_2d, test_size=0.2, shuffle=False
)

# Use one scaler per array. Re-fitting a single scaler on y would discard the
# feature scaling parameters, making it impossible to transform new inputs later.
feature_scaler = MinMaxScaler()
feature_scaler.fit(X_train_raw)

# `scaler` stays the *target* scaler: later cells call scaler.inverse_transform
# on y values to get back to original gas-price units.
scaler = MinMaxScaler()
scaler.fit(y_train_raw)

# Apply the train-fitted scaling to every row. Held-out rows may fall slightly
# outside [0, 1] when they exceed the range seen during training; that is expected.
X_scaled = feature_scaler.transform(X)
y_scaled = scaler.transform(y_2d)

#splitting the data into train and test
# Same positional split as above, now on the scaled arrays.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, shuffle=False)

In [None]:
#custom loss: mean squared error plus a penalty on negative predictions
def custom_mse_loss(y_true, y_pred):
    """Return MSE augmented with a squared penalty on sub-zero predictions.

    For every element the loss adds the squared error ``(y_true - y_pred)**2``
    and ``relu(-y_pred)**2``; the second term is zero whenever the prediction
    is non-negative. The sum is averaged over all elements.

    Args:
        y_true: Ground-truth values (anything TensorFlow can subtract from
            ``y_pred``; shapes are assumed broadcast-compatible).
        y_pred: Model predictions.

    Returns:
        A scalar tensor holding the mean penalised loss.

    Note:
        This function is only defined here; the scikit-learn
        ``LinearRegression`` fitted in this cell does not take it as input.
    """
    error_sq = tf.square(y_true - y_pred)

    # relu(-p) equals |p| for negative p and 0 otherwise, so only
    # below-zero predictions contribute to this term.
    negative_part = tf.nn.relu(-y_pred)
    below_zero_sq = tf.square(negative_part)

    return tf.reduce_mean(error_sq + below_zero_sq)

# Ordinary least-squares fit on the scaled training split.
# `fit` returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)

In [None]:
# Predict on the held-out rows (the last 20%, since the split is unshuffled).
# Outputs are in the scaled target space, not original gas-price units.
y_pred = model.predict(X_test)

In [None]:
# evaluation metrics
# NOTE: y_test / y_pred are MinMax-scaled here, so these errors are in scaled
# units rather than original gas-price units.
from sklearn.metrics import mean_absolute_error, mean_squared_error

mse = mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)

# The `squared=False` argument of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6 (it now raises TypeError). Taking the
# square root of the MSE is version-independent and, for this single-output
# target, gives the same value.
rmse = mse ** 0.5
print('Root Mean Squared Error:', rmse)

mae = mean_absolute_error(y_test, y_pred)
print('Mean Absolute Error:', mae)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Recompute the predictions so this cell works without the earlier prediction cell.
y_pred = model.predict(X_test)

# Inverse transform back to actual gas-price values. `scaler` is the scaler
# that was (last) fit on the target column, so it is the right one to undo y.
y_test_column = y_test.reshape(-1, 1)  # inverse_transform expects a 2-D array
y_test_actual = scaler.inverse_transform(y_test_column).ravel()
y_pred_actual = scaler.inverse_transform(y_pred).ravel()

# Overlay the de-normalised ground truth and the model output on one figure.
# The x-axis is the row position within the test split.
plt.figure(figsize=(10, 6))
for series, series_label in ((y_test_actual, 'Actual'), (y_pred_actual, 'Predicted')):
    plt.plot(series, label=series_label)
plt.title('Actual vs. Predicted Gas Prices')
plt.xlabel('Time')
plt.ylabel('Gas Price')
plt.legend()
plt.show()

# Residual = actual minus predicted, in original gas-price units.
# Positive values mean the model under-predicted.
residuals = y_test_actual - y_pred_actual

plt.figure(figsize=(10, 6))
plt.title('Residuals Plot')
plt.plot(residuals)
# Dashed zero line as a visual reference for unbiased predictions.
plt.axhline(y=0, color='r', linestyle='--')
plt.xlabel('Time')
plt.ylabel('Residuals')
plt.show()

# density plot of residuals
plt.figure(figsize=(10, 6))
# histplot defaults to stat='count', which would contradict the 'Density'
# axis label and title below. stat='density' normalises the bars so the
# histogram area sums to 1; the KDE curve is scaled to match.
sns.histplot(residuals, kde=True, stat='density')
plt.xlabel('Residuals')
plt.ylabel('Density')
plt.title('Density Plot of Residuals')
plt.show()
