In [23]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import pandas as pd
import numpy as np
import logging
from scipy.ndimage import zoom
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import mean_squared_error as mse

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [24]:
df = pd.read_csv('validator_data.csv')

In [25]:
df.head()

Unnamed: 0,id,total_validators,p_active_slots,p_created_blocks,non_empty_block,empty_block,vanilla_blocks,a_active_epochs,a_attested_epochs,a_attested_source,...,sync_rate,block_reward,validation_reward,total_reward,proposal_effectiveness,correctness,attestation_rate,att_effectiveness,effectiveness,partition_date
0,Sigma Prime,10001,83,83,82.0,0.0,51.0,2250225,2249558,2248718,...,97.957517,6.56827,26.959453,33.527723,0.987952,0.992777,0.999704,0.971819,0.977869,2024-01-17
1,Simply Staking,9428,88,88,88.0,0.0,0.0,2121300,2117898,2117410,...,98.022129,11.981733,25.53441,37.516143,1.0,0.988021,0.998396,0.950203,0.968877,2024-01-17
2,Chorus One,8853,77,77,77.0,0.0,0.0,1991925,1988552,1987913,...,97.778459,7.538779,23.694192,31.23297,1.0,0.991658,0.998307,0.951957,0.969973,2024-01-17
3,CryptoManufaktur,10001,80,80,80.0,0.0,0.0,2250225,2249817,2249157,...,99.339419,12.02185,27.143702,39.165552,1.0,0.995019,0.999819,0.975927,0.984954,2024-01-17
4,Attestant (BVI) Limited,10001,69,69,69.0,0.0,8.0,2250225,2250199,2249164,...,98.925253,6.736894,26.254547,32.99144,1.0,0.995301,0.999988,0.977189,0.985743,2024-01-17


In [63]:
from sklearn.preprocessing import OneHotEncoder

encoder = OneHotEncoder()
encoded_validator = encoder.fit_transform(df[['id']])
feature_names = encoder.get_feature_names_out(input_features=['id'])

encoded_validator_df = pd.DataFrame(encoded_validator, columns=feature_names)
encoded_validator_df

ValueError: Shape of passed values is (5388, 1), indices imply (5388, 42)

In [53]:
df['partition_date'] = pd.to_datetime(df['partition_date'])
df['year'] = df['partition_date'].dt.year
df['month'] = df['partition_date'].dt.month
df['day'] = df['partition_date'].dt.day

In [49]:
X = df.drop(columns=['effectiveness'])
X.head()

Unnamed: 0,id,total_validators,p_active_slots,p_created_blocks,non_empty_block,empty_block,vanilla_blocks,a_active_epochs,a_attested_epochs,a_attested_source,...,inc_distance,sync_rate,block_reward,validation_reward,total_reward,proposal_effectiveness,correctness,attestation_rate,att_effectiveness,partition_date
0,Sigma Prime,10001,83,83,82.0,0.0,51.0,2250225,2249558,2248718,...,1.021263,97.957517,6.56827,26.959453,33.527723,0.987952,0.992777,0.999704,0.971819,2024-01-17
1,Simply Staking,9428,88,88,88.0,0.0,0.0,2121300,2117898,2117410,...,1.038131,98.022129,11.981733,25.53441,37.516143,1.0,0.988021,0.998396,0.950203,2024-01-17
2,Chorus One,8853,77,77,77.0,0.0,0.0,1991925,1988552,1987913,...,1.03994,97.778459,7.538779,23.694192,31.23297,1.0,0.991658,0.998307,0.951957,2024-01-17
3,CryptoManufaktur,10001,80,80,80.0,0.0,0.0,2250225,2249817,2249157,...,1.019378,99.339419,12.02185,27.143702,39.165552,1.0,0.995019,0.999819,0.975927,2024-01-17
4,Attestant (BVI) Limited,10001,69,69,69.0,0.0,8.0,2250225,2250199,2249164,...,1.018523,98.925253,6.736894,26.254547,32.99144,1.0,0.995301,0.999988,0.977189,2024-01-17


In [43]:
Y = df['effectiveness']

In [44]:
from sklearn.model_selection import train_test_split

In [51]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [52]:
def train_model(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    Y_train: pd.DataFrame,
    Y_test: pd.DataFrame,
):
    model = nn.Sequential(
        nn.Linear(X_train.shape[1], 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 1),
    )

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.RMSprop(model.parameters())

    # Convert data to PyTorch tensors
    X_tensor = torch.tensor(X_train.values, dtype=torch.float32)
    y_tensor = torch.tensor(Y_train.values.reshape(-1, 1), dtype=torch.float32)
    X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)

    # Training loop
    epochs_trial = np.arange(100, 400, 4)
    batch_trial = np.arange(100, 400, 4)
    DL_pred = []
    DL_RMSE = []

    for i, j, k in zip(range(4), epochs_trial, batch_trial):
        for epoch in range(j):
            optimizer.zero_grad()
            outputs = model(X_tensor)
            loss = criterion(outputs, y_tensor)
            loss.backward()
            optimizer.step()

        with torch.no_grad():
            DL_predict = model(X_test_tensor).numpy()
            DL_RMSE.append(
                np.sqrt(mse(Y_test.values / 100, DL_predict.flatten() / 100))
            )
            DL_pred.append(DL_predict)
            print("DL_RMSE_{}:{:.6f}".format(i + 1, DL_RMSE[i]))

    return model

In [64]:
def main():
    model = train_model(X_train, X_test, y_train, y_test)


if __name__ == "__main__":
    main()

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.