<a href="https://www.kaggle.com/code/shreyanshmanavshukla/crop-recomendation-and-blockchain-integration?scriptVersionId=252488266" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
!nvidia-smi';

# Libraries

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import os
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Data Loading



In [None]:
import os
"""
N - ratio of Nitrogen content in soil
P - ratio of Phosphorous content in soil
K - ratio of Potassium content in soil
temperature - temperature in degree Celsius
humidity - relative humidity in %
ph - ph value of the soil
rainfall - rainfall in mm
"""
def get_data(path='/kaggle/input/crop-recommendation-dataset/Crop_recommendation.csv'):
    """Load the crop-recommendation dataset into a DataFrame.

    Parameters:
        path (str): Location of the CSV file. Defaults to the Kaggle input
            path used by this notebook, so ``get_data()`` keeps working unchanged.

    Returns:
        pandas.DataFrame: The file contents as parsed by ``pd.read_csv``.
    """
    return pd.read_csv(path)
meta=get_data()


In [None]:
meta.info()

# Data Preprocessing

In [None]:
# Distinct crop names, in order of first appearance in the data
unique_labels = meta['label'].unique()
print("No. of labels:",len(unique_labels))

# Forward map (crop name -> integer id) and its inverse (integer id -> crop name)
Labels = {name: idx for idx, name in enumerate(unique_labels)}
Labels_Decoder = {idx: name for name, idx in Labels.items()}
Labels



In [None]:
Labels_Decoder[0]

In [None]:
# Encode crop names as integer ids.
# Guarded so the cell can be re-run safely: mapping an already-encoded column
# through `Labels` again would turn every label into NaN.
if not pd.api.types.is_numeric_dtype(meta['label']):
    meta['label'] = meta['label'].map(Labels)
meta['label'].unique()

In [None]:
meta.head(1000)

## Visualization

In [None]:
sns.pairplot(meta, hue='label', vars=['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall'])
plt.suptitle('Pairwise Relationships Between Features', y=1.02)
plt.show()


In [None]:
plt.figure(figsize=(10, 8))
correlation_matrix = meta[['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']].corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Feature Correlation Heatmap')
# plt.savefig("Features_Correlation.png",dpi=600)
plt.show()


# Feature Selection

In [None]:
# Assuming 'meta' is your DataFrame and 'X' contains the features and 'y' contains the labels
X = meta.drop(columns=['label'])  # Features
y = meta['label']

In [None]:
y

## Data Splitting

In [None]:

# Split the data 70% / 15% / 15% into training, test and validation sets:
# first hold out 30% of the rows, then cut that hold-out in half.
Train_set, holdout_set = train_test_split(meta, test_size=0.3, random_state=42)
Test_set, Val_set = train_test_split(holdout_set, test_size=0.5, random_state=42)


In [None]:
len(Test_set)

In [None]:
Train_set

In [None]:
X_train, y_train = Train_set.drop(columns=['label']), Train_set['label']
X_val, y_val = Val_set.drop(columns=['label']), Val_set['label']
X_test, y_test = Test_set.drop(columns=['label']), Test_set['label']

## Scaling (Optional)

In [None]:
scaler = StandardScaler()
numerical_columns = X_train.select_dtypes(include=['float64', 'int64']).columns

# Learn the per-column mean/std from the training split only ...
scaler.fit(X_train[numerical_columns])

# ... then apply that same transform to every split (validation/test are never used for fitting)
for split in (X_train, X_val, X_test):
    split[numerical_columns] = scaler.transform(split[numerical_columns])

In [None]:
y_test

In [None]:
X_train['rainfall'].max()

# Training

# ML Models

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Create a Logistic Regression model with softmax (multi-class).
# `multi_class` is deprecated in recent scikit-learn releases; with the lbfgs
# solver a multiclass target is already fitted with the multinomial loss.
LR_model = LogisticRegression(solver='lbfgs', max_iter=100)

In [None]:
# Fit the model on the training data
LR_model = LR_model.fit(X_train, y_train)

In [None]:
import joblib
# Save the model
joblib.dump(LR_model, "logistic_regression_model.pkl")
print("Model saved as logistic_regression_model.pkl")

In [None]:
# For model evaluation metrics
from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score, 
    roc_auc_score, 
    average_precision_score, 
    cohen_kappa_score, 
    log_loss
)
def evaluate_model(model, X, y, dataset_type="train"):
    """
    Evaluates a fitted scikit-learn style classifier on a given dataset.

    Parameters:
        model (object): Trained classifier exposing ``predict``, ``predict_proba`` and ``classes_``.
        X (array): Features of the dataset.
        y (array): True labels of the dataset.
        dataset_type (str): Tag copied into the result (e.g. "train" or "val").

    Returns:
        dict: Dictionary containing evaluation metrics.
    """

    from sklearn.preprocessing import label_binarize

    # Predict probabilities and class labels
    y_pred_prob = model.predict_proba(X)
    y_pred = model.predict(X)

    # One indicator column per class, in the same order as the probability columns
    classes = model.classes_
    y_binarized = label_binarize(y, classes=classes)

    # Calculate metrics for multiclass (weighted by class support)
    accuracy = accuracy_score(y, y_pred)
    precision = precision_score(y, y_pred, average='weighted')
    recall = recall_score(y, y_pred, average='weighted')
    f1 = f1_score(y, y_pred, average='weighted')
    roc_auc = roc_auc_score(y_binarized, y_pred_prob, multi_class='ovr', average='weighted')  # Multiclass AUC

    # PRC AUC: one-vs-rest average precision per class, then an unweighted mean
    prc_aucs = [
        average_precision_score(y_binarized[:, i], y_pred_prob[:, i])
        for i in range(len(classes))
    ]
    prc_auc = sum(prc_aucs) / len(prc_aucs)

    kappa = cohen_kappa_score(y, y_pred)
    # Name the classes explicitly so the probability columns are matched
    # correctly even when `y` happens to miss one of the model's classes.
    loss = log_loss(y, y_pred_prob, labels=classes)

    # Prepare results
    results = {
        "Dataset Type": dataset_type,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "ROC AUC": roc_auc,
        "PRC AUC": prc_auc,
        "Kappa Coefficient": kappa,
        "Log Loss": loss,
    }

    return results


In [None]:
# Evaluate the logistic-regression model fitted above (`model` is not defined yet at this point of the notebook)
evaluate_model(LR_model, X_val, y_val, "Val")

## Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

In [None]:
# Create the base Decision Tree model (seeded so repeated runs build the same tree)
model = DecisionTreeClassifier(random_state=42)


In [None]:
# Train the model
DT_model = model.fit(X_train, y_train)

In [None]:
evaluate_model(DT_model, X_val, y_val, dataset_type="val")

In [None]:
import joblib
# Save the model
joblib.dump(DT_model, "DT_model.pkl")
print("Model saved as DT_model.pkl")

## SVM

In [None]:
from sklearn.svm import SVC
# Create the SVC model
model = SVC(probability=True)



In [None]:
# Train the model
SVM_model = model.fit(X_train, y_train)

In [None]:
evaluate_model(SVM_model, X_val, y_val, dataset_type="val")

In [None]:
import joblib
# Save the model
joblib.dump(SVM_model, "SVM_model.pkl")
print("Model saved as SVM_model.pkl")

In [None]:
from sklearn.metrics import accuracy_score
# Training accuracy
y_train_pred = model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Validation accuracy
y_val_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
model = RandomForestClassifier(n_estimators=100, random_state=42)

In [None]:
# Train the model
RF_model =model.fit(X_train, y_train)

In [None]:
evaluate_model(RF_model, X_val, y_val, dataset_type="val")

In [None]:
from sklearn.metrics import accuracy_score
# Training accuracy
y_train_pred = model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Validation accuracy
y_val_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

In [None]:
import joblib
# Save the model
joblib.dump(RF_model, "random_forest_model.pkl")
print("Model saved as random_forest_model.pkl")

In [None]:
import joblib
# Load the model
loaded_model = joblib.load("random_forest_model.pkl")
print("Model loaded successfully")

# Make predictions
predictions = loaded_model.predict(X_test)
predictions[0]


## KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Create the KNN model
model = KNeighborsClassifier(n_neighbors=5)  # You can change n_neighbors to any other number

# Train the model
KNN_model = model.fit(X_train, y_train)

In [None]:
evaluate_model(KNN_model, X_val, y_val, dataset_type="val")

In [None]:
import joblib
# Save the model
joblib.dump(KNN_model, "KNN_model.pkl")
print("Model saved as KNN_model.pkl")

In [None]:
from sklearn.metrics import accuracy_score
# Training accuracy
y_train_pred = model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Validation accuracy
y_val_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

## XGBoost

In [None]:
!pip install xgboost


In [None]:
import xgboost as xgb
# Create the XGBoost model.
# `use_label_encoder` is obsolete in current xgboost releases (labels are already integer ids here), so it is not passed.
model = xgb.XGBClassifier(n_estimators=100, eval_metric='mlogloss')

# Train the model
XG_model = model.fit(X_train, y_train)

In [None]:
evaluate_model(XG_model, X_val, y_val, dataset_type="val")

In [None]:
from sklearn.metrics import accuracy_score
# Training accuracy
y_train_pred = model.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Validation accuracy
y_val_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

In [None]:
import joblib
# Save the model
joblib.dump(XG_model, "XG_model.pkl")
print("Model saved as XG_model.pkl")

# DL Model

## MLP

In [None]:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode labels using Keras
y_train_encoded = to_categorical(y_train, num_classes=22)
y_val_encoded = to_categorical(y_val, num_classes=22)
y_test_encoded = to_categorical(y_test, num_classes=22)


In [None]:
# Build the MLP model
model = Sequential()
model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))  # Input and first hidden layer
model.add(Dense(64, activation='relu'))  # Second hidden layer
model.add(Dense(22, activation='softmax'))  # Output layer for 22 classes


In [None]:
model.summary()

In [None]:

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model. `fit` returns a History object (per-epoch metrics), not the
# model itself, so it is stored under a name that says so; the trained network stays in `model`.
ANN_history = model.fit(X_train, y_train_encoded, epochs=10, batch_size=32, validation_data=(X_val, y_val_encoded))


In [None]:
y_test.iloc[1]

In [None]:
import joblib
# Save the model
# NOTE(review): this pickles the Keras model through joblib; Keras' own `model.save(...)`
# is the documented way to persist a model — confirm pickling works with the installed version.
joblib.dump(model, "ANN_model.pkl")
print("Model saved as ANN_model.pkl")

In [None]:
# Class probabilities predicted by the MLP for the second test sample.
# `Models` is only built further down the notebook, so the trained `model` is used directly.
y_test_pred = model.predict(X_test)[1]
max_index = np.argmax(y_test_pred)
max_index

In [None]:
def evaluate_model(model, X, y, dataset_type="train"):
    """
    Evaluates the performance of a Keras Sequential model on a given dataset.

    Note: this definition replaces the scikit-learn ``evaluate_model`` defined
    earlier in the notebook; from this cell on, the name refers to this version.

    Parameters:
        model (object): Trained Keras Sequential model whose ``predict`` returns
            one probability column per class.
        X (array): Features of the dataset.
        y (array): True labels of the dataset, as integer class ids.
        dataset_type (str): Tag copied into the result (e.g. "train" or "val").

    Returns:
        dict: Dictionary containing evaluation metrics.
    """
    from sklearn.metrics import (
        accuracy_score, precision_score, recall_score, f1_score, roc_auc_score,
        average_precision_score, log_loss, cohen_kappa_score
    )
    from sklearn.preprocessing import label_binarize
    import numpy as np

    # Predict probabilities and class labels
    y_pred_prob = model.predict(X)  # Keras model's predict method
    y_pred = np.argmax(y_pred_prob, axis=1)  # Convert probabilities to class labels

    # Take the class ids from the probability columns rather than from `y`:
    # if `y` misses a class, np.unique(y) would give fewer indicator columns
    # than probability columns and the per-class metrics would be misaligned.
    classes = np.arange(y_pred_prob.shape[1])
    y_binarized = label_binarize(y, classes=classes)

    # Calculate metrics for multiclass (weighted by class support)
    accuracy = accuracy_score(y, y_pred)
    precision = precision_score(y, y_pred, average='weighted')
    recall = recall_score(y, y_pred, average='weighted')
    f1 = f1_score(y, y_pred, average='weighted')
    roc_auc = roc_auc_score(y_binarized, y_pred_prob, multi_class='ovr', average='weighted')

    # PRC AUC: one-vs-rest average precision per class, then an unweighted mean
    prc_aucs = [
        average_precision_score(y_binarized[:, i], y_pred_prob[:, i])
        for i in range(len(classes))
    ]
    prc_auc = sum(prc_aucs) / len(prc_aucs)

    kappa = cohen_kappa_score(y, y_pred)
    # Explicit labels keep the probability columns matched to the right classes
    loss = log_loss(y, y_pred_prob, labels=classes)

    # Prepare results
    results = {
        "Dataset Type": dataset_type,
        "Accuracy": accuracy,
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1,
        "ROC AUC": roc_auc,
        "PRC AUC": prc_auc,
        "Kappa Coefficient": kappa,
        "Log Loss": loss,
    }

    return results


In [None]:
# Evaluate the MLP trained above (`Models` is only defined in the blockchain section further down)
evaluate_model(model, X_val, y_val, dataset_type="val")

In [None]:
from sklearn.metrics import accuracy_score
# Training accuracy
y_train_pred = model.predict(X_train)
y_train_pred = np.argmax(y_train_pred,axis=1)
train_accuracy = accuracy_score(y_train, y_train_pred)

# Validation accuracy
y_val_pred = model.predict(X_val)
y_val_pred = np.argmax(y_val_pred,axis=1)
val_accuracy = accuracy_score(y_val, y_val_pred)

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Data: validation metrics per model, entered by hand (one row per model, one column per metric)
models = ["Logistic Regression", "Decision Tree", "SVM", "Random Forest", "KNN", "XGBoost", "ANN"]
metrics = ["Accuracy", "Precision", "Recall", "F1 Score", "ROC AUC", "PRC AUC", "Kappa Coeff.", "Log Loss"]
data = [
    [0.951, 0.952, 0.951, 0.951, 0.999, 0.983, 0.949, 0.246],  # Logistic Regression
    [0.985, 0.985, 0.984, 0.984, 0.992, 0.971, 0.984, 0.546],  # Decision Tree
    [0.954, 0.961, 0.954, 0.954, 0.999, 0.991, 0.952, 0.195],  # SVM
    [0.993, 0.994, 0.994, 0.994, 0.999, 0.997, 0.993, 0.064],  # Random Forest
    [0.963, 0.967, 0.963, 0.963, 0.996, 0.976, 0.962, 0.276],  # KNN
    [0.982, 0.983, 0.982, 0.982, 0.999, 0.998, 0.981, 0.068],  # XGBoost
    [0.936, 0.944, 0.936, 0.935, 0.998, 0.978, 0.933, 0.206],  # ANN
]

# Transform data for grouped bar chart
data = np.array(data)
x = np.arange(len(models))  # Model indices
bar_width = 0.1  # Width of each bar
# Spread the bars symmetrically around each tick so every group is centred on its label
offsets = (np.arange(len(metrics)) - (len(metrics) - 1) / 2) * bar_width

# Colors for metrics
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'orange']

# Create grouped bar chart
plt.figure(figsize=(15, 8))
for i, metric in enumerate(metrics):
    plt.bar(x + offsets[i], data[:, i], bar_width, label=metric, color=colors[i % len(colors)])

# Customizations
plt.xticks(x, models, rotation=45, ha="right")
plt.ylabel("Score")
plt.title("Comparison of Models Across Metrics")
plt.legend(loc="upper left", bbox_to_anchor=(1, 1), title="Metrics")
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.tight_layout()
plt.savefig("Comparision Bar.png",dpi=1000)
# Show the plot
plt.show()


## LSTM

In [None]:
# Convert both splits to plain arrays. np.asarray accepts a DataFrame as well as
# an ndarray, so the cell also survives being re-run; a DataFrame has no `.reshape`,
# which is why X_val must be converted too.
X_train = np.asarray(X_train)
X_val = np.asarray(X_val)

# Reshape to (samples, timesteps, features): each feature becomes one timestep of width 1
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_val = X_val.reshape((X_val.shape[0], X_val.shape[1], 1))


In [None]:
X_train.shape

In [None]:
from keras.models import Sequential
from keras.layers import LSTM, Dense

# Build the LSTM model
model = Sequential()

# Add an LSTM layer
model.add(LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), activation='tanh', return_sequences=False))

# Add a Dense hidden layer
model.add(Dense(64, activation='relu'))

# Output layer for 22 classes
model.add(Dense(22, activation='softmax'))

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Summary of the model
model.summary()


In [None]:
print("X_train shape:", X_train.shape)  # Should be (1540, 7, 1)
print("y_train_encoded shape:", y_train_encoded.shape)  # Should be (1540, 22)


In [None]:
from keras.backend import clear_session
clear_session()

In [None]:
model.fit(X_train, y_train_encoded, epochs=10, batch_size=32)


# BlockChain

In [None]:
!pip install web3 eth-tester[py-evm] 




In [None]:
import eth_tester
print(eth_tester.__version__)


In [None]:
import joblib
# Load the model
LR_model = joblib.load("logistic_regression_model.pkl")
DT_model = joblib.load("DT_model.pkl")
SVM_model = joblib.load("SVM_model.pkl")
RF_model = joblib.load("random_forest_model.pkl")
KNN_model = joblib.load("KNN_model.pkl")
XG_model = joblib.load("XG_model.pkl")
ANN_model = joblib.load("ANN_model.pkl")

print("Models loaded successfully")

Models = [LR_model,DT_model,SVM_model,RF_model,KNN_model,XG_model,ANN_model]


In [None]:
from web3 import Web3
from web3 import EthereumTesterProvider

from eth_tester import EthereumTester
from eth_account import Account
import time

In [None]:
# Use an in-memory blockchain (Local test network)
# Note: this rebinds `eth_tester`, which an earlier cell imported as a module, to an EthereumTester instance.
eth_tester = EthereumTester()
web3 = Web3(EthereumTesterProvider(eth_tester))
# Use the first account exposed by the test provider as the sender for all transactions
# account = web3.eth.account.create()
account =  web3.eth.accounts[0] 

print("✅ Local Test Ethereum Account Created!")
print("Address:",account)
# print("Private Key (Keep Secret!):", account.key.hex())

# Check balance
# NOTE(review): eth-tester accounts are normally pre-funded, so this is probably not 0 — confirm
balance = web3.eth.get_balance(account)
print("Balance in ETH:", web3.from_wei(balance, 'ether'))

In [None]:
#ABI and bytecode generated from Soldity
with open("/kaggle/input/solidity/abi.txt", "r") as file:
    ABI = file.read()

with open("/kaggle/input/solidity/Bytecode.txt", "r") as file:
    bytecode = file.read()

In [None]:
# Estimate the gas needed to deploy the contract.
# The contract factory is built here so the cell does not depend on a cell that runs later.
contract = web3.eth.contract(abi=ABI, bytecode=bytecode)
contract.constructor().estimate_gas()

In [None]:
# Build the deployment transaction for the compiled contract
contract = web3.eth.contract(abi=ABI, bytecode=bytecode)
initial = time.time()
transaction = contract.constructor().build_transaction({
    'gas': contract.constructor().estimate_gas(),
    'from': account,
    'nonce':  web3.eth.get_transaction_count(account),
    'gasPrice': web3.to_wei('10', 'gwei')
})

# Send the transaction
tx_hash = web3.eth.send_transaction(transaction)
Final = time.time()
# Elapsed time covers building and submitting the transaction; the timer stops before the receipt wait below
Transaction_time = Final - initial

# Wait for the transaction to be mined
receipt = web3.eth.wait_for_transaction_receipt(tx_hash)
receipt = dict(receipt)
receipt['TransactionTime'] = Transaction_time

# Start the history table with the deployment receipt as its first row
TransactionHistory = pd.DataFrame([receipt])
TransactionHistory

In [None]:
# Interact with the deployed contract
contract_instance = web3.eth.contract(address=receipt['contractAddress'], abi=ABI)

In [None]:
# Function to add crop data
def add_crop_data(N, P, K, temperature, humidity, pH, rainfall,recomendation):
    """Store one sample and its crop recommendation through the contract's ``storeValue``.

    Parameters:
        N, P, K, temperature, humidity, pH, rainfall: Values forwarded unchanged
            to ``storeValue``, in this order.
        recomendation: Recommended crop, forwarded as the last ``storeValue`` argument.

    Returns:
        dict: The transaction receipt plus a ``'TransactionTime'`` entry holding the
        seconds spent building and submitting the transaction (the timer stops
        before the wait for the receipt).
    """
    initial = time.time()
    transaction = contract_instance.functions.storeValue(N, P, K, temperature, humidity, pH, rainfall,recomendation).build_transaction({
        # Name the sender explicitly so it always matches the account the nonce is read from
        'from': account,
        'gas': 2000000,
        'gasPrice': web3.to_wei('20', 'gwei'),
        'nonce': web3.eth.get_transaction_count(account),
    })

    tx_hash = web3.eth.send_transaction(transaction)
    Final = time.time()
    # Wait for transaction receipt (confirmation)
    txn_receipt = web3.eth.wait_for_transaction_receipt(tx_hash)
    reciept = dict(txn_receipt)
    reciept['TransactionTime'] = Final - initial
    return reciept



def Recommendation(model, data, decoder=None):
    """Predict the crop for a single sample and return its name.

    Works with both kinds of model used in this notebook: models whose
    ``predict`` returns one probability per class (the arg-max column is
    taken) and models whose ``predict`` returns the class id directly
    (taking an arg-max of that single value would always yield 0).

    Parameters:
        model (object): Fitted model exposing ``predict``.
        data (array-like): Feature values of one sample.
        decoder (dict, optional): Maps integer class ids to crop names.
            Defaults to the notebook-level ``Labels_Decoder``.

    Returns:
        str: The decoded crop name.
    """
    if decoder is None:
        decoder = Labels_Decoder

    # A single sample must be presented as a (1, n_features) batch
    sample = np.array(data).reshape(1, -1)
    prediction = np.asarray(model.predict(sample))

    if prediction.ndim >= 2 and prediction.shape[-1] > 1:
        # Row of class probabilities -> index of the most likely class
        class_index = int(np.argmax(prediction, axis=-1).ravel()[0])
    else:
        # The model already returned the class id
        class_index = int(prediction.ravel()[0])

    return str(decoder[class_index])


def DeciToInt(x,n=10**10):
    """Convert a decimal value to a fixed-point integer with scale factor ``n``.

    The product is rounded to the nearest integer rather than truncated, so
    floating-point noise (e.g. ``0.29 * 100 == 28.999999999999996``) cannot
    shift the stored value by one unit.

    Parameters:
        x (float): Value to encode.
        n (int): Scale factor (default ``10**10``).

    Returns:
        int: ``x * n`` rounded to the nearest integer.
    """
    return int(round(x*n))
    
def IntToDeci(x,n=10**10):
    """Undo the fixed-point scaling: return ``x`` divided by the scale factor ``n``."""
    scale = n
    return x / scale

In [None]:
Recommendation(Models[6],X_test.iloc[0])

In [None]:
# Store every test sample on-chain together with the ANN's recommendation.
# Receipts are collected in a list and appended to the history in one concat,
# instead of re-copying the whole frame on every row.
receipts = []
for (_, row), actual_label in zip(X_test.iterrows(), y_test):
    reciept = add_crop_data(
        DeciToInt(row['N']),
        DeciToInt(row['P']),
        DeciToInt(row['K']),
        DeciToInt(row['temperature']),
        DeciToInt(row['humidity']),
        DeciToInt(row['ph']),
        DeciToInt(row['rainfall']),
        Recommendation(Models[6],row)  # Use `row` instead of `X_val`
    )
    # Keep the true crop next to the receipt so predictions can be checked later
    reciept["ActualCrop"] = Labels_Decoder[actual_label]
    receipts.append(reciept)

if receipts:
    TransactionHistory = pd.concat([TransactionHistory, pd.DataFrame(receipts)], ignore_index=True)
# # History

In [None]:
pd.set_option('display.max_columns', None)

In [None]:

# Drop row 0, the contract-deployment receipt the frame was created with.
# NOTE: the drop is positional, so every re-run of this cell removes one more row.
TransactionHistory
TransactionHistory = TransactionHistory.iloc[1:].reset_index(drop=True)
TransactionHistory

In [None]:
# Save to a CSV file
# TransactionHistory.to_csv('TransactionHistory.csv', index=False)

In [None]:
import datetime
def show_last_data():
    """Return the result of the contract's read-only ``showLastValue`` call."""
    return contract_instance.functions.showLastValue().call()
def Owner():
    """Return the result of the contract's read-only ``owner`` call."""
    return contract_instance.functions.owner().call()
    

def show_all_data():
    """Fetch every record stored in the contract and return it as a decoded DataFrame.

    Returns:
        pandas.DataFrame: One row per stored record. The seven numeric columns
        are converted back from fixed-point integers with ``IntToDeci`` and
        ``Timestamp`` is formatted as a ``'%Y-%m-%d %H:%M:%S'`` UTC string.
    """
    all_data = contract_instance.functions.showAllValues().call()

    # Convert data to Pandas DataFrame
    df = pd.DataFrame(all_data, columns=[
        "Nitrogen", "Phosphorus", "Potassium", "Temperature",
        "Humidity", "pH", "Rainfall", "Recommendation", "Timestamp"
    ])

    # Apply IntToDeci() function to numerical columns
    columns_to_convert = ["Nitrogen", "Phosphorus", "Potassium", "Temperature", "Humidity", "pH", "Rainfall"]
    df[columns_to_convert] = df[columns_to_convert].apply(lambda col: col.map(IntToDeci))

    # Convert Timestamp to human-readable UTC text. `utcfromtimestamp` is
    # deprecated since Python 3.12; an aware UTC datetime formats to the same string.
    df["Timestamp"] = df["Timestamp"].apply(
        lambda x: datetime.datetime.fromtimestamp(x, datetime.timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    )

    return df



In [None]:
show_all_data()

In [None]:
Recomendations = show_all_data()

# Put receipts and on-chain records side by side (aligned on the row index),
# keep the first of any duplicated column names, parse the timestamps
# (unparseable values become NaT) and order the rows chronologically, NaT last.
History = (
    pd.concat([TransactionHistory,Recomendations],axis=1)
    .pipe(lambda frame: frame.loc[:, ~frame.columns.duplicated()])
    .assign(Timestamp=lambda frame: pd.to_datetime(frame["Timestamp"], errors='coerce'))
    .sort_values(by="Timestamp", ascending=True, na_position='last')
)

History

In [None]:
# Save to a CSV file
History.to_csv('ANN_model_TransactionHistory.csv', index=False)
History

In [None]:
History.columns

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

# Load the transaction history exported earlier in this notebook.
# (The export to "TransactionHistory.csv" is commented out, so that file is never written;
# "ANN_model_TransactionHistory.csv" is the file this notebook actually saves.)
file_path = "/kaggle/working/ANN_model_TransactionHistory.csv"  # Update this if needed
df = pd.read_csv(file_path)
# Convert Timestamp to datetime format
df['Timestamp'] = pd.to_datetime(df['Timestamp'])
# Ensure numerical columns are in the correct format (non-numeric values become NaN)
df['effectiveGasPrice'] = pd.to_numeric(df['effectiveGasPrice'], errors='coerce')
df['gasUsed'] = pd.to_numeric(df['gasUsed'], errors='coerce')

# Extract date for daily analysis
df['Date'] = df['Timestamp'].dt.date

In [None]:
# Per-model transaction histories.
# NOTE: within this notebook only "ANN_model_TransactionHistory.csv" is written; the other
# files presumably come from earlier runs with a different model selected — verify they
# exist in /kaggle/working before running this cell.
lr = pd.read_csv("/kaggle/working/LR_model_TransactionHistory.csv")
dt = pd.read_csv("/kaggle/working/DT_model_TransactionHistory.csv")
svm = pd.read_csv("/kaggle/working/SVM_model_TransactionHistory.csv")
rf = pd.read_csv("/kaggle/working/RF_model_TransactionHistory.csv")
knn = pd.read_csv("/kaggle/working/KNN_model_TransactionHistory.csv")
xg = pd.read_csv("/kaggle/working/XG_model_TransactionHistory.csv")
ann = pd.read_csv("/kaggle/working/ANN_model_TransactionHistory.csv")


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import norm

plt.figure(figsize=(8, 5))

# (legend name, transaction-history frame, line colour) for every model
model_runs = [
    ("Logistic Regression", lr, 'blue'),
    ("Decision tree", dt, 'orange'),
    ("SVM", svm, 'violet'),
    ("Random Forest", rf, 'red'),
    ("KNN", knn, 'brown'),
    ("XGBoost", xg, 'green'),  # was mislabelled "Random Forest" in the legend
    ("ANN", ann, 'maroon'),
]

means = []
for display_name, history, colour in model_runs:
    transaction_times = history["TransactionTime"]
    # Compute mean and standard deviation
    mu, sigma = np.mean(transaction_times), np.std(transaction_times)
    means.append(mu)
    # Normal curve fitted to this model's transaction times, drawn over mean ± 4 sigma
    x = np.linspace(mu - 4*sigma, mu + 4*sigma, 1000)
    y = norm.pdf(x, mu, sigma)
    plt.plot(x, y, label=f'{display_name}\n(μ={mu:.5f}, σ={sigma:.5f})', color=colour)
    plt.axvline(mu, linestyle='dashed', label='Mean (μ)',color=colour)

# Mean of the per-model means, drawn at its own position (not at the last model's mean)
overall_mean = np.mean(means)
plt.axvline(overall_mean, linestyle='dashed', label=f'Overall Mean (μ={overall_mean:.5f})',color='black')

# Labels and title
plt.xlabel('Transaction Time')
plt.ylabel('Probability Density')
# plt.title('Normal Distribution of Transaction Time')
plt.legend(ncol=2, loc='upper center', bbox_to_anchor=(0.4, -0.15))
plt.grid()

# Show plot
plt.savefig("Normal Distribution of Transaction Time.png", bbox_inches='tight', dpi=600)
plt.show()


In [None]:
# Average gas used per transaction, one value per model
model_histories = [lr, dt, svm, rf, knn, xg, ann]
mean_gas_used = [history["gasUsed"].mean() for history in model_histories]

# Unweighted mean of the per-model averages
overall_mean_gas = sum(mean_gas_used) / len(mean_gas_used)
print(f"Overall Mean Gas Used: {overall_mean_gas}")


In [None]:
# Stack the histories of all models into a single frame
all_data = pd.concat([lr, dt, svm, rf, knn, xg, ann], ignore_index=True)

# A transaction counts as successful when its receipt status is 1
# and the stored recommendation equals the actual crop
has_ok_status = all_data['status'] == 1
is_correct_crop = all_data['ActualCrop'] == all_data['Recommendation']
successful_transactions = (has_ok_status & is_correct_crop).sum()
total_transactions = len(all_data)

# Overall Transaction Success Rate (%); 0 when there are no transactions at all
if total_transactions > 0:
    overall_success_rate = (successful_transactions / total_transactions) * 100
else:
    overall_success_rate = 0

print(f"Overall Transaction Success Rate: {overall_success_rate:.5f}%")