Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement solcx (from versions: none)
ERROR: No matching distribution found for solcx


In [5]:
!pip show py-solc-x
from solcx import compile_standard, install_solc

# Example usage
install_solc('0.8.0')







Name: py-solc-x
Version: 2.0.3
Summary: Python wrapper and version management tool for the solc Solidity compiler.
Home-page: https://github.com/ApeWorX/py-solc-x
Author: ApeWorX Ltd.
Author-email: admin@apeworx.io
License: MIT
Location: C:\Users\Dell\anaconda3\Lib\site-packages
Requires: packaging, requests
Required-by: 


<Version('0.8.0')>

In [None]:
// EmailSpamDetection.sol

pragma solidity ^0.8.0;

/// @title EmailSpamDetection
/// @notice Registry mapping an email hash string to a spam flag; only the
///         deploying account may write to it.
contract EmailSpamDetection {
    // Mapping from email hash to spam status (0 = ham, 1 = spam).
    // A key that was never written also reads as 0, so use `isStored`
    // to distinguish "ham" from "never stored".
    mapping(string => uint256) public emailHashes;

    // True once storeEmailHash has been called for the given email hash.
    mapping(string => bool) public isStored;

    // Store the owner's address
    address public owner;

    // Modifier to restrict access to the owner only
    modifier onlyOwner() {
        require(msg.sender == owner, "Only the owner can store data.");
        _;
    }

    // Constructor to set the owner of the contract to the deployer
    constructor() {
        owner = msg.sender;
    }

    /// @notice Store the email hash and its spam status (0 = ham, 1 = spam).
    /// @dev Overwrites any status previously stored for the same hash.
    /// @param emailHash Hash string identifying the email.
    /// @param spamStatus Spam flag to record for the hash.
    function storeEmailHash(string memory emailHash, uint256 spamStatus) public onlyOwner {
        emailHashes[emailHash] = spamStatus;
        isStored[emailHash] = true;
    }

    /// @notice Get the spam status by email hash (0 = ham, 1 = spam).
    /// @dev Returns 0 for hashes that were never stored; see isEmailHashStored.
    /// @param emailHash Hash string identifying the email.
    /// @return The stored spam flag, or 0 when nothing was stored.
    function getEmailHashStatus(string memory emailHash) public view returns (uint256) {
        return emailHashes[emailHash];
    }

    /// @notice Tell whether a status has ever been stored for the email hash.
    /// @param emailHash Hash string identifying the email.
    /// @return True if storeEmailHash was called for this hash.
    function isEmailHashStored(string memory emailHash) public view returns (bool) {
        return isStored[emailHash];
    }
}


In [None]:
# Step 1: Import Required Libraries
import re
# NOTE(review): hashlib exposes sha3_224 / sha3_256 / sha3_384 / sha3_512 but no
# plain `sha3`, so this import is expected to raise ImportError. Nothing in this
# cell uses it (hashing goes through w3.keccak) - confirm, then drop or replace.
from hashlib import sha3

import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from solcx import compile_standard, install_solc
from web3 import Web3

# Step 2: Initialize Web3 and Smart Contract Tools
w3 = Web3(Web3.HTTPProvider('HTTP://127.0.0.1:7545'))  # Ganache default RPC URL

# Fail fast: every later step (deploy, store, verify) needs a reachable node.
# ConnectionError is still caught by any existing `except Exception` handler.
if not w3.is_connected():
    raise ConnectionError("❌ Connection to blockchain failed. Ensure Ganache or local node is running.")

# Step 3: Download NLTK Resources
# 'punkt_tab' is the tokenizer data word_tokenize loads on recent NLTK releases;
# 'punkt' is kept so that older releases keep working.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(nltk_resource)

# Step 4: Define Email Preprocessing Functions
stop_words = set(stopwords.words('english'))  # set gives O(1) membership tests per token
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    """Clean and normalize input text.

    The input is coerced with ``str()`` and lower-cased, then the following
    substitutions are applied in order:

    1. ``<...>`` spans (HTML-like tags) are removed.
    2. Whitespace-delimited tokens starting with ``http`` or ``www`` are removed.
    3. Every character other than ``a-z`` and whitespace is removed.
    4. Runs of whitespace are collapsed to a single space and the ends trimmed.

    Args:
        text: Any object; it is converted with ``str()`` first, so ``None``
            becomes the literal text ``"none"``.

    Returns:
        str: The normalized text, possibly empty.
    """
    # (pattern, replacement) pairs; order matters because tags and URLs must be
    # stripped before punctuation removal destroys their delimiters.
    substitutions = (
        (r'<.*?>', ''),
        (r'http\S+|www\S+|https\S+', ''),
        (r'[^a-z\s]', ''),
        (r'\s+', ' '),
    )
    text = str(text).lower()
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()

def tokenize_and_normalize(text):
    """Split text into word tokens, drop stopwords, and lemmatize what remains.

    Tokens found in the module-level ``stop_words`` set are skipped; every other
    token is passed through ``lemmatizer.lemmatize``. The resulting lemmas are
    returned joined by single spaces.
    """
    lemmas = []
    for token in word_tokenize(text):
        if token in stop_words:
            continue  # stopword: contributes nothing to the output
        lemmas.append(lemmatizer.lemmatize(token))
    return ' '.join(lemmas)

def preprocess_email(text):
    """Run the full preprocessing pipeline on one email.

    The text is first normalized by ``clean_text`` and the result is then fed
    to ``tokenize_and_normalize``, whose output is returned.
    """
    cleaned = clean_text(text)
    return tokenize_and_normalize(cleaned)

# Step 5: Load and Preprocess Dataset
df = pd.read_csv("emails.csv")
df.columns = df.columns.str.strip().str.lower()  # tolerate header whitespace/case differences
df['text'] = df['text'].fillna('')
df['processed_email'] = df['text'].apply(preprocess_email)
# Drop rows whose text is empty after preprocessing.
df = df[df['processed_email'].str.strip() != '']

# Step 6: Train-Test Split
# Split the processed text BEFORE fitting the vectorizer so that the held-out
# emails do not influence the TF-IDF vocabulary or IDF weights (data leakage).
# Sample count, random_state and stratification labels are unchanged, so the
# rows assigned to each side are the same as when splitting a feature matrix.
y = df['spam']
emails_train, emails_test, y_train, y_test = train_test_split(
    df['processed_email'], y, test_size=0.2, random_state=42, stratify=y
)

# Step 7: Feature Extraction (TF-IDF), fitted on the training split only
tfidf_vectorizer = TfidfVectorizer(max_features=5000)
X_train = tfidf_vectorizer.fit_transform(emails_train).toarray()
X_test = tfidf_vectorizer.transform(emails_test).toarray()
# Whole-corpus features under the original name `X`, for any later cell that
# expects it; produced with the train-fitted vectorizer.
X = tfidf_vectorizer.transform(df['processed_email']).toarray()

# Step 8: Train Logistic Regression Model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

# Step 9: Evaluation Metrics
print(f"✅ Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\n📋 Classification Report:\n", classification_report(y_test, y_pred))
print("\n🧮 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Step 10: Smart Contract Source Code
# Solidity source of the contract compiled and deployed below.
# `emailHashes` returns 0 for keys that were never written, which is also the
# "ham" value, so the contract additionally records which hashes were stored
# (`isStored` / `isEmailHashStored`). The pre-existing functions are unchanged.
contract_source_code = '''
pragma solidity ^0.8.0;

contract EmailSpamDetection {
    mapping(string => uint256) public emailHashes;
    mapping(string => bool) public isStored;
    address public owner;
    
    constructor() {
        owner = msg.sender;
    }

    modifier onlyOwner() {
        require(msg.sender == owner, "Only the owner can write to the blockchain.");
        _;
    }

    function storeEmailHash(string memory emailHash, uint256 spamStatus) public onlyOwner {
        emailHashes[emailHash] = spamStatus;
        isStored[emailHash] = true;
    }

    function getEmailHashStatus(string memory emailHash) public view returns (uint256) {
        return emailHashes[emailHash];
    }

    function isEmailHashStored(string memory emailHash) public view returns (bool) {
        return isStored[emailHash];
    }
}
'''

# Step 11: Compile the Contract using solcx
solc_version = '0.8.0'
source_unit = "EmailSpamDetection.sol"

install_solc(solc_version)  # make sure the compiler version used below is installed

# Standard-JSON compiler input: one source unit, requesting the ABI and the
# EVM bytecode for every contract in every file.
solc_input = {
    "language": "Solidity",
    "sources": {source_unit: {"content": contract_source_code}},
    "settings": {"outputSelection": {"*": {"*": ["abi", "evm.bytecode"]}}},
}
compiled_sol = compile_standard(solc_input, solc_version=solc_version)

# Pull the ABI and deployable bytecode out of the compiler output.
contract_artifact = compiled_sol['contracts'][source_unit]['EmailSpamDetection']
abi = contract_artifact['abi']
bytecode = contract_artifact['evm']['bytecode']['object']

# Step 12: Deploy Smart Contract to Blockchain
# The first account exposed by the node (Ganache) pays for and owns the deployment.
account = w3.eth.accounts[0]

# Contract factory built from the compiled ABI and bytecode; it has no address yet.
EmailSpamContract = w3.eth.contract(abi=abi, bytecode=bytecode)

# Send the constructor transaction, then block until its receipt is available.
tx_hash = EmailSpamContract.constructor().transact({'from': account})
tx_receipt = w3.eth.wait_for_transaction_receipt(tx_hash)

# Bind the ABI to the address reported in the receipt for all later calls.
deployed_address = tx_receipt.contractAddress
contract = w3.eth.contract(address=deployed_address, abi=abi)

print(f"✅ Smart Contract deployed at: {tx_receipt.contractAddress}")

# Step 13: Store Email Hashes and Spam Status
print("\n🔒 Storing email hashes on Blockchain...")

# Walk the two relevant columns in lockstep: one transaction per remaining row.
for processed_email, label in zip(df['processed_email'], df['spam']):
    # Keccak-256 digest of the processed text, sent to the contract as a hex string.
    email_hash = w3.keccak(text=processed_email).hex()
    spam_status = int(label)

    store_call = contract.functions.storeEmailHash(email_hash, spam_status)
    tx = store_call.transact({'from': account})
    # Wait for the receipt before sending the next transaction.
    w3.eth.wait_for_transaction_receipt(tx)

    print(f"Stored {email_hash} => {spam_status}")

print("\n🎉 All data stored securely on Private Blockchain!")

# Step 14: Retrieve and Verify Data from Blockchain
# Use the first remaining email as the example and hash it the same way as in
# the storing step.
processed_emails = df['processed_email']
email_to_verify = processed_emails.iloc[0]
email_hash_to_verify = w3.keccak(text=email_to_verify).hex()

# Read-only call: no transaction is sent.
status_lookup = contract.functions.getEmailHashStatus(email_hash_to_verify)
spam_status_on_blockchain = status_lookup.call()
# Any value other than 1 is reported as 'ham'.
print(f"\n📜 Verification: The email hash {email_hash_to_verify} is {'spam' if spam_status_on_blockchain == 1 else 'ham'}")
