In [1]:
import cv2
import numpy as np
from skimage.feature import hog
from scipy.fftpack import dct
import os
import joblib
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

def extract_patch_features(patch):
    """Build a 1-D feature vector describing a single grayscale patch.

    The vector is the concatenation, in this order, of:
      * 1 value   - sum of the Canny edge map divided by the pixel count
      * 64 values - top-left 8x8 block of the orthonormal 2-D DCT
      * 32 values - 32-bin intensity histogram, normalised to sum to 1
      * N values  - HOG descriptor (N depends on the patch size)
      * 64 values - 8x8 corner block of the shifted FFT magnitude

    Parameters
    ----------
    patch : numpy.ndarray
        2-D image patch. Assumed to be 8-bit grayscale, as produced by
        slicing an image read with cv2.IMREAD_GRAYSCALE - TODO confirm for
        any other caller.

    Returns
    -------
    numpy.ndarray
        Flat feature vector laid out as described above.

    NOTE(review): np.fft.fftshift moves the zero-frequency term to the
    centre of the array, so the ``[:8, :8]`` slice of the shifted spectrum
    is a corner block rather than the low-frequency centre. Confirm this is
    intended before changing it; models trained on these features depend on
    the current layout.
    """
    height, width = patch.shape[0], patch.shape[1]
    normalized = patch.astype(np.float32) / 255.0

    # Edge density
    edge_map = cv2.Canny(patch, 100, 200)
    edge_density = np.sum(edge_map) / (height * width)

    # Separable 2-D DCT: transform along axis 0 first, then along axis 1
    dct_rows = dct(normalized, axis=0, norm='ortho')
    dct_2d = dct(dct_rows, axis=1, norm='ortho')
    dct_block = dct_2d[:8, :8].ravel()

    # Normalised intensity histogram
    counts = cv2.calcHist([patch], [0], None, [32], [0, 256])
    intensity_hist = counts.ravel() / np.sum(counts)

    # HOG texture descriptor
    texture = hog(
        patch,
        orientations=9,
        pixels_per_cell=(16, 16),
        cells_per_block=(2, 2),
        feature_vector=True,
    )

    # Magnitude of the shifted 2-D spectrum
    spectrum = np.abs(np.fft.fftshift(np.fft.fft2(normalized)))
    spectrum_block = spectrum[:8, :8].ravel()

    parts = ([edge_density], dct_block, intensity_hist, texture, spectrum_block)
    return np.concatenate(parts)

def process_patch(img, y, x, patch_size):
    """Crop the square window whose top-left corner is (y, x) and featurize it.

    Parameters
    ----------
    img : numpy.ndarray
        Source image, indexed as ``img[row, col]``.
    y, x : int
        Row and column of the window's top-left corner.
    patch_size : int
        Side length of the square window, in pixels.

    Returns
    -------
    numpy.ndarray
        Whatever ``extract_patch_features`` returns for the cropped window.
    """
    rows = slice(y, y + patch_size)
    cols = slice(x, x + patch_size)
    return extract_patch_features(img[rows, cols])

def extract_image_features(image_path, patch_size=128, stride=32):
    """Compute an image-level feature vector by aggregating patch features.

    The image is read as grayscale and covered with square windows of side
    ``patch_size`` placed every ``stride`` pixels; only windows that fit
    entirely inside the image are used. Each window is featurized with
    ``process_patch`` and the per-patch vectors are summarised.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.
    patch_size : int, default 128
        Side length of each square patch, in pixels.
    stride : int, default 32
        Step between consecutive patch origins, in pixels.

    Returns
    -------
    numpy.ndarray or None
        ``[mean, max, std]`` of the per-patch features (each of length F)
        followed by two ratios, i.e. ``3 * F + 2`` values. ``None`` is
        returned, after printing a message, when the image cannot be read
        or is too small to hold a single patch.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Failed to load image: {image_path}")
        return None

    height, width = img.shape[:2]
    patch_coords = [(y, x)
                    for y in range(0, height - patch_size + 1, stride)
                    for x in range(0, width - patch_size + 1, stride)]

    # An image smaller than one patch yields no windows. Report it explicitly
    # (it is otherwise indistinguishable from a load failure) and bail out
    # before starting a thread pool that would have nothing to do.
    if not patch_coords:
        print(f"Image is smaller than a {patch_size}x{patch_size} patch, "
              f"no features extracted: {image_path}")
        return None

    # executor.map returns results in the order of patch_coords.
    with ThreadPoolExecutor() as executor:
        patch_features = np.array(list(executor.map(
            lambda coords: process_patch(img, coords[0], coords[1], patch_size),
            patch_coords,
        )))

    # Aggregate patch features
    mean_features = np.mean(patch_features, axis=0)
    max_features = np.max(patch_features, axis=0)
    std_features = np.std(patch_features, axis=0)

    # High pixelation ratio (using edge and DCT features).
    # Column 0 is the edge feature and columns 1..64 are the 8x8 DCT block,
    # matching the layout produced by extract_patch_features.
    edge_scores = patch_features[:, 0]
    dct_scores = np.sum(patch_features[:, 1:65], axis=1)
    # Fraction of patches whose score exceeds 1.5x the image's median score.
    high_edge_ratio = np.mean(edge_scores > np.median(edge_scores) * 1.5)
    high_dct_ratio = np.mean(dct_scores > np.median(dct_scores) * 1.5)

    return np.concatenate([
        mean_features, max_features, std_features,
        [high_edge_ratio, high_dct_ratio]
    ])

In [None]:
# def build_dataset(data_dir):
#     class_mapping = {'nonpixelated_img': 0, 'pixelated_img': 1}
#     data = []
#     labels = []

#     for class_name in os.listdir(data_dir):
#         class_dir = os.path.join(data_dir, class_name)
#         if not os.path.isdir(class_dir):
#             continue

#         class_label = class_mapping.get(class_name)
#         if class_label is None:
#             print(f"Skipping unknown class: {class_name}")
#             continue

#         for img_file in tqdm(os.listdir(class_dir), desc=f"Processing {class_name}"):
#             img_path = os.path.join(class_dir, img_file)
#             features = extract_image_features(img_path)
#             if features is not None:
#                 data.append(features)
#                 labels.append(class_label)

#     return np.array(data), np.array(labels)

# # Example usage
# if __name__ == "__main__":

#     print("Building dataset...")
#     X, y = build_dataset(r'C:\Users\KIIT\Desktop\Projects\dataset')

#     if len(X) == 0:
#         print("No valid data extracted. Please check your dataset.")
#     else:
#         print(f"Dataset built: {len(X)} samples, {X.shape[1]} features")

In [2]:
import joblib
import numpy as np
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import accuracy_score

# Load the dataset using joblib
# The file is expected to hold an (X, y) tuple: it is unpacked as such here
# and written as such by the joblib.dump((X, y), ...) cell further down.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load files you created yourself.
# NOTE(review): absolute, machine-specific path; the notebook will not run
# elsewhere without editing it.
file_path = r'C:\Users\KIIT\Desktop\Projects\model\dataset.pkl'
X, y = joblib.load(file_path)

# Split the dataset into training and testing sets
# 80/20 split with a fixed seed so the split is repeatable. No `stratify`
# argument is passed, so class proportions are not forced to match between
# the two splits.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier

# Fit the random-forest baseline on the training split
clf = RandomForestClassifier(
    n_estimators=200,
    max_depth=30,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
)
clf.fit(X_train, y_train)

# Per-class metrics and confusion matrix on the held-out split
y_pred = clf.predict(X_test)
print("Classification Report:")
print(classification_report(
    y_test,
    y_pred,
    target_names=['Not Pixelated', 'Pixelated'],
))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Classification Report:
               precision    recall  f1-score   support

Not Pixelated       0.97      0.99      0.98       715
    Pixelated       0.99      0.97      0.98       956

     accuracy                           0.98      1671
    macro avg       0.98      0.98      0.98      1671
 weighted avg       0.98      0.98      0.98      1671

Confusion Matrix:
[[705  10]
 [ 25 931]]


In [4]:
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np

# Create and fit the HistGradientBoostingClassifier
# Default hyper-parameters; only the seed is fixed for repeatability.
hclf = HistGradientBoostingClassifier(random_state=42)
hclf.fit(X_train, y_train)

# Evaluate the model
# score() is evaluated on the held-out split and printed with two decimals.
score = hclf.score(X_test, y_test)
print(f'Model accuracy: {score:.2f}')

# NOTE: `score` and `y_pred` are notebook-wide names that other model cells
# also assign, so after this cell they refer to this model's results.
y_pred = hclf.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=['Not Pixelated', 'Pixelated']))
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Model accuracy: 0.99
Classification Report:
               precision    recall  f1-score   support

Not Pixelated       0.98      0.99      0.99       715
    Pixelated       0.99      0.99      0.99       956

     accuracy                           0.99      1671
    macro avg       0.99      0.99      0.99      1671
 weighted avg       0.99      0.99      0.99      1671

Confusion Matrix:
[[709   6]
 [ 13 943]]


In [5]:
import xgboost as xgb
# XGBoost classifier with fixed hyper-parameters.
# NOTE(review): the fractional values look like the result of an automated
# search (presumably the Optuna cell kept commented out below) - confirm
# their provenance.
xgbm = xgb.XGBClassifier(
    n_estimators=200,
    max_depth=4,
    learning_rate=0.17428234843001944,
    subsample=0.6129965942712094,
    colsample_bytree=0.8184848884905318,
    gamma=0.05718479848470478,
)
xgbm.fit(X_train, y_train)

# Accuracy on the held-out split
score = xgbm.score(X_test, y_test)
print(f'Model accuracy: {score:.2f}')

# Per-class metrics; no target_names are given, so classes print as 0 / 1
y_pred = xgbm.predict(X_test)

print("Classification Report:")
print(classification_report(y_test, y_pred))

Model accuracy: 0.99
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       715
           1       0.99      0.99      0.99       956

    accuracy                           0.99      1671
   macro avg       0.99      0.99      0.99      1671
weighted avg       0.99      0.99      0.99      1671



In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingClassifier

# classification_report is used at the end of this cell; import it here so the
# cell does not rely on an earlier cell having imported it.
from sklearn.metrics import classification_report

# Define the Stacking Classifier
# Base learners use the same hyper-parameters as the stand-alone models
# trained in the cells above. The XGBoost learner is built from the
# XGBClassifier name imported in this cell rather than the `xgb` module alias
# that only exists if the XGBoost cell was run first.
estimators = [
    ('rf', RandomForestClassifier(
        n_estimators=200,
        min_samples_split=2,
        min_samples_leaf=1,
        max_depth=30,
        random_state=42,
    )),
    ('xgb', XGBClassifier(
        n_estimators=200,
        max_depth=4,
        learning_rate=0.17428234843001944,
        subsample=0.6129965942712094,
        colsample_bytree=0.8184848884905318,
        gamma=0.05718479848470478,
    )),
    ('hclf', HistGradientBoostingClassifier(random_state=42)),
]

# The meta-learner is an MLP trained on the base learners' cross-validated
# predictions (cv=5).
stack = StackingClassifier(
    estimators=estimators,
    final_estimator=MLPClassifier(random_state=42),
    cv=5
)

# Train the model
stack.fit(X_train, y_train)

# Predict and evaluate on the held-out split
y_pred = stack.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy of Stacking Classifier: {accuracy:.2f}')
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy of Stacking Classifier: 0.99
Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       715
           1       0.99      0.99      0.99       956

    accuracy                           0.99      1671
   macro avg       0.99      0.99      0.99      1671
weighted avg       0.99      0.99      0.99      1671



In [8]:
def predict_image(image_path):
    """Classify one image with each of the four in-memory models.

    The models are the notebook-level objects ``clf``, ``hclf``, ``xgbm``
    and ``stack`` fitted in earlier cells (not a model loaded from disk).

    Parameters
    ----------
    image_path : str
        Path of the image to classify.

    Returns
    -------
    tuple or None
        ``(prediction, probability, hprediction, hprobability,
        xprediction, xprobability, sprediction, sprobability)`` - for each
        model, in the order clf, hclf, xgbm, stack, the predicted label
        followed by the second column of ``predict_proba`` (class index 1).
        ``None`` if feature extraction fails.
    """
    # Extract features from the new image
    features = extract_image_features(image_path)
    if features is None:
        print("Failed to extract features from the image.")
        return None

    # The estimators expect a 2-D array: one row per sample
    sample = features.reshape(1, -1)

    outputs = []
    for model in (clf, hclf, xgbm, stack):
        label = model.predict(sample)[0]
        class1_probability = model.predict_proba(sample)[0][1]
        outputs.extend((label, class1_probability))

    return tuple(outputs)

In [9]:
# Example usage
if __name__ == "__main__":
    new_image_path = r"C:\Users\KIIT\Desktop\Projects\backiee-123754.jpg"

    # predict_image returns None when features cannot be extracted; unpacking
    # None directly would raise "TypeError: cannot unpack non-iterable
    # NoneType object", so check before unpacking.
    result = predict_image(new_image_path)
    if result is None:
        print(f"Could not classify image: {new_image_path}")
    else:
        (prediction, probability,
         hprediction, hprobability,
         xprediction, xprobability,
         sprediction, sprobability) = result

        # A label of 1 is reported as 'Pixelated', anything else as 'Not Pixelated'.
        print(f"CLF Prediction: {'Pixelated' if prediction == 1 else 'Not Pixelated'}")
        print(f"HCLF Prediction: {'Pixelated' if hprediction == 1 else 'Not Pixelated'}")
        print(f"XGB Prediction: {'Pixelated' if xprediction == 1 else 'Not Pixelated'}")
        print(f"Stack Prediction: {'Pixelated' if sprediction == 1 else 'Not Pixelated'}")

CLF Prediction: Not Pixelated
HCLF Prediction: Not Pixelated
XGB Prediction: Not Pixelated
Stack Prediction: Not Pixelated


In [48]:
# Persist the feature matrix and labels as a single (X, y) tuple.
# NOTE(review): in this notebook X and y are themselves loaded from
# dataset.pkl in the same folder, and the cell that builds them from images
# is commented out - so on a fresh run this rewrites the file it was read
# from. Confirm whether this cell is still needed.
# NOTE(review): this cell's execution count is out of sequence with the
# cells around it; re-check the notebook with Restart & Run All.
joblib.dump((X, y), os.path.join(r'C:\Users\KIIT\Desktop\Projects\model', "dataset.pkl"))
print("Dataset saved.")

Dataset saved.


In [47]:
# Persist the fitted stacking ensemble (`stack`) to disk.
# Only the estimator is saved: inputs must still be featurized with
# extract_image_features before calling predict on the reloaded model.
# NOTE(review): absolute, machine-specific output folder.
joblib.dump(stack, os.path.join(r'C:\Users\KIIT\Desktop\Projects\model', 'pixelation_model.pkl'))
print("Model saved.")

Model saved.


In [11]:
import joblib
import os

# Specify the path to the saved model
# Same folder and file name that the joblib.dump(stack, ...) cell writes to.
model_path = os.path.join(r'C:\Users\KIIT\Desktop\Projects\model', 'pixelation_model.pkl')

# Load the model from the file
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files from a trusted source.
loaded_model = joblib.load(model_path)
print("Model loaded successfully.")

Model loaded successfully.


In [15]:
# Featurize one image for the reloaded model.
sample_image_path = r'C:\Users\KIIT\Desktop\Projects\WhatsApp Image 2024-06-05 at 10.58.04_4627b58c.jpg'
features = extract_image_features(sample_image_path)

# extract_image_features returns None when the image cannot be read or is too
# small for a single patch; fail with a clear message instead of an
# AttributeError from calling .reshape on None.
if features is None:
    raise ValueError(f"Could not extract features from image: {sample_image_path}")

# The estimator expects a 2-D array: one row per sample.
features = features.reshape(1, -1)

In [16]:
# Make predictions with the loaded model
# `features` is the single-row array built in the previous cell, so the result
# holds one label; elsewhere in this notebook a label of 1 is reported as
# 'Pixelated'.
predictions = loaded_model.predict(features)
print("Predictions:", predictions)

Predictions: [1]


In [11]:
# from sklearn.model_selection import RandomizedSearchCV
# # Define the parameter grid
# param_grid = {
#     'n_estimators': [50, 100, 200],
#     'max_depth': [3, 4, 5, 6],
#     'learning_rate': [0.01, 0.05, 0.1, 0.2],
#     'subsample': [0.6, 0.8, 1.0],
#     'colsample_bytree': [0.6, 0.8, 1.0],
#     'gamma': [0, 0.1, 0.2, 0.3]
# }

# # Set up the randomized search
# random_search = RandomizedSearchCV(estimator=xgbm, param_distributions=param_grid, n_iter=50, cv=3, scoring='accuracy', verbose=1, n_jobs=-1, random_state=42)

# # Fit the model
# random_search.fit(X_train, y_train)

# # Get the best parameters and the best score
# best_params = random_search.best_params_
# best_score = random_search.best_score_

# print("Best parameters found: ", best_params)
# print("Best accuracy found: ", best_score)

# # Train the model with the best parameters
# best_xgb = random_search.best_estimator_
# best_xgb.fit(X_train, y_train)

# # Make predictions on the test set
# y_pred = best_xgb.predict(X_test)

# # Evaluate the model
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Test set accuracy: {accuracy:.2%}")

Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best parameters found:  {'subsample': 0.8, 'n_estimators': 200, 'max_depth': 4, 'learning_rate': 0.2, 'gamma': 0.1, 'colsample_bytree': 0.8}
Best accuracy found:  0.9844304929711756
Test set accuracy: 98.76%


In [None]:
# import optuna


# # Define the objective function for Optuna
# def objective(trial):
#     param = {
#         'n_estimators': trial.suggest_int('n_estimators', 50, 200),
#         'max_depth': trial.suggest_int('max_depth', 3, 6),
#         'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.2),
#         'subsample': trial.suggest_uniform('subsample', 0.6, 1.0),
#         'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.6, 1.0),
#         'gamma': trial.suggest_uniform('gamma', 0, 0.3)
#     }
    
#     xgb_clf = xgb.XGBClassifier(**param, use_label_encoder=False, eval_metric='logloss')
#     xgb_clf.fit(X_train, y_train)
    
#     preds = xgb_clf.predict(X_test)
#     accuracy = accuracy_score(y_test, preds)
    
#     return accuracy

# # Create a study object and optimize the objective function
# study = optuna.create_study(direction='maximize')
# study.optimize(objective, n_trials=50)

# # Print the best parameters and the best score
# best_params = study.best_params
# best_score = study.best_value

# print("Best parameters found: ", best_params)
# print("Best accuracy found: ", best_score)

# # Train the model with the best parameters
# best_xgb = xgb.XGBClassifier(**best_params, use_label_encoder=False, eval_metric='logloss')
# best_xgb.fit(X_train, y_train)

# # Make predictions on the test set
# y_pred = best_xgb.predict(X_test)

# # Evaluate the model
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Test set accuracy: {accuracy:.2%}")


In [10]:
# from sklearn.model_selection import GridSearchCV

# param_grid = {
#     'learning_rate': [0.01, 0.1, 0.2],
#     'max_iter': [100, 200, 300],
#     'max_leaf_nodes': [31, 63, 127],
#     'max_depth': [None, 3, 5, 7],
#     'min_samples_leaf': [20, 50, 100],
#     'l2_regularization': [0, 0.1, 1],
#     'max_bins': [255, 512]
# }


# # Initialize Grid Search
# grid_search = GridSearchCV(estimator=hclf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)

# # Fit Grid Search
# grid_search.fit(X_train, y_train)

# # Print the best parameters and the best score
# print("Best parameters found: ", grid_search.best_params_)
# print("Best cross-validation accuracy: {:.2f}".format(grid_search.best_score_))

# # Evaluate on the test set
# best_model = grid_search.best_estimator_
# y_pred = best_model.predict(X_test)
# accuracy = accuracy_score(y_test, y_pred)
# print("Test set accuracy: {:.2f}".format(accuracy))