In [1]:
!pip install kagglehub



In [2]:
import os

# Ensure the environment variable for Kaggle is set, but keep any KAGGLE_CONFIG_DIR
# that was already configured instead of overwriting it.
kaggle_config_dir = os.environ.setdefault(
    "KAGGLE_CONFIG_DIR", os.path.expanduser("~/.kaggle")
)

# Confirm the Kaggle file is accessible inside the directory that is actually in effect
kaggle_path = os.path.join(kaggle_config_dir, "kaggle.json")
if os.path.exists(kaggle_path):
    print("Kaggle API key is correctly placed.")
else:
    print("Kaggle API key is missing. Please check the file path.")


Kaggle API key is correctly placed.


In [3]:
!pip install torch torchvision




In [8]:
import kagglehub

# Fetch the dataset through kagglehub and show the directory it returned
DATASET_HANDLE = "ananysrivastava/birde-drone"
path = kagglehub.dataset_download(DATASET_HANDLE)

print("Path to dataset files:", path)

Path to dataset files: C:\Users\Tushar Jain\.cache\kagglehub\datasets\ananysrivastava\birde-drone\versions\1


In [10]:
import os
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import StackingClassifier
from sklearn.metrics import accuracy_score
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
from PIL import Image
from tqdm import tqdm
import shutil
from IPython.display import FileLink

# Custom Dataset class
class CustomDataset(Dataset):
    """Dataset that reads images from disk and pairs each one with its label.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, aligned index-for-index with ``image_paths``.
        transform: Optional callable applied to each RGB image before it is returned.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        The image is converted to RGB and, when a transform was supplied,
        passed through it before being returned.
        """
        # convert() returns a separate converted copy, so the underlying file can be
        # closed immediately instead of staying open until garbage collection.
        with Image.open(self.image_paths[idx]) as source:
            image = source.convert("RGB")
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

# Preprocessing pipeline: resize to 224x224, convert to a tensor, then normalise each
# channel with the mean/std below (these are the commonly used ImageNet statistics).
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]
transform = transforms.Compose(preprocessing_steps)

# Set paths and labels
# Reuse the directory returned by kagglehub.dataset_download() (stored in `path` by the
# download cell) rather than a hard-coded, machine-specific absolute path.
base_dir = path

# Category name -> folder holding that category's results.
# Folder names are kept verbatim (note the space in 'drone2 results').
result_folders = {
    'bird': 'bird_results',
    'drone+bird': 'bird+drone_results',
    'drone': 'drone2 results',
}
# Every category has the same three image sub-folders.
feature_subdirs = ('spectrograms', 'cepstrograms', 'cvds')

# Maps each category to the list of directories that contain its images.
image_dirs = {
    category: [os.path.join(base_dir, folder, subdir) for subdir in feature_subdirs]
    for category, folder in result_folders.items()
}

# Collect every image path together with an integer label; the label is the position
# of the image's category in `image_dirs`.
image_paths, labels = [], []
for label, (category, dirs) in enumerate(image_dirs.items()):
    for image_dir in dirs:  # named image_dir so the built-in dir() is not shadowed
        if not os.path.exists(image_dir):
            # Keep going, but make the gap visible instead of silently using less data.
            print(f"Warning: image directory not found, skipping: {image_dir}")
            continue
        # os.listdir() returns entries in arbitrary order; sorting keeps the dataset
        # order (and therefore any seeded split) stable between runs and machines.
        for filename in sorted(os.listdir(image_dir)):
            if filename.endswith(('.png', '.jpg')):
                image_paths.append(os.path.join(image_dir, filename))
                labels.append(label)

# Fail early with a clear message rather than with an obscure error further down.
if not image_paths:
    raise ValueError("No .png/.jpg images were found in any of the configured image directories.")

# Split data
# Stratified 80/20 split; the fixed random_state makes the partition reproducible,
# so metrics and saved models can be compared across runs.
train_paths, val_paths, train_labels, val_labels = train_test_split(
    image_paths, labels, test_size=0.2, stratify=labels, random_state=42
)

# Loaders
# Both datasets share the same preprocessing; only the training loader shuffles.
train_dataset = CustomDataset(train_paths, train_labels, transform=transform)
val_dataset = CustomDataset(val_paths, val_labels, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# ResNet-50 for feature extraction and training
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# `pretrained=True` is deprecated in newer torchvision releases in favour of the
# `weights=` argument; fall back to the old keyword where the weights enum is absent.
if hasattr(models, 'ResNet50_Weights'):
    resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    resnet = models.resnet50(pretrained=True)

# Replace the final layer with one that has a single output per category.
num_classes = len(image_dirs)
resnet.fc = nn.Linear(resnet.fc.in_features, num_classes)
resnet = resnet.to(device)

# Loss function and optimizer (all network parameters are optimised)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet.parameters(), lr=0.001)

# Training function
def train_resnet(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader`` with the model in train mode.

    Args:
        model: Network to optimise.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model after every batch.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_accuracy)``: the batch-size-weighted average of the
        per-batch losses and the percentage of correct predictions.
    """
    model.train()
    loss_sum, num_correct, num_seen = 0.0, 0, 0

    for batch_images, batch_labels in tqdm(dataloader, desc="Training"):
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        # Weight each batch's loss by its size so unequal batches average correctly.
        loss_sum += batch_loss.item() * batch_images.size(0)
        predictions = logits.max(1).indices
        num_correct += (predictions == batch_labels).sum().item()
        num_seen += batch_labels.size(0)

    return loss_sum / num_seen, num_correct / num_seen * 100

# Validation function
def validate_resnet(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` in eval mode without tracking gradients.

    Args:
        model: Network to evaluate.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        device: Device the batches are moved to.

    Returns:
        ``(epoch_loss, epoch_accuracy)``: the batch-size-weighted average of the
        per-batch losses and the percentage of correct predictions.
    """
    model.eval()
    loss_sum, num_correct, num_seen = 0.0, 0, 0

    with torch.no_grad():
        for batch_images, batch_labels in tqdm(dataloader, desc="Validation"):
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)

            # Weight each batch's loss by its size so unequal batches average correctly.
            loss_sum += batch_loss.item() * batch_images.size(0)
            predictions = logits.max(1).indices
            num_correct += (predictions == batch_labels).sum().item()
            num_seen += batch_labels.size(0)

    return loss_sum / num_seen, num_correct / num_seen * 100

# Train ResNet-50, writing a checkpoint whenever validation accuracy sets a new record
num_epochs = 10
best_val_accuracy = 0.0
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")

    train_loss, train_accuracy = train_resnet(
        resnet, train_loader, criterion, optimizer, device
    )
    val_loss, val_accuracy = validate_resnet(resnet, val_loader, criterion, device)

    print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.2f}%")

    # Nothing to checkpoint unless this epoch strictly beats the best seen so far.
    if not val_accuracy > best_val_accuracy:
        continue

    best_val_accuracy = val_accuracy
    torch.save(resnet.state_dict(), "resnet50_best_model.pth")
    print("Best model saved.")

# Feature extraction
# Training finishes on the last epoch's weights, which are not necessarily the best
# ones, so restore the checkpoint written by the training loop before removing the
# classification head. The isinstance guard keeps this cell re-runnable: once fc has
# been replaced, the checkpoint (which still holds fc weights) no longer fits the model.
best_checkpoint = "resnet50_best_model.pth"
if isinstance(resnet.fc, nn.Linear) and os.path.exists(best_checkpoint):
    resnet.load_state_dict(torch.load(best_checkpoint, map_location=device))

resnet.fc = nn.Identity()  # Remove classification head for feature extraction
resnet.eval()


def extract_features(model, dataloader, device=None):
    """Run ``model`` over ``dataloader`` and collect its outputs as NumPy arrays.

    Args:
        model: Network whose outputs are used as feature vectors.
        dataloader: Iterable yielding ``(images, targets)`` batches.
        device: Device the images are moved to. When omitted, the device of the
            model's first parameter is used (CPU if the model has no parameters).

    Returns:
        ``(features, labels)``: the batch outputs stacked row-wise, and the
        targets as an array in the order the loader yielded them.

    Raises:
        ValueError: If ``dataloader`` yields no batches (nothing to stack).
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    features, labels = [], []
    with torch.no_grad():
        for images, targets in dataloader:
            outputs = model(images.to(device))
            features.append(outputs.cpu().numpy())
            labels.extend(targets.numpy())
    return np.vstack(features), np.array(labels)


# train_labels / val_labels are rebound here to arrays ordered as the loaders yielded
# the samples, which keeps them aligned with the feature rows even for the shuffled loader.
train_features, train_labels = extract_features(resnet, train_loader)
val_features, val_labels = extract_features(resnet, val_loader)




Epoch 1/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [27:22<00:00, 13.46s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:19<00:00,  4.51s/it]


Train Loss: 0.3664, Train Accuracy: 79.64%
Val Loss: 1.0077, Val Accuracy: 70.49%
Best model saved.
Epoch 2/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [25:53<00:00, 12.73s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:49<00:00,  5.46s/it]


Train Loss: 0.2700, Train Accuracy: 84.55%
Val Loss: 0.4000, Val Accuracy: 84.11%
Best model saved.
Epoch 3/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [28:00<00:00, 13.77s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:38<00:00,  5.12s/it]


Train Loss: 0.1981, Train Accuracy: 89.22%
Val Loss: 0.1587, Val Accuracy: 90.20%
Best model saved.
Epoch 4/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [24:52<00:00, 12.23s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:26<00:00,  4.73s/it]


Train Loss: 0.3807, Train Accuracy: 79.39%
Val Loss: 0.5420, Val Accuracy: 78.22%
Epoch 5/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [26:17<00:00, 12.93s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:27<00:00,  4.74s/it]


Train Loss: 0.2670, Train Accuracy: 82.66%
Val Loss: 0.2682, Val Accuracy: 81.84%
Epoch 6/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [23:28<00:00, 11.55s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:19<00:00,  4.48s/it]


Train Loss: 0.2582, Train Accuracy: 82.46%
Val Loss: 0.2373, Val Accuracy: 83.38%
Epoch 7/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [29:06<00:00, 14.32s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:17<00:00,  4.44s/it]


Train Loss: 0.2447, Train Accuracy: 83.33%
Val Loss: 0.2374, Val Accuracy: 83.38%
Epoch 8/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [25:11<00:00, 12.39s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:09<00:00,  4.18s/it]


Train Loss: 0.2369, Train Accuracy: 83.57%
Val Loss: 0.6261, Val Accuracy: 82.46%
Epoch 9/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [21:43<00:00, 10.68s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:18<00:00,  4.47s/it]


Train Loss: 0.2216, Train Accuracy: 86.79%
Val Loss: 0.2785, Val Accuracy: 86.69%
Epoch 10/10


Training: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [28:13<00:00, 13.88s/it]
Validation: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:17<00:00,  4.42s/it]


Train Loss: 0.2142, Train Accuracy: 88.47%
Val Loss: 0.1919, Val Accuracy: 90.71%
Best model saved.


In [11]:
# PCA for dimensionality reduction
# Seed the estimator: depending on the data shape scikit-learn may pick its randomized
# SVD solver, and an unseeded fit would then give slightly different components per run.
pca = PCA(n_components=50, random_state=42)
train_features_pca = pca.fit_transform(train_features)  # fit on the training features only
val_features_pca = pca.transform(val_features)  # reuse the projection learned above
joblib.dump(pca, 'pca_model.pkl')

['pca_model.pkl']

In [12]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Train models
# SVC(probability=True) derives its probability estimates from an internal, randomly
# shuffled cross-validation, so it is seeded to keep predict_proba() stable across runs.
svm = SVC(kernel='linear', probability=True, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
lr = LogisticRegression(max_iter=1000)

# Fit the models on PCA-transformed training features
for classifier in (svm, knn, rf, lr):
    classifier.fit(train_features_pca, train_labels)

# Evaluate the models on the PCA-transformed validation features
svm_preds = svm.predict(val_features_pca)
knn_preds = knn.predict(val_features_pca)
rf_preds = rf.predict(val_features_pca)
lr_preds = lr.predict(val_features_pca)

# Calculate accuracies (kept under individual names because later cells use them)
svm_accuracy = accuracy_score(val_labels, svm_preds)
knn_accuracy = accuracy_score(val_labels, knn_preds)
rf_accuracy = accuracy_score(val_labels, rf_preds)
lr_accuracy = accuracy_score(val_labels, lr_preds)

# Print the accuracies
for model_name, model_accuracy in (
    ("SVM", svm_accuracy),
    ("KNN", knn_accuracy),
    ("Random Forest", rf_accuracy),
    ("Logistic Regression", lr_accuracy),
):
    print(f"{model_name} Accuracy: {model_accuracy * 100:.2f}%")


SVM Accuracy: 93.09%
KNN Accuracy: 95.67%
Random Forest Accuracy: 96.49%
Logistic Regression Accuracy: 94.01%


In [13]:
import joblib
# Persist each fitted classifier to its own file, using paths relative to the
# current working directory.
joblib.dump(svm, 'svm_model.pkl')
joblib.dump(knn, 'knn_model.pkl')
joblib.dump(rf, 'rf_model.pkl')
# As the last expression of the cell, this call's return value is what the notebook displays.
joblib.dump(lr, 'lr_model.pkl')


['lr_model.pkl']

In [14]:
import numpy as np
from sklearn.metrics import accuracy_score

# Hard predictions of every base classifier on the validation features
svm_preds, knn_preds, rf_preds, lr_preds = (
    model.predict(val_features_pca) for model in (svm, knn, rf, lr)
)

# One row per classifier, one column per validation sample
all_preds = np.array([svm_preds, knn_preds, rf_preds, lr_preds])

# Majority vote per sample: count the votes for each label and keep the most frequent
# one (argmax returns the smallest label when several labels tie).
ensemble_preds = np.array([np.bincount(votes).argmax() for votes in all_preds.T])

# Score the voted predictions against the validation labels
ensemble_accuracy = accuracy_score(val_labels, ensemble_preds)
print(f"Ensemble Accuracy (Majority Voting): {ensemble_accuracy * 100:.2f}%")


Ensemble Accuracy (Majority Voting): 95.56%


In [15]:
# Weight each classifier by the validation accuracy it achieved.
# NOTE(review): the weights come from the same validation set that is scored below,
# so the reported figure is likely optimistic - confirm on held-out data.
weights = [svm_accuracy, knn_accuracy, rf_accuracy, lr_accuracy]

# Class-probability estimates of every classifier on the validation features
svm_probs, knn_probs, rf_probs, lr_probs = (
    model.predict_proba(val_features_pca) for model in (svm, knn, rf, lr)
)

# Accuracy-weighted sum of the probability matrices, accumulated in weight order
ensemble_probs = sum(
    weight * probs
    for weight, probs in zip(weights, (svm_probs, knn_probs, rf_probs, lr_probs))
)

# For every sample, pick the class with the largest combined score
ensemble_preds = np.argmax(ensemble_probs, axis=1)

# Score the combined predictions against the validation labels
ensemble_accuracy = accuracy_score(val_labels, ensemble_preds)
print(f"Ensemble Accuracy (Weighted Voting): {ensemble_accuracy * 100:.2f}%")


Ensemble Accuracy (Weighted Voting): 95.98%


In [16]:
from sklearn.ensemble import StackingClassifier

# Base learners for the stack, as (name, estimator) pairs
base_estimators = list(zip(('svm', 'knn', 'rf', 'lr'), (svm, knn, rf, lr)))

# Meta-model that combines the base learners' outputs
meta_learner = LogisticRegression(max_iter=1000)
stack_model = StackingClassifier(
    estimators=base_estimators,
    final_estimator=meta_learner,
    cv=5,
)

# Fit the stack on the PCA-transformed training features
stack_model.fit(train_features_pca, train_labels)

# Report accuracy on the PCA-transformed validation features
stack_accuracy = stack_model.score(val_features_pca, val_labels)
print(f"Stacking Accuracy: {stack_accuracy * 100:.2f}%")


Stacking Accuracy: 96.18%
