In [8]:
from roboflow import Roboflow
rf = Roboflow(api_key="vk7D7FyOEOnxeUELRcfx")
project = rf.workspace("railway-track-kwxud").project("nrcan-final-water-fix")
version = project.version(18)
dataset = version.download("yolov5")

loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in NRCan-final-water-fix-18 to yolov5pytorch:: 100%|██████████| 487140/487140 [00:51<00:00, 9385.74it/s] 





Extracting Dataset Version Zip to NRCan-final-water-fix-18 in yolov5pytorch:: 100%|██████████| 8296/8296 [00:01<00:00, 5649.69it/s]


### Data generation for ResNet

In [12]:
import os
import shutil

# Define the directories
labels_dir = '/Users/janekkorczynski/Desktop/BizHack/NRCan-final-water-fix-18/test/labels'
images_dir = '/Users/janekkorczynski/Desktop/BizHack/NRCan-final-water-fix-18/test/images'
non_destructive_dir = '/Users/janekkorczynski/Desktop/BizHack/DataRes/Test/Destructive'

# Ensure the non_destructive directory exists
if not os.path.exists(non_destructive_dir):
    os.makedirs(non_destructive_dir)

# Loop through the label files
for label_file in os.listdir(labels_dir):
    try:
        # Open and read the label file in binary mode
        with open(os.path.join(labels_dir, label_file), 'rb') as file:
            first_line = file.readline().decode('utf-8').strip()
            if first_line.startswith('0') or first_line.startswith('1'):
                # Corresponding image file name (assuming it's .jpg)
                image_file = label_file.replace('.txt', '.jpg')
                # Check if the corresponding image file exists
                if os.path.exists(os.path.join(images_dir, image_file)):
                    # Move the image file to the non_destructive folder
                    shutil.move(os.path.join(images_dir, image_file), os.path.join(non_destructive_dir, image_file))
                else:
                    print(f"Image file {image_file} not found for label {label_file}.")
    except UnicodeDecodeError:
        print(f"UnicodeDecodeError: Skipping file {label_file} due to encoding issues.")
    except IOError as e:
        print(f"IOError: Skipping file {label_file} due to error: {e}")

print("Non-destructive images moved successfully.")

Non-destructive images moved successfully.


### RestNet Model 

In [3]:
import os
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img
import joblib

# Define the classes exactly as they appear in your directories
classes = ["Defective", "Non defective"]

# Load the ResNet50 model without the top layer
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Function to extract features from images in a directory
def extract_features(directory):
    features = []
    labels = []
    class_counts = {class_name: 0 for class_name in classes}  # For counting samples per class
    for label in os.listdir(directory):
        if label not in classes:
            print(f"Skipping {label} as it is not in the defined classes.")
            continue
        label_dir = os.path.join(directory, label)
        if not os.path.isdir(label_dir):
            print(f"Skipping {label_dir} as it is not a directory.")
            continue
        for filename in os.listdir(label_dir):
            img_path = os.path.join(label_dir, filename)
            if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                print(f"Skipping {img_path} as it is not an image file.")
                continue
            print(f"Processing {img_path}...")
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img)
            img_array = np.expand_dims(img_array, axis=0)
            img_array = preprocess_input(img_array)
            feature = base_model.predict(img_array)
            features.append(feature.flatten())
            labels.append(classes.index(label))
            class_counts[label] += 1  # Increment class count
    print(f"Class counts in {directory}: {class_counts}")  # Print class counts
    return np.array(features), np.array(labels)

# Directories containing your train and test datasets
train_dir = '/Users/janekkorczynski/Desktop/BizHack/DataRes/Train'
test_dir = '/Users/janekkorczynski/Desktop/BizHack/DataRes/Test'

# Extract features from the training set
train_features, train_labels = extract_features(train_dir)

# Extract features from the test set
test_features, test_labels = extract_features(test_dir)

# Check if there are at least 2 classes in the training data
unique_classes = np.unique(train_labels)
if len(unique_classes) < 2:
    raise ValueError("Training data must contain at least two classes.")

# Train a logistic regression classifier on the extracted features
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(max_iter=1000)
clf.fit(train_features, train_labels)

# Save the trained model
model_filename = 'logistic_regression_model.joblib'
joblib.dump(clf, model_filename)
print(f"Model saved as {model_filename}")

# Make predictions on the test set
predictions = clf.predict(test_features)

# Print the classification report
from sklearn.metrics import classification_report

print(classification_report(test_labels, predictions, target_names=classes))


Processing /Users/janekkorczynski/Desktop/BizHack/DataRes/Train/Defective/DJI_0907_png.rf.437d9a6c40c5bc9d463b2c2c5d8e1c7e.jpg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 592ms/step
Processing /Users/janekkorczynski/Desktop/BizHack/DataRes/Train/Defective/DJI_0991_rotated_jpg.rf.4b764dbb5143c594b485d5a077fce4d5.jpg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Processing /Users/janekkorczynski/Desktop/BizHack/DataRes/Train/Defective/DJI_0252_JPG.rf.9a2545d7fd0d092ce419c0fb72632aa4.jpg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Processing /Users/janekkorczynski/Desktop/BizHack/DataRes/Train/Defective/DJI_0809_JPG.rf.f06398726b9d6b3caf163929ca71e553.jpg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Processing /Users/janekkorczynski/Desktop/BizHack/DataRes/Train/Defective/DJI_0131_png.rf.25d18e1f3fb0b5f4a65cd91719012cda.jpg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

In [16]:
!pip3 install opencv-python



In [4]:
import cv2
from tensorflow.keras.preprocessing.image import img_to_array, load_img

def extract_feature(img_path):
    img = load_img(img_path, target_size=(224, 224))
    img_array = img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    feature = base_model.predict(img_array)
    return feature.flatten()

# Function to predict and overlay result on image
def predict_and_display(img_path):
    feature = extract_feature(img_path)
    prediction = clf.predict([feature])
    predicted_class = classes[prediction[0]]

    # Load the image using OpenCV
    img = cv2.imread(img_path)
    if img is None:
        print(f"Error loading image {img_path}")
        return

    # Put the prediction text on the image
    font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(img, predicted_class, (10, 30), font, 1, (0, 255, 0), 2, cv2.LINE_AA)

    # Display the image
    cv2.imshow('Prediction', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Path to the image you want to predict
img_path = '/Users/janekkorczynski/Desktop/BizHack/NR18Can-final-water-fix-1818/valid/images/DJI_0005_JPG.rf.b10f430cb4b4e9878f15c96995465e25.jpg'
predict_and_display(img_path)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
