In [27]:
import os
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img

# Define the classes exactly as they appear in your directories
classes = ["Defective", "Non defective"]

# Load the ResNet50 model without the top layer
base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Function to extract features from images in a directory
def extract_features(directory):
    features = []
    labels = []
    class_counts = {class_name: 0 for class_name in classes}  # For counting samples per class
    for label in os.listdir(directory):
        if label not in classes:
            print(f"Skipping {label} as it is not in the defined classes.")
            continue
        label_dir = os.path.join(directory, label)
        if not os.path.isdir(label_dir):
            print(f"Skipping {label_dir} as it is not a directory.")
            continue
        for filename in os.listdir(label_dir):
            img_path = os.path.join(label_dir, filename)
            if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
                print(f"Skipping {img_path} as it is not an image file.")
                continue
            print(f"Processing {img_path}...")
            img = load_img(img_path, target_size=(224, 224))
            img_array = img_to_array(img)
            img_array = np.expand_dims(img_array, axis=0)
            img_array = preprocess_input(img_array)
            feature = base_model.predict(img_array)
            features.append(feature.flatten())
            labels.append(classes.index(label))
            class_counts[label] += 1  # Increment class count
    print(f"Class counts in {directory}: {class_counts}")  # Print class counts
    return np.array(features), np.array(labels)

# Directories containing your train and test datasets
train_dir = 'Railway Track fault Detection Updated/Train'
test_dir = 'Railway Track fault Detection Updated/Test'

# Extract features from the training set
train_features, train_labels = extract_features(train_dir)

# Extract features from the test set
test_features, test_labels = extract_features(test_dir)

# Check if there are at least 2 classes in the training data
unique_classes = np.unique(train_labels)
if len(unique_classes) < 2:
    raise ValueError("Training data must contain at least two classes.")

# Train a logistic regression classifier on the extracted features
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(max_iter=1000)
clf.fit(train_features, train_labels)

# Make predictions on the test set
predictions = clf.predict(test_features)

# Print the classification report
from sklearn.metrics import classification_report

print(classification_report(test_labels, predictions, target_names=classes))


Processing Railway Track fault Detection Updated/Train/Defective/IMG_20201114_102340.jpg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 672ms/step
Processing Railway Track fault Detection Updated/Train/Defective/download (5).jpeg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
Processing Railway Track fault Detection Updated/Train/Defective/IMG_20201211_113650.jpg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
Processing Railway Track fault Detection Updated/Train/Defective/images (2).jpeg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
Processing Railway Track fault Detection Updated/Train/Defective/IMG_20201114_102022.jpg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Processing Railway Track fault Detection Updated/Train/Defective/E116_8996 (1).jpg...
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step
Processing Railway Track fault Dete

In [21]:
!pip3 install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.5.1-cp39-cp39-macosx_12_0_arm64.whl.metadata (12 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.5.1-cp39-cp39-macosx_12_0_arm64.whl (11.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.0/11.0 MB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading joblib-1.4.2-py3-none-any.whl (301 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m301.8/301.8 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading threadpoolctl-3.5.0-py3-none-any.whl (18 kB)
Installing collected packages: threadpoolctl, joblib, scikit-learn
Successfully installed joblib-1.4.2 scikit-learn-1.5.1 threadpoolctl-3.5.0


In [22]:
from sklearn.linear_model import LogisticRegression

# Train a logistic regression classifier on the extracted features
clf = LogisticRegression(max_iter=1000)
clf.fit(train_features, train_labels)


ValueError: This solver needs samples of at least 2 classes in the data, but the data contains only one class: 0

In [5]:
train=pd.DataFrame()
train=train.assign(filename=Id)
train.head()

Unnamed: 0,filename
0,Railway Track fault Detection Updated/Train/De...
1,Railway Track fault Detection Updated/Train/De...
2,Railway Track fault Detection Updated/Train/De...
3,Railway Track fault Detection Updated/Train/De...
4,Railway Track fault Detection Updated/Train/De...


In [6]:
train['label'] = train['filename'].str.split('/').str[-2]
train.head()

Unnamed: 0,filename,label
0,Railway Track fault Detection Updated/Train/De...,Defective
1,Railway Track fault Detection Updated/Train/De...,Defective
2,Railway Track fault Detection Updated/Train/De...,Defective
3,Railway Track fault Detection Updated/Train/De...,Defective
4,Railway Track fault Detection Updated/Train/De...,Defective


In [7]:
Id=[]
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/Users/janekkorczynski/Desktop/BizHack/Railway Track fault Detection Updated/Test'):
    for filename in filenames:
        Id.append(os.path.join(dirname, filename))
Id[:5]

['/Users/janekkorczynski/Desktop/BizHack/Railway Track fault Detection Updated/Test/Defective/IMG_20201114_102222.jpg',
 '/Users/janekkorczynski/Desktop/BizHack/Railway Track fault Detection Updated/Test/Defective/IMG_20201114_103110.jpg',
 '/Users/janekkorczynski/Desktop/BizHack/Railway Track fault Detection Updated/Test/Defective/IMG_20201114_100209.jpg',
 '/Users/janekkorczynski/Desktop/BizHack/Railway Track fault Detection Updated/Test/Defective/IMG_20201211_121712_1.jpg',
 '/Users/janekkorczynski/Desktop/BizHack/Railway Track fault Detection Updated/Test/Defective/IMG_20201114_101200.jpg']

In [9]:
test=pd.DataFrame()
test=test.assign(filename=Id)
test.head()

Unnamed: 0,filename
0,/Users/janekkorczynski/Desktop/BizHack/Railway...
1,/Users/janekkorczynski/Desktop/BizHack/Railway...
2,/Users/janekkorczynski/Desktop/BizHack/Railway...
3,/Users/janekkorczynski/Desktop/BizHack/Railway...
4,/Users/janekkorczynski/Desktop/BizHack/Railway...


In [11]:
test['label'] = test['filename'].str.split('/').str[-2]
test.head()

Unnamed: 0,filename,label
0,/Users/janekkorczynski/Desktop/BizHack/Railway...,Defective
1,/Users/janekkorczynski/Desktop/BizHack/Railway...,Defective
2,/Users/janekkorczynski/Desktop/BizHack/Railway...,Defective
3,/Users/janekkorczynski/Desktop/BizHack/Railway...,Defective
4,/Users/janekkorczynski/Desktop/BizHack/Railway...,Defective


In [16]:
def preprocess_image(image_path):
    img = Image.open(image_path).convert('RGB')
    img = img.resize((224, 224), Image.Resampling.LANCZOS)  # Ensure the image is resized to 224x224
    img_array = np.array(img)
    img_array = img_array / 255.0  # Scale pixel values to [0, 1]
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    return img_array

In [18]:
for i in test.filename:
    try:
        img_array = preprocess_image(i)
        inp = tf.constant(img_array, dtype='float32')
        class_scores = model(inp)[0].numpy()
        print(f"Class Scores: {class_scores}")  # Debugging line
        predicted_index = class_scores.argmax()
        print(f"Predicted Index: {predicted_index}")  # Debugging line
        result.append(classes[predicted_index])
    except Exception as e:
        print(f"Error processing {i}: {e}")
        result.append(None)

print(result[:5])

Class Scores: [9.85345760e-05 2.18745321e-04 6.93441616e-05 1.23739053e-04
 5.35045911e-05 2.86705355e-04 6.07287848e-06 4.62925309e-05
 1.00271609e-05 1.84908509e-04 8.37587810e-04 1.73958877e-04
 7.02802718e-05 9.45664942e-05 1.29914897e-05 4.09002423e-05
 1.52638735e-04 2.60501820e-05 6.19938583e-05 4.92227446e-05
 2.05041477e-04 3.14453314e-03 1.18800486e-03 3.06452857e-04
 9.42493425e-05 9.13533004e-05 3.44525935e-04 2.36174994e-04
 1.34139351e-04 2.07726989e-04 3.41671293e-05 2.75875296e-04
 6.79964432e-05 1.27232066e-04 2.68573203e-04 1.00653124e-05
 1.57586619e-04 7.89663318e-06 5.03031351e-03 2.80682016e-05
 4.32891102e-05 1.76617861e-04 4.37954848e-04 2.28046658e-04
 6.82673926e-05 1.04466286e-04 4.83335680e-05 2.79621338e-04
 4.34344438e-05 7.23145640e-05 2.40614580e-04 3.65173706e-04
 2.31422498e-04 4.35276830e-04 4.42282646e-04 2.17797569e-05
 7.30339671e-05 1.33888461e-05 1.50110718e-04 3.89261637e-03
 3.96036339e-04 3.97433432e-05 3.61905753e-04 4.37133625e-04
 1.0702107