In [7]:
import os
import cv2
import pytesseract
import pandas as pd
import re
from pathlib import Path
from datetime import datetime

# Function to extract temperature and time data from an image
def extract_data_from_image(image_path):
    """Extract temperature readings and a timestamp from a thermal image via OCR.

    The image is passed through pytesseract four times: once as a whole (after
    an inverted Otsu threshold) and once for each of three fixed regions of the
    original image (top-left, bottom-left, bottom-right). The recognised text
    is then searched with regular expressions.

    Args:
        image_path: Path to the image file (``str`` or ``os.PathLike``).

    Returns:
        dict with the keys ``file_name``, ``center_temp``, ``max_temp``,
        ``min_temp`` and ``timestamp``. Every value except ``file_name`` is the
        matched text (a string), or ``None`` when nothing matched.

    Raises:
        ValueError: If OpenCV cannot read ``image_path`` as an image.
    """
    image_path = os.fspath(image_path)

    # cv2.imread signals failure by returning None rather than raising, so
    # check explicitly instead of crashing later inside cvtColor.
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not read image: {image_path}")

    # Whole-image OCR on an inverted, Otsu-thresholded grayscale copy
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    text = pytesseract.image_to_string(thresh)

    # Region OCR on the original image; the fractions are fixed guesses at
    # where the camera overlay draws each label.
    height, width = img.shape[:2]

    # Top-left corner (for Cen and Max values)
    top_left_roi = img[0:int(height*0.15), 0:int(width*0.2)]
    top_left_text = pytesseract.image_to_string(top_left_roi)

    # Bottom-left strip (for timestamp)
    bottom_roi = img[int(height*0.85):height, 0:int(width*0.5)]
    bottom_text = pytesseract.image_to_string(bottom_roi)

    # Bottom-right corner (for min value)
    right_roi = img[int(height*0.7):height, int(width*0.8):width]
    right_text = pytesseract.image_to_string(right_roi)

    # Combine all extracted text
    all_text = text + " " + top_left_text + " " + bottom_text + " " + right_text

    center_temp = None
    max_temp = None
    min_temp = None
    timestamp = None

    # Center temperature (labeled as "Cen")
    cen_temp_match = re.search(r'Cen\s*(\d+\.\d+)', all_text)
    if cen_temp_match:
        center_temp = cen_temp_match.group(1)

    # Maximum temperature (labeled as "Max")
    max_temp_match = re.search(r'Max\s*(\d+\.\d+)', all_text)
    if max_temp_match:
        max_temp = max_temp_match.group(1)

    # Minimum temperature: prefer the last decimal number in the bottom-right
    # text; otherwise fall back to the numerically smallest decimal found there.
    min_temp_match = re.search(r'(\d+\.\d+)\s*$', right_text)
    if min_temp_match:
        min_temp = min_temp_match.group(1)
    else:
        temp_matches = re.findall(r'(\d+\.\d+)', right_text)
        if temp_matches:
            min_temp = min(temp_matches, key=float)

    # Timestamp (format: YYYY/MM/DD HH:MM(AM/PM))
    timestamp_match = re.search(r'(\d{4}/\d{2}/\d{2}\s*\d{2}:\d{2}(?:AM|PM)?)', all_text)
    if timestamp_match:
        timestamp = timestamp_match.group(1)

    # If no timestamp found, try looser common formats on the bottom strip only
    if not timestamp:
        timestamp_alt_match = re.search(r'(\d{4}[-/]\d{2}[-/]\d{2}\s*\d{1,2}:\d{2}(?::\d{2})?(?:\s*[AP]M)?)', bottom_text)
        if timestamp_alt_match:
            timestamp = timestamp_alt_match.group(1)

    # Print extracted data for debugging
    print(f"File: {os.path.basename(image_path)}")
    print(f"Center: {center_temp}, Max: {max_temp}, Min: {min_temp}, Time: {timestamp}")

    return {
        'file_name': os.path.basename(image_path),
        'center_temp': center_temp,
        'max_temp': max_temp,
        'min_temp': min_temp,
        'timestamp': timestamp
    }

# Main function to process all images in a folder
def process_folder(folder_path, output_csv):
    """Run ``extract_data_from_image`` on every image in a folder and save a CSV.

    Only files directly inside ``folder_path`` are considered (no recursion).
    Images are processed in sorted path order so the CSV row order is the same
    on every run. An image that raises is reported and skipped; the remaining
    images are still processed.

    Args:
        folder_path: Directory containing the images.
        output_csv: Destination path of the CSV file. It is only written when
            at least one image was processed successfully.

    Returns:
        None. Progress and problems are reported with ``print``.
    """
    folder = Path(folder_path)

    # Path.iterdir() raises on a missing or non-directory path; report it instead.
    if not folder.is_dir():
        print(f"Folder not found: {folder_path}")
        return

    # Get all image files in the folder (sorted: iterdir() order is arbitrary)
    image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif'}
    image_files = sorted(
        str(f) for f in folder.iterdir()
        if f.is_file() and f.suffix.lower() in image_extensions
    )

    if not image_files:
        print(f"No image files found in {folder_path}")
        return

    # Process each image and collect data
    results = []
    failed = []
    for i, img_path in enumerate(image_files, start=1):
        print(f"Processing image {i}/{len(image_files)}: {os.path.basename(img_path)}")
        try:
            results.append(extract_data_from_image(img_path))
        except Exception as e:
            # Deliberately best-effort: one bad image must not abort the batch.
            print(f"Error processing {img_path}: {str(e)}")
            failed.append(img_path)

    if failed:
        print(f"{len(failed)} of {len(image_files)} images could not be processed")

    # Create DataFrame and save to CSV
    if results:
        df = pd.DataFrame(results)
        df.to_csv(output_csv, index=False)
        print(f"Data extracted and saved to {output_csv}")
    else:
        print("No data was successfully extracted")

# Example usage
if __name__ == "__main__":
    # Paths pasted from a file manager often carry stray whitespace or
    # surrounding quotes (e.g. Windows "Copy as path"); strip both so the
    # folder lookup does not fail on an otherwise valid path.
    folder_path = input("Enter the path to the folder containing images: ").strip().strip('"\'')
    output_csv = (
        input("Enter the output CSV file path (default: temperature_data.csv): ").strip().strip('"\'')
        or "temperature_data.csv"
    )

    process_folder(folder_path, output_csv)

Enter the path to the folder containing images:  C:\Users\Anuska Palit\OneDrive\Desktop\project\Computer Vision\Thermal Imaging.v8i.yolov8\valid\images
Enter the output CSV file path (default: temperature_data.csv):  C:\Users\Anuska Palit\OneDrive\Desktop\project\Computer Vision\data3.csv


Processing image 1/110: DIVU20240822115507_jpeg.rf.72b72f6c88deda0e18079589504fb743.jpg
File: DIVU20240822115507_jpeg.rf.72b72f6c88deda0e18079589504fb743.jpg
Center: 38.1, Max: 49.3, Min: 0.93, Time: 2024/08/22 11:55AM
Processing image 2/110: DIVU20240822120117_jpeg.rf.ba40e13f9abcb70bbb1dcc3f79d51dcd.jpg
File: DIVU20240822120117_jpeg.rf.ba40e13f9abcb70bbb1dcc3f79d51dcd.jpg
Center: 28.9, Max: 34.8, Min: 10.93, Time: 2024/08/22 12:01PM
Processing image 3/110: DIVU20240822144453_jpeg.rf.190bff8d4cba2fd5fe2fef21448eb3ca.jpg
File: DIVU20240822144453_jpeg.rf.190bff8d4cba2fd5fe2fef21448eb3ca.jpg
Center: 39.1, Max: 46.4, Min: 0.93, Time: 2024/08/22 02:44PM
Processing image 4/110: DIVU20240822144702_jpeg.rf.a5332050ff4dcfbbd45e097341e70255.jpg
File: DIVU20240822144702_jpeg.rf.a5332050ff4dcfbbd45e097341e70255.jpg
Center: 38.2, Max: None, Min: None, Time: 2024/08/22 02:47PM
Processing image 5/110: DIVU20240822150556_jpeg.rf.4b8353d8ed58dddfa35cbc9d6f346323.jpg
File: DIVU20240822150556_jpeg.rf.4b

In [8]:
import pandas as pd

# Load each partial extraction result and stack them into one table with a
# fresh 0..n-1 index.
files = ["data1.csv", "data2.csv", "data3.csv"]
df_list = list(map(pd.read_csv, files))
merged_df = pd.concat(df_list, ignore_index=True)

# Replace missing values in numeric columns with that column's mean.
# Columns without a numeric mean are left as they are.
column_means = merged_df.mean(numeric_only=True)
merged_df = merged_df.fillna(column_means)

merged_df.to_csv("data.csv", index=False)


In [None]:
import flask
from flask import request, jsonify
import joblib
import numpy as np
import tensorflow as tf

# Create the Flask application and load the model artefacts once at start-up,
# so request handlers reuse the same objects instead of reloading per request.
app = flask.Flask(__name__)
# Keras model loaded from an HDF5 file; the path is relative to the current
# working directory.
model = tf.keras.models.load_model("autoencoder_model.h5")
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only load a scaler.pkl that comes from a trusted source.
scaler = joblib.load("scaler.pkl")

@app.route('/predict', methods=['POST'])
def predict():
    """Score one temperature reading with the autoencoder and flag anomalies.

    Expects a JSON object with numeric ``center_temp``, ``max_temp``,
    ``min_temp`` and ``temp_range`` fields. The values are scaled with the
    loaded scaler, passed through the model, and the mean absolute
    reconstruction error is compared with the anomaly threshold.

    The threshold is read from ``app.config["ANOMALY_THRESHOLD"]``; if that is
    not set, a module-level ``threshold`` variable is used when one exists.

    Returns:
        200 with ``{"anomaly_detected": 0 or 1, "reconstruction_loss": float}``;
        400 with ``{"error": ...}`` when the body is not a JSON object or a
        field is missing / non-numeric;
        500 with ``{"error": ...}`` when no threshold has been configured.
    """
    # silent=True yields None instead of an error for a missing/invalid JSON
    # body, so every malformed request gets the same 400 response below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    feature_names = ("center_temp", "max_temp", "min_temp", "temp_range")
    try:
        features = [float(data[name]) for name in feature_names]
    except KeyError as exc:
        return jsonify({"error": f"Missing field: {exc.args[0]}"}), 400
    except (TypeError, ValueError):
        return jsonify({"error": "All fields must be numeric"}), 400

    # The original handler referenced an undefined `threshold`. Resolve it
    # explicitly and fail with a clear message rather than a NameError.
    cutoff = app.config.get("ANOMALY_THRESHOLD")
    if cutoff is None:
        cutoff = globals().get("threshold")
    if cutoff is None:
        return jsonify({
            "error": "Anomaly threshold is not configured; "
                     "set app.config['ANOMALY_THRESHOLD'] before serving"
        }), 500
    cutoff = float(cutoff)

    # Shape (1, 4): a single sample with four features
    input_data = np.array(features).reshape(1, -1)
    input_data = scaler.transform(input_data)
    # Add a trailing axis -> (1, 4, 1).
    # NOTE(review): assumes the model was trained on inputs of this shape — confirm.
    input_data = np.expand_dims(input_data, axis=2)

    # Predict reconstruction error
    reconstructed = model.predict(input_data)
    reconstruction_loss = np.mean(np.abs(input_data - reconstructed))

    # Compare with threshold
    anomaly = int(reconstruction_loss > cutoff)

    return jsonify({"anomaly_detected": anomaly, "reconstruction_loss": float(reconstruction_loss)})

if __name__ == "__main__":
    # Start Flask's built-in development server on port 5000.
    # NOTE(review): app.run() does not return until the server is stopped, so
    # inside a notebook this cell keeps the kernel busy — consider running the
    # service from a standalone script instead.
    app.run(port=5000)
