In [9]:
import cv2
import numpy as np
import json

# --- Configuration ---
# Color of the data line to be extracted (in BGR format)
# You can use an online color picker to find the BGR values.
# Example: Blue is (255, 0, 0), Red is (0, 0, 255), Black is (0, 0, 0)
LINE_COLOR_BGR = [255, 0, 0]
# Color tolerance. A higher value means more shades of the color are matched.
COLOR_TOLERANCE = 30
# Path to your chart image
IMAGE_PATH = r"C:\Users\12643\Desktop\download.png"
# Path for the output files
OUTPUT_JSON_PATH = 'extracted_data.json'
OUTPUT_IMAGE_PATH = 'processed_chart.png'

# --- Automated Calibration Configuration ---
# The data values corresponding to the min/max of the X and Y axes.
# The script will try to find the axes pixels automatically.
# You must provide the values they represent.
X_AXIS_MIN_VAL = 0.0
X_AXIS_MAX_VAL = 7.0  # Example: if x-axis goes from 0 to 7
Y_AXIS_MIN_VAL = 0.0
Y_AXIS_MAX_VAL = 450.0 # Example: if y-axis goes from 0 to 450


def get_color_mask(image, target_color_bgr, tolerance):
    """
    Creates a binary mask for a given color within a certain tolerance.
    This helps isolate the data line from the rest of the chart.
    """
    lower_bound = np.array(target_color_bgr, dtype=np.uint8) - tolerance
    upper_bound = np.array(target_color_bgr, dtype=np.uint8) + tolerance
    # Clip values to be within the valid 0-255 range
    lower_bound = np.clip(lower_bound, 0, 255)
    upper_bound = np.clip(upper_bound, 0, 255)

    mask = cv2.inRange(image, lower_bound, upper_bound)
    return mask

def find_axes_automatically(image):
    """
    Automatically detects the X and Y axes, origin, and endpoints.
    Returns a tuple of (origin_px, x_axis_px, y_axis_px) or None if detection fails.
    """
    # Convert to grayscale and apply a binary inverse threshold
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 230, 255, cv2.THRESH_BINARY_INV)

    # Use morphological operations to detect horizontal and vertical lines
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (50, 1))
    detect_horizontal = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)
    hor_contours, _ = cv2.findContours(detect_horizontal, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not hor_contours:
        print("Error: Could not automatically detect the X-axis.")
        return None

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 50))
    detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
    ver_contours, _ = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not ver_contours:
        print("Error: Could not automatically detect the Y-axis.")
        return None

    # Assume the longest contour in each direction is the axis
    x_axis_contour = max(hor_contours, key=cv2.contourArea)
    y_axis_contour = max(ver_contours, key=cv2.contourArea)

    # Get the extreme points of the axes contours
    x_axis_coords = np.concatenate(x_axis_contour)
    y_axis_coords = np.concatenate(y_axis_contour)

    # The origin is the intersection of the axes.
    # We assume it's near the bottom-left of the plot area.
    origin_px = (np.min(y_axis_coords[:, 0]), np.max(x_axis_coords[:, 1]))

    # The other ends of the axes are the max points
    x_axis_px = (np.max(x_axis_coords[:, 0]), origin_px[1])
    y_axis_px = (origin_px[0], np.min(y_axis_coords[:, 1]))
    
    print("Axes detected automatically:")
    print(f"  - Pixel Origin: {origin_px}")
    print(f"  - Pixel X-Max: {x_axis_px}")
    print(f"  - Pixel Y-Max: {y_axis_px}")

    return origin_px, x_axis_px, y_axis_px


def main():
    """
    Main function to run the chart data extraction process.
    """
    # 1. Load the image
    image = cv2.imread(IMAGE_PATH)
    if image is None:
        print(f"Error: Could not load image from {IMAGE_PATH}")
        # Create a dummy image if the specified one doesn't exist
        print("Creating a sample 'line_chart.png'. Please replace it with your own chart.")
        height, width = 600, 800
        image = np.full((height, width, 3), 255, np.uint8)
        # Draw axes
        cv2.line(image, (50, 550), (750, 550), (0, 0, 0), 2)  # X-axis
        cv2.line(image, (50, 50), (50, 550), (0, 0, 0), 2)  # Y-axis
        # Draw a simple sine wave as the data line
        pts = []
        for i in range(700):
            x = 50 + i
            y = 300 - int(200 * np.sin(i * np.pi / 350))
            pts.append((x, y))
        cv2.polylines(image, [np.array(pts, dtype=np.int32)], isClosed=False, color=LINE_COLOR_BGR, thickness=2)
        cv2.imwrite(IMAGE_PATH, image)
        print(f"Sample chart saved to {IMAGE_PATH}. Please run the script again.")
        return

    # 2. Automatic Calibration
    calibration_result = find_axes_automatically(image)
    if calibration_result is None:
        print("Automated calibration failed. Please check the image or adjust detection parameters.")
        return
    origin_px, x_axis_px, y_axis_px = calibration_result

    # 3. Image Processing
    # Isolate the data line based on its color
    mask = get_color_mask(image, LINE_COLOR_BGR, COLOR_TOLERANCE)

    # Use morphological operations to clean up the mask
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.erode(mask, kernel, iterations=1)
    mask = cv2.dilate(mask, kernel, iterations=1)

    # 4. Find the data line contour
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    if not contours:
        print("Error: No data line contour found. Try adjusting LINE_COLOR_BGR or COLOR_TOLERANCE.")
        return

    # Assume the longest contour is the data line
    data_contour = max(contours, key=cv2.contourArea)

    # 5. Transformation and Data Extraction
    # Calculate pixel-to-data scaling factors
    x_pixel_range = x_axis_px[0] - origin_px[0]
    y_pixel_range = origin_px[1] - y_axis_px[1]  # Y is inverted in pixel coordinates

    if x_pixel_range == 0 or y_pixel_range == 0:
        print("Error: Detected axis length is zero. Calibration failed.")
        return

    x_data_range = X_AXIS_MAX_VAL - X_AXIS_MIN_VAL
    y_data_range = Y_AXIS_MAX_VAL - Y_AXIS_MIN_VAL

    x_scale = x_data_range / x_pixel_range
    y_scale = y_data_range / y_pixel_range

    extracted_points = []
    for point in data_contour:
        px, py = point[0]

        # Convert pixel coordinates to data coordinates relative to the origin
        data_x = ((px - origin_px[0]) * x_scale) + X_AXIS_MIN_VAL
        data_y = ((origin_px[1] - py) * y_scale) + Y_AXIS_MIN_VAL

        # We only care about points within the detected plot area
        if px >= origin_px[0] and py <= origin_px[1]:
            extracted_points.append({'x': data_x, 'y': data_y})

    # Sort points by x-value to ensure correct order
    extracted_points.sort(key=lambda p: p['x'])

    # Remove duplicate x-values by averaging y-values (optional, but good practice)
    unique_points = {}
    for p in extracted_points:
        # Group by x-value. The precision of rounding can be adjusted.
        x_key = round(p['x'], 4) 
        if x_key not in unique_points:
            unique_points[x_key] = []
        unique_points[x_key].append(p['y'])

    final_data = []
    for x, y_list in unique_points.items():
        final_data.append({'x': x, 'y': np.mean(y_list)})
    final_data.sort(key=lambda p: p['x'])

    print(f"\nSuccessfully extracted {len(final_data)} data points.")

    # 6. Find Significant Points (Max/Min)
    if not final_data:
        print("No data was extracted.")
        return

    min_point = min(final_data, key=lambda p: p['y'])
    max_point = max(final_data, key=lambda p: p['y'])

    print(f"Maximum Value: {max_point}")
    print(f"Minimum Value: {min_point}")

    # 7. Save and Visualize Output
    output_data = {
        'max_point': max_point,
        'min_point': min_point,
        'data_points': final_data
    }
    with open(OUTPUT_JSON_PATH, 'w') as f:
        json.dump(output_data, f, indent=4)
    print(f"Data saved to {OUTPUT_JSON_PATH}")

    # Draw the results on the original image for verification
    output_image = image.copy()

    # Draw the detected contour
    cv2.drawContours(output_image, [data_contour], -1, (0, 255, 255), 2)  # Yellow contour

    # Function to convert data point back to pixel for drawing
    def data_to_pixel(data_point):
        px = int(origin_px[0] + ((data_point['x'] - X_AXIS_MIN_VAL) / x_scale))
        py = int(origin_px[1] - ((data_point['y'] - Y_AXIS_MIN_VAL) / y_scale))
        return (px, py)

    # Highlight Max and Min points
    max_px = data_to_pixel(max_point)
    min_px = data_to_pixel(min_point)
    cv2.circle(output_image, max_px, 8, (0, 0, 255), -1)  # Red circle for Max
    cv2.putText(output_image, f"Max: ({max_point['x']:.2f}, {max_point['y']:.2f})",
                (max_px[0] + 10, max_px[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

    cv2.circle(output_image, min_px, 8, (255, 0, 255), -1)  # Magenta circle for Min
    cv2.putText(output_image, f"Min: ({min_point['x']:.2f}, {min_point['y']:.2f})",
                (min_px[0] + 10, min_px[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 255), 2)

    cv2.imwrite(OUTPUT_IMAGE_PATH, output_image)
    print(f"Processed image with highlighted data saved to {OUTPUT_IMAGE_PATH}")

    # Display the final image
    cv2.imshow("Extraction Result", output_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()


Axes detected automatically:
  - Pixel Origin: (20, 151)
  - Pixel X-Max: (372, 151)
  - Pixel Y-Max: (20, 3)
Error: No data line contour found. Try adjusting LINE_COLOR_BGR or COLOR_TOLERANCE.
