In [34]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import gradio as gr
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
import torchvision
from ultralytics import YOLO
import cv2

In [98]:
# Define the model architecture
class conv_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.conv1 = nn.Conv2d(in_c, out_c, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(out_c)
        self.conv2 = nn.Conv2d(out_c, out_c, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(out_c)
        self.relu = nn.ReLU()

    def forward(self, inputs):
        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu(x)
        return x


class encoder_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.conv = conv_block(in_c, out_c)
        self.pool = nn.MaxPool2d((2, 2))

    def forward(self, inputs):
        x = self.conv(inputs)
        p = self.pool(x)
        return x, p


class decoder_block(nn.Module):
    def __init__(self, in_c, out_c):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_c, out_c, kernel_size=2, stride=2, padding=0)
        self.conv = conv_block(out_c + out_c, out_c)

    def forward(self, inputs, skip):
        x = self.up(inputs)
        # Adjust the dimensions of `skip` to match `x` before concatenation
        skip = F.interpolate(
            skip, size=(x.size(2), x.size(3)), mode="bilinear", align_corners=True
        )
        x = torch.cat([x, skip], dim=1)  # Use `dim` instead of `axis`
        x = self.conv(x)
        return x


class build_unet(nn.Module):
    def __init__(self):
        super().__init__()
        """ Encoder """
        self.e1 = encoder_block(3, 64)
        self.e2 = encoder_block(64, 128)
        self.e3 = encoder_block(128, 256)
        self.e4 = encoder_block(256, 512)
        """ Bottleneck """
        self.b = conv_block(512, 1024)
        """ Decoder """
        self.d1 = decoder_block(1024, 512)
        self.d2 = decoder_block(512, 256)
        self.d3 = decoder_block(256, 128)
        self.d4 = decoder_block(128, 64)
        """ Classifier """
        self.outputs = nn.Conv2d(64, 1, kernel_size=1, padding=0)

    def forward(self, inputs):
        """Encoder"""
        s1, p1 = self.e1(inputs)
        s2, p2 = self.e2(p1)
        s3, p3 = self.e3(p2)
        s4, p4 = self.e4(p3)
        """ Bottleneck """
        b = self.b(p4)
        """ Decoder """
        d1 = self.d1(b, s4)
        d2 = self.d2(d1, s3)
        d3 = self.d3(d2, s2)
        d4 = self.d4(d3, s1)
        """ Classifier """
        outputs = self.outputs(d4)
        return outputs

# Define the model
class RegressionModel(nn.Module):

    def __init__(self):

        super(RegressionModel, self).__init__()
        resnet = torchvision.models.resnet34()
        self.features = nn.Sequential(
            *list(resnet.children())[:-1]
        )  # Remove the last fully connected layer
        self.regressor = nn.Linear(
            512, 1
        )  # Replace the last layer with a regression layer

    def forward(self, x):

        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.regressor(x)
        return x


# Define the model
model_reg = RegressionModel()

# Load the pre-trained model
model_seg = build_unet()
"""
checkpoint = torch.load("mold_model_comb.pth")
model_seg.load_state_dict(checkpoint)
"""
model = torch.load('regression_model.pth',map_location ='cpu')
model_reg.eval()
model_seg.eval()


mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

test_transform = A.Compose(
    [
        A.Resize(224, 224),  # Resize images to 224x224
        A.Normalize(mean=mean, std=std),  # Normalize images using general mean and std
        ToTensorV2(),  # Convert image to PyTorch tensor
    ]
)


# Define the prediction function
def predict_image(image):
    # Apply transformations to the input image
    img_numpy = test_transform(image=np.array(image))["image"]
    image_tensor = img_numpy.unsqueeze(0)  # Add batch dimension

    # Perform prediction
    with torch.no_grad():
        output = model_seg(image_tensor)

    # Apply sigmoid activation and thresholding
    preds = torch.sigmoid(output)
    preds = output.detach().squeeze().numpy()

    preds = np.where(preds >= 0.5, 1, 0)
    pred_indices = np.where(preds == 1)

    masked_image = torch.squeeze(image_tensor).permute(1, 2, 0).numpy()

    masked_image = (masked_image * std) + mean

    # Visualize the predictions
    cmap = plt.get_cmap("jet")
    masked_image[pred_indices] = cmap(preds[pred_indices])[:, :3]

    # Normalize the image array to be between -1 and 1
    return masked_image

def regression(image):
    img_numpy = test_transform(image=np.array(image))["image"]
    image_tensor = img_numpy.unsqueeze(0)  # Add batch dimension

    with torch.no_grad():
        output = model_reg(image_tensor)
    
    print(output.shape)
    return output.squeeze().tolist()

def hugg_face(img):
    model = YOLO("peachlemonv3.pt")
    labels = ["freshpeach", "freshlemon", "rottenpeach", "rottenlemon"]

    img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    
    results = model(img)

    img_label_results = []
    img_2 = img.copy()
    
    for result in results:
        for i, cls in enumerate(result.boxes.cls):
            crop_img = img[int(result.boxes.xyxy[i][1]):int(result.boxes.xyxy[i][3]),
                       int(result.boxes.xyxy[i][0]):int(result.boxes.xyxy[i][2])]
            cv2.rectangle(img_2, (int(result.boxes.xyxy[i][0]), int(result.boxes.xyxy[i][1])),
                          (int(result.boxes.xyxy[i][2]), int(result.boxes.xyxy[i][3])), (0, 255, 0), 2)
            cv2.putText(img_2, labels[int(cls)] + str(i), (int(result.boxes.xyxy[i][0]), int(result.boxes.xyxy[i][1])),
                        cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 2, cv2.LINE_AA)
            img_label_results.append({"label": labels[int(cls)] + str(i), "crop_img": crop_img})

    
    img_2_pil = Image.fromarray(cv2.cvtColor(img_2, cv2.COLOR_BGR2RGB))

    regression_results = []

    # Iterate over cropped images and their labels
    for item in img_label_results:
        label = item["label"]
        cropped_img = item["crop_img"]
        
        # Apply regression to the cropped image
        regression_output = regression(cropped_img)
        
        # Append the regression output along with its label to the regression_results list
        regression_results.append({"label": label, "Rotten Proportion": regression_output})

    return img_2_pil, regression_results

In [99]:
# Create Gradio interface
inputs = gr.Image(type="pil")  # Define input image shape

# Define output types: one for the text box (for regression results) and one for the image (segmented image)
outputs = [gr.Image(type="pil", label="Detecion Result"), gr.Textbox(label="Regression Results")]

app = gr.Interface(
    fn=hugg_face,
    inputs=inputs,
    outputs=outputs,
    title="Image Segmentation with Regression",
    description="Segmentation of input image using a U-Net model with regression results."
)

# Launch the app
app.launch(share=True)

Running on local URL:  http://127.0.0.1:7885

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app.




Exception in callback _ProactorBasePipeTransport._call_connection_lost(None)
handle: <Handle _ProactorBasePipeTransport._call_connection_lost(None)>
Traceback (most recent call last):
  File "C:\Users\bahak\AppData\Local\Programs\Python\Python39\lib\asyncio\events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\bahak\AppData\Local\Programs\Python\Python39\lib\asyncio\proactor_events.py", line 162, in _call_connection_lost
    self._sock.shutdown(socket.SHUT_RDWR)
ConnectionResetError: [WinError 10054] Varolan bir bağlantı uzaktaki bir ana bilgisayar tarafından zorla kapatıldı



0: 640x480 13 freshlemons, 2 rottenlemons, 242.0ms
Speed: 4.0ms preprocess, 242.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 480)
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
torch.Size([1, 1])
