# Data Loading and Extracting

In [None]:
from PIL import Image
import numpy as np

def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image, resize it and scale its pixel values to [0, 1].

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        target_size: Size passed to ``Image.resize``; PIL interprets it as
            ``(width, height)``.

    Returns:
        A float ``numpy`` array of shape ``(height, width, 3)`` with values
        in the range [0, 1].
    """
    # The context manager closes the underlying file as soon as the pixels
    # have been read, instead of leaving the handle open until garbage
    # collection.
    with Image.open(image_path) as img:
        # Force three channels so grayscale, palette and RGBA files all give
        # the same array layout.
        resized = img.convert("RGB").resize(target_size)

    # 8-bit channel values (0..255) -> floats in [0, 1]
    return np.array(resized) / 255.0

# Example usage: runs at cell level on a hard-coded sample path, so the file
# must already exist on disk.
# NOTE(review): presumably written by an earlier download step that is not
# shown here - confirm.
image_path = "downloaded_images/image_0.jpg"
preprocessed_image = preprocess_image(image_path)


In [None]:
import re

# Spelling variants mapped to the canonical unit name.
_UNIT_ALIASES = (
    (re.compile(r'\bgms\b'), 'gram'),
    (re.compile(r'\bmilligrams\b'), 'milligram'),
)

# "<number><optional spaces><unit>". The number must be a well-formed decimal
# and the unit must start with a letter, so the pattern cannot split a bare
# number such as "10" into number "1" and unit "0".
_NUMBER_UNIT_RE = re.compile(r'(\d+(?:\.\d*)?|\.\d+)\s*([^\W\d_].*)')


def _standardize_units(text):
    """Replace known unit spelling variants with their canonical name."""
    for pattern, replacement in _UNIT_ALIASES:
        text = pattern.sub(replacement, text)
    return text


def clean_entity_value(value):
    """Normalise an entity value to the form ``"<float> <unit>"``.

    Args:
        value: Raw entity value, e.g. ``"10 gms"``. Non-string input (such as
            ``None`` or a NaN from a missing cell) is returned unchanged.

    Returns:
        ``"<float> <unit>"`` when the value is a number followed by a unit;
        otherwise the stripped, unit-standardised input.
    """
    # Missing cells arrive as None/NaN; leave them alone instead of crashing
    # on .strip().
    if not isinstance(value, str):
        return value

    # Remove any extra spaces and standardize units in the raw text
    value = _standardize_units(value.strip())

    match = _NUMBER_UNIT_RE.fullmatch(value)
    if match is None:
        return value

    number, unit = match.groups()
    # Standardize again on the isolated unit: when the unit is glued to the
    # number ("10gms") there is no word boundary in the raw text, so the
    # first pass cannot see it. Internal whitespace is collapsed as well.
    unit = " ".join(_standardize_units(unit).split())
    return f"{float(number)} {unit}"

# Apply the cleaning function to every entry of the 'entity_value' column and
# store the result in a new 'cleaned_entity_value' column (the raw column is
# left untouched).
# NOTE(review): this runs before the dropna() cell further down, so any
# missing values in 'entity_value' are passed to clean_entity_value as-is.
train_df['cleaned_entity_value'] = train_df['entity_value'].apply(clean_entity_value)

# Example usage: print the first rows of the raw and cleaned columns side by
# side for a quick visual check.
print(train_df[['entity_value', 'cleaned_entity_value']].head())


In [None]:
# Check for missing values in the dataset: count them per column and report
# the counts before any rows are removed, so the amount of dropped data is
# visible.
missing_data = train_df.isnull().sum()
print(missing_data)

# Drop every row that has a missing value in any column (simplest handling).
train_df = train_df.dropna()

# Validate no more missing data: every count printed here should be 0.
print(train_df.isnull().sum())


### Feature Extraction

In [None]:
import torch
import torchvision.transforms as transforms
from PIL import Image
from torchvision import models

# Load a pre-trained ResNet model
# NOTE(review): newer torchvision releases reportedly deprecate `pretrained=`
# in favour of `weights=` - confirm against the installed version.
model = models.resnet50(pretrained=True)

# Remove the last fully connected layer to get feature vectors: rebuild the
# network from all of its child modules except the final one.
model = torch.nn.Sequential(*(list(model.children())[:-1]))

# Set model to evaluation mode
model.eval()

# Image transformations - resizing, normalization (assuming ImageNet normalization).
# The steps run in order: resize to 224x224, convert to a tensor, then
# normalise the three channels with the mean/std values given below.
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Example function to extract features
def extract_image_features(image_path):
    """Run one image through the module-level feature extractor.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The output of the global ``model`` for this image, with all
        singleton dimensions squeezed out, as a ``numpy`` array.
    """
    # Close the file as soon as the tensor is built. convert("RGB") gives
    # exactly three channels, which the 3-value mean/std in `preprocess`
    # requires - grayscale or RGBA files would otherwise fail there.
    with Image.open(image_path) as img:
        img_tensor = preprocess(img.convert("RGB")).unsqueeze(0)  # Add batch dimension

    # Inference only: no gradients needed
    with torch.no_grad():
        features = model(img_tensor)
    return features.squeeze().numpy()

# Example usage: extract and print the feature vector of one sample image.
# The path is hard-coded, so the file must already exist on disk.
image_path = "downloaded_images/image_0.jpg"
features = extract_image_features(image_path)
print(features)


In [None]:
# Install the Python wrapper into the environment of the running kernel.
%pip install pytesseract
# pytesseract only wraps the Tesseract binary, so the engine itself must be
# installed too. "!" runs the command in a shell (a bare "sudo ..." line is
# not valid in a notebook cell) and "-y" answers apt's confirmation prompt.
!sudo apt-get install -y tesseract-ocr


In [None]:
import pytesseract
from PIL import Image

# Example function to extract text from image using Tesseract OCR
def extract_text_from_image(image_path):
    """Return the text Tesseract OCR recognises in an image.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Use a context manager so the image file is closed once OCR has run,
    # rather than leaving the handle open until garbage collection.
    with Image.open(image_path) as img:
        return pytesseract.image_to_string(img)

# Example usage: `image_path` is not assigned in this cell; it is the global
# set by an earlier example cell, so that cell must have been run first.
text = extract_text_from_image(image_path)
print(text)


In [None]:
# Explicit %pip magic: installs into the running kernel's environment and
# does not depend on IPython's automagic setting.
%pip install easyocr


In [None]:
import easyocr

# Initialize EasyOCR reader once at cell level; extract_text_easyocr below
# reuses this global instead of building a new reader per call.
# NOTE(review): ['en'] presumably selects English recognition - confirm
# against the EasyOCR documentation.
reader = easyocr.Reader(['en'])

# Function to extract text using EasyOCR
def extract_text_easyocr(image_path):
    """Return all text EasyOCR finds in an image as one space-joined string.

    Uses the module-level ``reader``. Element 1 of every result is taken as
    the recognised text (presumably results are ``(bbox, text, confidence)``
    entries - confirm against the EasyOCR documentation).
    """
    detections = reader.readtext(image_path)
    recognised = (detection[1] for detection in detections)
    return " ".join(recognised)

# Example usage: `image_path` is the global assigned in an earlier example
# cell, so that cell must have been run first.
ocr_text = extract_text_easyocr(image_path)
print(ocr_text)


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

# Define the multi-task learning model
class EntityPredictionModel(nn.Module):
    """Multi-task network: one shared ResNet-50 backbone feeding two heads.

    * ``fc_value`` regresses a single numeric value per image.
    * ``fc_unit`` produces one raw score (logit) per candidate unit.

    Args:
        num_units: Number of unit classes the classification head scores.
        pretrained: Forwarded to ``models.resnet50``; ``True`` (the default)
            loads pre-trained weights, ``False`` builds an untrained backbone.
    """

    def __init__(self, num_units, pretrained=True):
        super().__init__()

        # Pre-trained ResNet model for feature extraction
        self.resnet = models.resnet50(pretrained=pretrained)
        # Read the feature width from the layer being removed instead of
        # hard-coding it (2048 for ResNet-50).
        feature_dim = self.resnet.fc.in_features
        self.resnet.fc = nn.Identity()  # Remove the last fully connected layer

        # Task 1: Predicting the numeric value (regression)
        self.fc_value = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.ReLU(),
            nn.Linear(512, 1),  # Output a single numeric value
        )

        # Task 2: Predicting the unit (classification)
        # No Softmax layer here: nn.CrossEntropyLoss (used for training in
        # this file) expects raw logits and applies log-softmax itself, so a
        # Softmax in the head would normalise twice and flatten gradients.
        # The arg-max used at prediction time is the same for logits and
        # probabilities.
        self.fc_unit = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.ReLU(),
            nn.Linear(512, num_units),  # Output logits for each unit
        )

    def forward(self, x):
        """Return ``(value, unit_logits)`` for a batch of images ``x``.

        ``value`` has one column per sample; ``unit_logits`` has
        ``num_units`` columns per sample.
        """
        # Pass through the CNN (ResNet) to get features shared by both heads
        features = self.resnet(x)

        # Predict the numeric value
        value = self.fc_value(features)

        # Predict the unit (raw logits)
        unit = self.fc_unit(features)

        return value, unit

# Example usage
# NOTE(review): this rebinds the global `model`, which extract_image_features
# (defined earlier in this file) also reads; after this line that function
# would call this multi-task model instead of the feature extractor.
num_units = 10  # Replace with actual number of allowed units from constants.py
model = EntityPredictionModel(num_units)


In [None]:
def train_model(model, dataloader, num_epochs=25):
    """Train a two-headed model on a joint regression + classification loss.

    Args:
        model: Module whose forward pass returns ``(value, unit_logits)``.
        dataloader: Iterable yielding ``(inputs, labels_value, labels_unit)``
            batches. It is iterated once per epoch and does not need to
            support ``len()``.
        num_epochs: Number of passes over ``dataloader``.

    Prints the mean batch loss after each epoch (``nan`` if the loader
    yielded no batches) and a final completion message.
    """
    criterion_value = nn.MSELoss()  # Loss for predicting the numeric value (regression)
    criterion_unit = nn.CrossEntropyLoss()  # Loss for predicting the unit (classification)
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        num_batches = 0

        for inputs, labels_value, labels_unit in dataloader:
            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs_value, outputs_unit = model(inputs)

            # Give the regression target the same shape and dtype as the
            # prediction. A (B,) target against a (B, 1) output would
            # otherwise broadcast to (B, B) inside MSELoss and train on the
            # wrong quantity.
            targets_value = labels_value.to(outputs_value.dtype).reshape(outputs_value.shape)

            # Compute the loss
            loss_value = criterion_value(outputs_value, targets_value)
            loss_unit = criterion_unit(outputs_unit, labels_unit)
            loss = loss_value + loss_unit

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Count batches while iterating so an empty loader cannot cause a
        # division by zero.
        epoch_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss}')

    print('Training complete')

# Example: Assuming a DataLoader for the dataset exists
# train_model(model, train_dataloader)


In [None]:
def format_prediction(value, unit):
    """Return the prediction as the string '<value> <unit>'.

    ``value`` is rendered with exactly two decimal places; ``unit`` is
    inserted using its default string formatting.
    """
    return "{:.2f} {}".format(value, unit)

# Example usage: format one hand-written (value, unit) pair and print it.
predicted_value = 2.567  # Example predicted value
predicted_unit = 'gram'  # Example predicted unit
formatted_prediction = format_prediction(predicted_value, predicted_unit)
print(formatted_prediction)  # Output: "2.57 gram"


In [None]:
import pandas as pd

def make_predictions(model, test_dataloader, unit_names=None):
    """Run the model over a test loader and format one prediction per sample.

    Args:
        model: Module whose forward pass returns ``(value, unit_scores)``.
        test_dataloader: Iterable yielding ``(inputs, index)`` batches, where
            ``index`` identifies the sample(s) in the batch.
        unit_names: Optional sequence or mapping from class index to unit
            name. When omitted, the numeric class index is written into the
            prediction string, as before.

    Returns:
        A list of ``(index, "<value> <unit>")`` tuples, one per sample.
    """
    predictions = []
    model.eval()

    with torch.no_grad():
        for inputs, index in test_dataloader:
            # Get predictions
            value_pred, unit_pred = model(inputs)

            # Pick the highest-scoring unit class for every sample
            _, unit_idx = torch.max(unit_pred, 1)

            # Flatten everything to plain Python lists so batches of any size
            # work; .item() only accepts single-element tensors.
            values = value_pred.reshape(-1).tolist()
            unit_ids = unit_idx.reshape(-1).tolist()
            sample_ids = torch.as_tensor(index).reshape(-1).tolist()

            for sample_id, value, unit_id in zip(sample_ids, values, unit_ids):
                # Translate the class index to its unit name when a lookup
                # was supplied.
                unit = unit_names[unit_id] if unit_names is not None else unit_id
                predictions.append((sample_id, format_prediction(value, unit)))

    return predictions

# Format the results into a CSV file for submission
def save_predictions_to_csv(predictions, output_file="submission.csv"):
    """Write predictions to a CSV file with 'index' and 'prediction' columns.

    Args:
        predictions: Rows of ``(index, prediction)`` pairs.
        output_file: Destination path; defaults to ``submission.csv``.

    The DataFrame's own row index is not written to the file.
    """
    column_names = ["index", "prediction"]
    submission = pd.DataFrame(data=predictions, columns=column_names)
    submission.to_csv(output_file, index=False)

# Example: Assuming test_dataloader is available
# predictions = make_predictions(model, test_dataloader)
# save_predictions_to_csv(predictions)
