In [1]:
pip install torchvision

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
import pandas as pd
import numpy as np
from tqdm import tqdm

In [2]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

In [3]:
resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

# Remove final classification layer
resnet = torch.nn.Sequential(*list(resnet.children())[:-1])

resnet.eval()

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Con

In [4]:
IMAGE_DIR = "data/images"
image_ids = [img.split(".")[0] for img in os.listdir(IMAGE_DIR)]

In [6]:
features = {}
with torch.no_grad():
    for img_id in tqdm(image_ids):
        img_path = os.path.join(IMAGE_DIR, f"{img_id}.png")
        image = Image.open(img_path).convert("RGB")
        image = transform(image).unsqueeze(0)

        embedding = resnet(image)
        embedding = embedding.view(-1).numpy()

        features[int(img_id)] = embedding

100%|██████████████████████████████████████████████████████████████████████████████| 3994/3994 [05:23<00:00, 12.36it/s]


In [40]:
# np.save("data/cleaned/image_embeddings.npy", features)

In [27]:
import pandas as pd
import numpy as np

tabular_df = pd.read_csv("data/cleaned/train_clean.csv")
tabular_df = tabular_df.drop(columns=["date"])

In [28]:
image_features = np.load(
    "data/cleaned/image_embeddings.npy",
    allow_pickle=True
).item()

In [29]:
img_df = pd.DataFrame.from_dict(image_features, orient="index")
img_df.index.name = "id"
img_df.reset_index(inplace=True)

#### Merge Tabular + Image Data

In [47]:
merged_df = tabular_df.merge(img_df, on="id", how="inner")
print("Merged shape:", merged_df.shape)

Merged shape: (4034, 533)


#### Prepare X and y

In [31]:
X = merged_df.drop(columns=["price", "id"])
y = merged_df["price"]
X.columns = X.columns.astype(str)

In [32]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#### Multimodal Regression Model

In [33]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

rf_multi = RandomForestRegressor(
    n_estimators=300,
    max_depth=25,
    random_state=42,
    n_jobs=-1
)

rf_multi.fit(X_train, y_train)

y_pred = rf_multi.predict(X_val)

rmse = np.sqrt(mean_squared_error(y_val, y_pred))
r2 = r2_score(y_val, y_pred)

print("Multimodal RMSE:", rmse)
print("Multimodal R2:", r2)

Multimodal RMSE: 154403.0066194829
Multimodal R2: 0.7553104869966105


In [34]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

In [36]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

lin_model = LinearRegression()
lin_model.fit(X_train, y_train)

y_pred_lr = lin_model.predict(X_val)

rmse_lr = np.sqrt(mean_squared_error(y_val, y_pred_lr))
r2_lr = r2_score(y_val, y_pred_lr)

print(f"Linear Regression RMSE: {rmse_lr:.2f}")
print(f"Linear Regression R²  : {r2_lr:.4f}")


Linear Regression RMSE: 201188.13
Linear Regression R²  : 0.5846


In [37]:
from sklearn.ensemble import RandomForestRegressor

rf_model = RandomForestRegressor(
    n_estimators=300,
    max_depth=15,
    min_samples_split=5,
    random_state=42,
    n_jobs=-1
)

rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_val)

rmse_rf = np.sqrt(mean_squared_error(y_val, y_pred_rf))
r2_rf = r2_score(y_val, y_pred_rf)

print(f"Random Forest RMSE: {rmse_rf:.2f}")
print(f"Random Forest R²  : {r2_rf:.4f}")


Random Forest RMSE: 153972.47
Random Forest R²  : 0.7567


In [38]:
from lightgbm import LGBMRegressor

lgb_model = LGBMRegressor(
    n_estimators=500,
    learning_rate=0.05,
    max_depth=6,
    num_leaves=31,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

lgb_model.fit(X_train, y_train)

y_pred_lgb = lgb_model.predict(X_val)

rmse_lgb = np.sqrt(mean_squared_error(y_val, y_pred_lgb))
r2_lgb = r2_score(y_val, y_pred_lgb)

print(f"LightGBM RMSE: {rmse_lgb:.2f}")
print(f"LightGBM R²  : {r2_lgb:.4f}")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.029735 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 133012
[LightGBM] [Info] Number of data points in the train set: 3227, number of used features: 531
[LightGBM] [Info] Start training from score 540719.980167
LightGBM RMSE: 137550.17
LightGBM R²  : 0.8058


In [39]:
import pandas as pd

results = pd.DataFrame({
    "Model": [
        "Linear Regression",
        "Random Forest",
        "LightGBM",
        "XGBoost"
    ],
    "RMSE": [
        rmse_lr,
        rmse_rf,
        rmse_lgb,
        rmse
    ],
    "R2": [
        r2_lr,
        r2_rf,
        r2_lgb,
        r2
    ]
})

results.sort_values("RMSE")


Unnamed: 0,Model,RMSE,R2
2,LightGBM,137550.170439,0.80581
1,Random Forest,153972.474939,0.756673
3,XGBoost,154403.006619,0.75531
0,Linear Regression,201188.126039,0.58456


#### Train MLP Regressor

In [35]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

mlp = MLPRegressor(
    hidden_layer_sizes=(256, 128),
    activation='relu',
    solver='adam',
    max_iter=300,
    random_state=42,
    early_stopping=True,
    n_iter_no_change=10
)

mlp.fit(X_train_scaled, y_train)



In [15]:
y_pred = mlp.predict(X_val_scaled)

rmse = np.sqrt(mean_squared_error(y_val, y_pred))
r2 = r2_score(y_val, y_pred)

print("Multimodal (MLP) RMSE:", rmse)
print("Multimodal (MLP) R2:", r2)

Multimodal (MLP) RMSE: 188063.55749222566
Multimodal (MLP) R2: 0.6369945502410832


#### Apply PCA to embeddings

In [17]:
image_cols = img_df.columns.drop("id")

In [18]:
from sklearn.decomposition import PCA

pca = PCA(n_components=20, random_state=42)
img_pca = pca.fit_transform(img_df[image_cols])

img_pca_df = pd.DataFrame(
    img_pca,
    columns=[f"img_pca_{i}" for i in range(20)]
)
img_pca_df["id"] = img_df["id"]

In [19]:
merged_pca_df = tabular_df.merge(img_pca_df, on="id", how="inner")

X = merged_pca_df.drop(columns=["price", "id"])
y = merged_pca_df["price"]

X.columns = X.columns.astype(str)

In [21]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

ridge = Ridge(alpha=10.0)
ridge.fit(X_train, y_train)

y_pred = ridge.predict(X_val)

rmse = np.sqrt(mean_squared_error(y_val, y_pred))
r2 = r2_score(y_val, y_pred)

print("Multimodal (PCA + Ridge) RMSE:", rmse)
print("Multimodal (PCA + Ridge) R2:", r2)

Multimodal (PCA + Ridge) RMSE: 184849.02731739453
Multimodal (PCA + Ridge) R2: 0.6492980424202721


so our best model is coming to be LightGBM with R2 score = 0.805810

In [52]:
test_df = pd.read_excel("data/test2.xlsx")

In [54]:
print(os.path.exists(IMAGE_DIR))
print(os.listdir(IMAGE_DIR))

True
['1000102.jpg', '100100050.jpg', '1001200035.jpg', '100300500.jpg', '1005000036.jpg', '1005000220.jpg', '1005000240.jpg', '100600320.jpg', '100600860.jpg', '1020069017.jpg', '1021049022.jpg', '1021079099.jpg', '1022069050.jpg', '1022069183.jpg', '1023059190.jpg', '1023059246.jpg', '1023059365.jpg', '1023089096.jpg', '1024000109.jpg', '1024039001.jpg', '1024069027.jpg', '1025039292.jpg', '1025049174.jpg', '1025059186.jpg', '1026049036.jpg', '1026069061.jpg', '1026069106.jpg', '1036400100.jpg', '1036400200.jpg', '1036700220.jpg', '1042700050.jpg', '1042700300.jpg', '1043000100.jpg', '104500730.jpg', '104510180.jpg', '104510230.jpg', '104510440.jpg', '104540820.jpg', '104550690.jpg', '104560120.jpg', '1049010300.jpg', '1049010390.jpg', '1049010620.jpg', '1051000040.jpg', '106000015.jpg', '106000395.jpg', '1061500360.jpg', '1068000110.jpg', '1068000255.jpg', '1068000375.jpg', '1068000520.jpg', '1068000559.jpg', '1070000390.jpg', '1072010350.jpg', '1072100085.jpg', '1073100065.jpg', '1

In [56]:
import os
import numpy as np
import torch
from PIL import Image
from tqdm import tqdm

IMAGE_DIR = "data/satellite_images_test"

image_ids = [
    os.path.splitext(img)[0]
    for img in os.listdir(IMAGE_DIR)
    if img.endswith(".jpg")
]

features = {}

with torch.no_grad():
    for img_id in tqdm(image_ids):
        img_path = os.path.join(IMAGE_DIR, f"{img_id}.jpg")
        image = Image.open(img_path).convert("RGB")
        image = transform(image).unsqueeze(0)

        embedding = resnet(image)
        embedding = embedding.view(-1).cpu().numpy()

        features[int(img_id)] = embedding

# SAVE TEST embeddings
np.save("data/cleaned/image_embeddings_test.npy", features)


100%|██████████████████████████████████████████████████████████████████████████████| 5393/5393 [07:45<00:00, 11.58it/s]


In [57]:
image_features = np.load(
    "data/cleaned/image_embeddings_test.npy",
    allow_pickle=True
).item()


In [58]:
img_df = pd.DataFrame.from_dict(image_features, orient="index")
img_df.index.name = "id"
img_df.reset_index(inplace=True)


In [59]:
merged_df = test_df.merge(img_df, on="id", how="left")

print("Merged shape:", merged_df.shape)
print("Missing image rows:", merged_df.isna().any(axis=1).sum())


Merged shape: (5404, 532)
Missing image rows: 3


In [60]:
merged_df_clean = merged_df.dropna().reset_index(drop=True)

print("After dropping rows:", merged_df_clean.shape)


After dropping rows: (5401, 532)


In [65]:
merged_df["time_index"] = pd.to_datetime(merged_df["date"]).factorize()[0]
X_test = merged_df.drop(columns=["id", "date"])
predicted_prices = lgb_model.predict(X_test)

submission = pd.DataFrame({
    "id": merged_df["id"],
    "predicted_price": predicted_prices
})

submission.to_csv("23112054_final.csv", index=False)
