<a href="https://colab.research.google.com/github/abglnv/faio-warmup/blob/main/-2-shapes/-2-shapes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import json
import os
from google.colab import userdata

# Get credentials from secrets
kaggle_username = userdata.get('KAGGLE_USERNAME')
kaggle_key = userdata.get('KAGGLE_KEY')

# Make sure you have both
if kaggle_username is None or kaggle_key is None:
    print("Please check that KAGGLE_USERNAME and KAGGLE_KEY are set in your secrets!")
else:
    # Create the .kaggle directory if it doesn't exist
    os.makedirs(os.path.expanduser('~/.kaggle'), exist_ok=True)
    # Write the kaggle.json file
    with open(os.path.expanduser('~/.kaggle/kaggle.json'), 'w') as f:
        json.dump({'username': kaggle_username, 'key': kaggle_key}, f)
    # Set correct permissions
    os.chmod(os.path.expanduser('~/.kaggle/kaggle.json'), 0o600)
    print("kaggle.json created successfully!")

kaggle.json created successfully!


In [2]:
!kaggle competitions download -c shape-classification-challenge

Downloading shape-classification-challenge.zip to /content
  0% 0.00/2.90M [00:00<?, ?B/s]
100% 2.90M/2.90M [00:00<00:00, 140MB/s]


In [3]:
import zipfile

with zipfile.ZipFile('shape-classification-challenge.zip', 'r') as zip_ref:
    zip_ref.extractall('shape-classification-challenge')

In [29]:
import pandas as pd
import numpy as np

train_df = pd.read_csv("shape-classification-challenge/train.csv", header=None)
train_df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4087,4088,4089,4090,4091,4092,4093,4094,4095,4096
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,66,0,0,0,0
3,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [30]:
labels = train_df.iloc[:, 0].values
features = train_df.iloc[:, 1:].values

In [31]:
labels

array([0, 0, 1, ..., 1, 1, 2])

In [32]:
import torch

X = torch.tensor(features, dtype=torch.float32).view(-1, 64, 64)
y = torch.tensor(labels, dtype=torch.long)

In [33]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

In [34]:
from torch.utils.data import TensorDataset, DataLoader

batch_size = 32
train_ds = TensorDataset(X_train, y_train)
val_ds = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size)

In [35]:
X_train[0].shape

torch.Size([64, 64])

In [36]:
import torch.nn as nn
import torch.nn.functional as F

class PointNet(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv1d(64, 64, 1)
        self.conv2 = nn.Conv1d(64, 128, 1)
        self.conv3 = nn.Conv1d(128, 1024, 1)
        self.fc1 = nn.Linear(1024, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, num_classes)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.transpose(1, 2)
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.relu(self.bn3(self.conv3(x)))
        x = torch.max(x, 2)[0]
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [37]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
num_classes = 3
model = PointNet(num_classes).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

for epoch in range(20):
  model.train()
  total_loss, total_correct = 0,0
  for xb, yb in train_loader:
    xb, yb = xb.to(device), yb.to(device)
    optimizer.zero_grad()
    preds = model(xb)
    loss = criterion(preds, yb)
    loss.backward()
    optimizer.step()

    total_loss += loss.item()
    total_correct += (preds.argmax(dim=1) == yb).sum().item()

  train_loss = total_loss / len(train_loader)
  train_acc = total_correct / len(train_ds)

  model.eval()
  with torch.no_grad():
    total_loss, total_correct = 0,0
    for xb, yb in val_loader:
      xb, yb = xb.to(device), yb.to(device)

      preds = model(xb)
      loss = criterion(preds, yb)
      total_loss += loss.item() * xb.size(0)
      total_correct += (preds.argmax(dim=1) == yb).sum().item()

    val_loss = total_loss / len(val_loader)
    val_acc = total_correct / len(val_ds)
  print(f"Epoch {epoch+1}/{20}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

Epoch 1/20, Train Loss: 0.7104, Train Acc: 0.6683, Val Loss: 12.0324, Val Acc: 0.8600
Epoch 2/20, Train Loss: 0.2927, Train Acc: 0.8833, Val Loss: 6.9323, Val Acc: 0.9244
Epoch 3/20, Train Loss: 0.1449, Train Acc: 0.9389, Val Loss: 10.5202, Val Acc: 0.8578
Epoch 4/20, Train Loss: 0.1137, Train Acc: 0.9594, Val Loss: 6.0881, Val Acc: 0.9311
Epoch 5/20, Train Loss: 0.0589, Train Acc: 0.9833, Val Loss: 7.8520, Val Acc: 0.9133
Epoch 6/20, Train Loss: 0.0491, Train Acc: 0.9800, Val Loss: 16.8551, Val Acc: 0.8467
Epoch 7/20, Train Loss: 0.0720, Train Acc: 0.9756, Val Loss: 4.7187, Val Acc: 0.9467
Epoch 8/20, Train Loss: 0.0486, Train Acc: 0.9828, Val Loss: 4.5223, Val Acc: 0.9444
Epoch 9/20, Train Loss: 0.1026, Train Acc: 0.9694, Val Loss: 4.7835, Val Acc: 0.9467
Epoch 10/20, Train Loss: 0.0737, Train Acc: 0.9744, Val Loss: 4.5346, Val Acc: 0.9533
Epoch 11/20, Train Loss: 0.0147, Train Acc: 0.9950, Val Loss: 4.8391, Val Acc: 0.9511
Epoch 12/20, Train Loss: 0.0112, Train Acc: 0.9950, Val Loss

In [38]:
test_df = pd.read_csv('shape-classification-challenge/test.csv')
test_df

Unnamed: 0,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel4086,pixel4087,pixel4088,pixel4089,pixel4090,pixel4091,pixel4092,pixel4093,pixel4094,pixel4095
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,150,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
745,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
746,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
747,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,158,0,0,0
748,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
test_features = test_df.iloc[:].values

num_cols = test_features.shape[1]

In [40]:
num_cols

4096

In [41]:
X_test = torch.tensor(test_features, dtype=torch.float32).view(-1, 64, 64)

from torch.utils.data import DataLoader, TensorDataset

model.eval()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
test_ds = TensorDataset(X_test)
test_loader = DataLoader(test_ds, batch_size=64)

all_preds = []

with torch.no_grad():
    for xb, in test_loader:
        xb = xb.to(device)
        logits = model(xb)
        preds = logits.argmax(dim=1).cpu().numpy()
        all_preds.append(preds)

all_preds = np.concatenate(all_preds)

submission_df = pd.DataFrame({
    "id": np.arange(len(all_preds)),
    "label": all_preds
})

submission_df.to_csv("submission.csv", index=False)