In [80]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [123]:
import timm
import sys
sys.path.append('../src')
from models import SimpleClassificationModel
from dataset import WindDataset
from transforms import get_valid_transforms
import neptune
from os.path import join as join_path
import pandas as pd
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import torch
import numpy as np
from tqdm.auto import tqdm
from sklearn.metrics import mean_squared_error
import seaborn as sns
%matplotlib inline

In [124]:
# Directory holding the CSV files, and directory holding the image files.
data_dir = '../data/'
image_dir = '../data/all_images/'

# Submission template and training labels, both read from data_dir.
sample_sub = pd.read_csv(join_path(data_dir, 'submission_format.csv'))
train_labels = pd.read_csv(join_path(data_dir, 'training_set_labels.csv'))

# Test image file names: every image_id of the submission template with a
# '.jpg' extension appended, in the template's row order.
test_images = (
    sample_sub['image_id']
    .map(lambda image_id: image_id + '.jpg')
    .tolist()
)

Get test loader

In [127]:
# Dataset over the test images using the transforms returned by
# get_valid_transforms(256).
# NOTE(review): the trailing positional `1` is forwarded to WindDataset as-is;
# its meaning is not visible in this notebook — confirm against the dataset module.
test_dataset = WindDataset(
    test_images,
    image_dir,
    get_valid_transforms(256),
    1,
)

# shuffle=False keeps batches in the order of test_images, so predictions can
# be matched back to the rows of sample_sub by position.
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=16)

In [130]:
# Instantiate the architecture with pretrained=False; the trained parameters
# are taken from the checkpoint loaded below.
model = SimpleClassificationModel('resnest200e', pretrained=False)

# map_location='cpu' lets the checkpoint be deserialized regardless of the
# device it was saved from; the model is moved to the GPU afterwards.
weights = torch.load(
    '../logs/21.14_resnest200e_adamw_heavy_fold_0/21.14_resnest200e_adamw_heavy_fold_0_best_val_loss=67.265.ckpt',
    map_location='cpu',
)['state_dict']

# The checkpoint keys carry a leading 'net.' that the bare model does not
# expect. Strip it only when it is a prefix: str.replace would also delete
# 'net.' from the middle of a key. (The previous `if k in weights` filter was
# always true and has been dropped.)
weights = {
    (k[len('net.'):] if k.startswith('net.') else k): v
    for k, v in weights.items()
}
model.load_state_dict(weights)
model.cuda()
# Switch to evaluation mode; as the last expression this also displays the model.
model.eval()




SimpleClassificationModel(
  (enc): Sequential(
    (0): Sequential(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (5): ReLU(inplace=True)
      (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    )
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): ResNestBottleneck(
        (conv1): Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_

In [None]:
# Run the model over the test loader and collect one prediction array per batch.
preds = []
with torch.no_grad():  # inference only, no gradients needed
    for sample in tqdm(test_loader, total=len(test_loader)):
        # squeeze() collapses a batch of size 1 to a 0-d array, which
        # np.concatenate cannot join; atleast_1d keeps every entry at least
        # 1-D and leaves larger batches unchanged.
        preds.append(
            np.atleast_1d(
                model(sample['features'].cuda()).cpu().numpy().squeeze()
            )
        )

In [None]:
# Join the per-batch arrays into a single prediction vector, store the values
# rounded to integers in the submission frame, and write it to disk.
# NOTE: `preds` is rebound from a list of arrays to one array here, so this
# cell is meant to run once after the inference cell.
preds = np.concatenate(preds)
sample_sub['wind_speed'] = preds.round().astype(int)
sample_sub.to_csv(
    '../data/21.14_resnest200e_adamw_heavy_fold_0_best_val_loss=67.265_sub.csv',
    index=False,
)

Get validation loader and plot results

In [131]:
# Load the fold-0 validation split, derive each image's integer order from the
# part of image_id after the last underscore, and sort by that order and then
# by storm.
val_df = (
    pd.read_csv(join_path(data_dir, 'fold0_val.csv'))
    .assign(
        image_order=lambda frame: frame['image_id'].apply(
            lambda image_id: int(str(image_id).rsplit('_', 1)[-1])
        )
    )
    .sort_values(['image_order', 'storm_id'])
)

# File names in the sorted order; with shuffle=False the loader yields batches
# in this same order, so predictions align with val_df rows by position.
val_images = val_df['image_id'].map(lambda image_id: image_id + '.jpg').tolist()
val_dataset = WindDataset(
    val_images,
    image_dir,
    get_valid_transforms(256),
    1,
)
val_loader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=16)

In [132]:
# Run the model over the validation loader and collect one prediction array
# per batch.
preds_val = []
with torch.no_grad():  # inference only, no gradients needed
    for sample in tqdm(val_loader, total=len(val_loader)):
        # squeeze() collapses a batch of size 1 to a 0-d array, which
        # np.concatenate cannot join; atleast_1d keeps every entry at least
        # 1-D and leaves larger batches unchanged.
        preds_val.append(
            np.atleast_1d(
                model(sample['features'].cuda()).cpu().numpy().squeeze()
            )
        )

HBox(children=(FloatProgress(value=0.0, max=439.0), HTML(value='')))




In [133]:
# Attach the predictions to the validation frame. The array is assigned by
# position, which relies on val_df being in the same order the loader iterated.
val_df['wind_speed_predicted'] = np.concatenate(preds_val)
val_df['type'] = 'val'
# Signed error: positive means the model predicted a higher value than wind_speed.
val_df['wind_speed_diff'] = val_df['wind_speed_predicted'] - val_df['wind_speed']

# Root-mean-squared error. The `squared=False` keyword of mean_squared_error is
# deprecated and removed in recent scikit-learn releases, so the square root is
# taken explicitly; arguments are passed in the documented (y_true, y_pred) order.
rmse = np.sqrt(
    mean_squared_error(val_df['wind_speed'], val_df['wind_speed_predicted'])
)
print(f'rmse: {rmse}')
val_df.head()

rmse: 8.201247670515333


Unnamed: 0,image_id,wind_speed,storm_id,image_order,wind_speed_predicted,type,wind_speed_diff
5221,acd_000,30,acd,0,27.56753,val,-2.43247
5336,ang_000,30,ang,0,37.430611,val,7.430611
4914,aph_000,30,aph,0,30.657787,val,0.657787
4923,atz_000,30,atz,0,34.396828,val,4.396828
5637,aya_000,26,aya,0,28.288118,val,2.288118


In [None]:
# Split each image_id on underscores: the first piece becomes storm_id and the
# last piece, converted to int, becomes image_order. The rows are tagged as
# 'test' so they can be told apart after being combined with other frames.
sample_sub['storm_id'] = sample_sub['image_id'].map(
    lambda image_id: image_id.split('_', 1)[0]
)
sample_sub['image_order'] = sample_sub['image_id'].map(
    lambda image_id: int(str(image_id).rsplit('_', 1)[-1])
)
sample_sub['type'] = 'test'
sample_sub.head()

# Let's make some diagnostic plots

In [None]:
# Stack the test and validation rows into one long frame restricted to the same
# five columns, with the 'type' column telling the two sources apart.
# NOTE(review): 'wind_speed' in sample_sub holds the rounded model predictions
# assigned earlier, whereas in val_df it is the column read from the validation
# CSV (not 'wind_speed_predicted') — confirm this mix is intended.
merged_preds = pd.concat(
    [
        frame.loc[:, ['image_id', 'wind_speed', 'storm_id', 'image_order', 'type']]
        for frame in (sample_sub, val_df)
    ]
)
merged_preds.head()

In [None]:
# Distinct storm ids present in each split (missing values excluded).
storms_val = set(val_df['storm_id'].dropna().unique())
storms_test = set(sample_sub['storm_id'].dropna().unique())

# Storms that appear in both splits. Sorting makes the list order — and hence
# which storms get plotted first — stable across kernel restarts; plain
# list(set) order varies with string hash randomization.
common_storms = sorted(storms_test.intersection(storms_val))
print(f'common storms val-test: {len(common_storms)}')

In [None]:
# 5x5 grid of panels, one storm per panel; at most len(ax) storms from
# common_storms are drawn, and any remaining panels stay empty.
f, ax = plt.subplots(nrows=5, ncols=5, figsize=(25, 25))
ax = ax.flatten()
for idx_storm, selected_storm in enumerate(common_storms[:len(ax)]):
    # wind_speed against image_order for this storm, with the line style
    # distinguishing the 'type' values of the rows.
    sns.lineplot(
        data=merged_preds[merged_preds['storm_id'] == selected_storm],
        x='image_order',
        y='wind_speed',
        hue='storm_id',
        style='type',
        ax=ax[idx_storm],
    )