# ***HAPPYWHALE-WHALE & DOLPHIN***

Identify whales 🐳 and dolphins 🐬 by their unique characteristics.

### **Today: a baseline with PyTorch Lightning and EfficientNet**

## **1. Explore data**

In [None]:
# Long-format listing of the competition dataset directory.
!ls -l ../input/happy-whale-and-dolphin

In [None]:
#Import necessary library
import os
import json
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
from pprint import pprint

# Apply seaborn's default theme to the plots drawn later in the notebook.
sn.set()

## **Install and download the PyTorch Lightning libraries**

In [None]:
# Quietly (-q) install EfficientDet, IceVision with all of its extras, and
# Lightning Flash with its image extras.
!pip install -q effdet "icevision[all]" 'lightning-flash[image]'
# !pip install -q "pytorch-lightning==1.4.*"
# NOTE(review): wandb is removed right after the install, presumably so that
# no Weights & Biases logging/login is triggered during training — confirm.
!pip uninstall -y wandb

In [None]:
# Download (without installing) the same packages into frozen_packages/,
# preferring prebuilt binary distributions over source archives.
!pip download -q effdet "icevision[all]" 'lightning-flash[image]' --dest frozen_packages --prefer-binary
# Remove the downloaded torch archives from the bundle.
# NOTE(review): presumably because torch is already provided by the target
# environment and the archive is large — confirm.
!rm frozen_packages/torch-*
# Show what is left in the bundle.
!ls -l frozen_packages

## **Training with Lightning⚡Flash**

Follow the example: https://lightning-flash.readthedocs.io/en/stable/reference/image_classification.html

Reference (full credit): [https://www.kaggle.com/jirkaborovec/herbarium-eda-baseline-flash-efficientnet](https://www.kaggle.com/jirkaborovec/herbarium-eda-baseline-flash-efficientnet)



In [None]:
import torch

import flash
from flash.core.data.utils import download_data
from flash.image import ImageClassificationData, ImageClassifier

## **Load the data**

In [None]:
# Training labels: one row per training image, plus the image's on-disk path.
df_train = pd.read_csv('../input/happy-whale-and-dolphin/train.csv')
# The path must be built from df_train itself; there is no `train_df` in this
# notebook, so referencing it raised NameError.
df_train['path'] = '../input/happy-whale-and-dolphin/train_images/' + df_train['image']

# The sample submission names the images to predict; their files live under
# test_images/, so attach those paths the same way.
pred_df = pd.read_csv('../input/happy-whale-and-dolphin/sample_submission.csv')
pred_df['path'] = '../input/happy-whale-and-dolphin/test_images/' + pred_df['image']

In [None]:
# Peek at three randomly chosen training rows.
df_train.sample(n=3)

In [None]:
# Read the sample submission and show three randomly chosen rows of it.
sample = pd.read_csv(
    '../input/happy-whale-and-dolphin/sample_submission.csv',
)
sample.sample(n=3)

## **Build the model and feature selection**

In [None]:
# Training data module: each row's "image" value is a file name resolved
# against train_images_root, and "individual_id" is the class label.
datamodule = ImageClassificationData.from_data_frame(
    input_field="image",
    target_fields="individual_id",
    # for simplicity take just half of the data
    train_data_frame=df_train[:len(df_train) // 2],
    # The dataset root is spelled out here because this notebook never
    # defines a PATH_DATASET constant (using it raised NameError).
    train_images_root=os.path.join("../input/happy-whale-and-dolphin", "train_images"),
    batch_size=128,
    transform_kwargs={"image_size": (224, 224)},
    num_workers=3,
)

In [None]:
# EfficientNet-B0 classifier with pretrained backbone weights; the number of
# output classes comes from the data module, and training uses AdamW.
model = ImageClassifier(
    num_classes=datamodule.num_classes,
    backbone="efficientnet_b0",
    pretrained=True,
    learning_rate=0.001,
    optimizer="AdamW",
)


## **Finetune the model**

In [None]:
from pytorch_lightning.loggers import CSVLogger
# from pytorch_lightning.callbacks import StochasticWeightAveraging

# Trainer Args
# One GPU when CUDA is available, none (CPU) otherwise.
GPUS = 1 if torch.cuda.is_available() else 0

# swa = StochasticWeightAveraging(swa_epoch_start=0.6)
# Metrics are written as CSV under logs/ so they can be read back and plotted.
logger = CSVLogger(save_dir='logs/')

trainer = flash.Trainer(
    logger=logger,
    gpus=GPUS,
    # Half precision is only requested when a GPU is in use.
    precision=32 if not GPUS else 16,
    max_epochs=3,
    # gradient_clip_val=0.01,
)

In [None]:
# Fine-tune the classifier on the training data module using Flash's
# "freeze" finetuning strategy.
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# Persist the trained weights in the working directory.
trainer.save_checkpoint("image_classification_model.pt")

In [None]:
# Read back the CSV written by the trainer's logger, drop the per-step counter
# and index the remaining metrics by epoch.
metrics = (
    pd.read_csv(f'{trainer.logger.log_dir}/metrics.csv')
    .drop(columns="step")
    .set_index("epoch")
)

# Show the first rows, hiding columns that contain no values at all.
display(metrics.dropna(axis=1, how="all").head())

# One line per logged metric, drawn on a wide figure.
g = sn.relplot(data=metrics, kind="line")
plt.gcf().set_size_inches(15, 5)

In [None]:
# Table of images to predict, indexed by their file path.
test_images = pd.DataFrame(data=pred_df)
test_images = test_images.set_index(keys="path")

display(test_images.head())
print(f"inference for {len(test_images)} images")

In [None]:
# Prediction data module: each row's "image" value is a file name resolved
# against predict_images_root; no targets are supplied for inference.
datamodule = ImageClassificationData.from_data_frame(
    input_field="image",
    # target_fields="category_id",
    predict_data_frame=test_images,
    # for simplicity take just fraction of the data
    # predict_data_frame=test_images[:len(test_images) // 100],
    # The dataset root is spelled out here because this notebook never
    # defines a PATH_DATASET constant (using it raised NameError).
    predict_images_root=os.path.join("../input/happy-whale-and-dolphin", 'test_images'),
    batch_size=16,
    transform_kwargs={"image_size": (224, 224)},
    num_workers=2,
)

In [None]:
# trainer.predict yields one collection of labels per batch; flatten them
# into a single list of predicted labels.
predictions = [
    label
    for batch_labels in trainer.predict(model, datamodule=datamodule, output="labels")
    for label in batch_labels
]

## **Thank you for visiting — the next part is coming soon**