In [None]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pathlib import Path
import matplotlib.pyplot as plt

In [None]:
# Resolve the dataset root: use the Kaggle input mount when it exists,
# otherwise fall back to a copy relative to the working directory.
data_path = (
    Path("/kaggle", "input", "planets-dataset", "planet", "planet")
    if Path("/kaggle").exists()
    else Path("planet", "planet")
)

# Folder with the training JPEG files.
image_path = data_path / "train-jpg"

# Table of tags per image, read from the CSV that sits next to the image folder.
train_classes = pd.read_csv(data_path / "train_classes.csv")

In [None]:
# Collapse the space-separated `tags` string into a single `tag` per image:
# ignore "selective_logging", then keep the alphabetically last remaining tag.
train_classes["tag"] = train_classes["tags"].apply(
    lambda tags: sorted(
        name for name in tags.split(" ") if name != "selective_logging"
    )[-1]
)

In [None]:
# Lookup table: image name -> derived single tag.
# Built straight from the two columns; `iterrows()` constructs a Series for
# every row and is far slower for exactly the same mapping.
train_labels = dict(zip(train_classes["image_name"], train_classes["tag"]))

In [None]:
# Number of images per derived tag.
train_classes["tag"].value_counts()

In [None]:
# Preview the first rows whose derived tag is 'cloudy'.
train_classes.query("tag == 'cloudy'").head()

In [None]:
def show_image(image_name):
    """Display one training image with its label as the plot title.

    Args:
        image_name: Image identifier without the ``.jpg`` extension; it is
            also used as the key into ``train_labels``.
    """
    image_file = data_path / "train-jpg" / f"{image_name}.jpg"
    pixels = plt.imread(image_file)
    # The label lookup happens after the file is read, so a missing file is
    # reported before a missing label.
    plt.title(train_labels[image_name])
    plt.imshow(pixels)
    plt.axis("off")
    plt.show()

In [None]:
# Display training image "train_0" together with its label.
show_image("train_0")

In [None]:
# Display training image "train_4" together with its label.
show_image("train_4")

In [None]:
# Display training image "train_1" together with its label.
show_image("train_1")

In [None]:
# Display training image "train_12" together with its label.
show_image("train_12")

In [None]:
from fastai.vision.all import *

In [None]:
def label(file_name):
    """Return the training tag for an image file.

    Args:
        file_name: File name or path of the image (``str`` or path-like),
            e.g. ``"train_0.jpg"``. A name without the ``.jpg`` extension is
            accepted as well.

    Returns:
        The tag stored in ``train_labels`` for that image.

    Raises:
        KeyError: If the image name is not a key of ``train_labels``.
    """
    # Keep only the final path component so full paths and Path objects work,
    # then strip a trailing ".jpg" only (str.replace removed it anywhere).
    image_name = Path(file_name).name
    if image_name.endswith(".jpg"):
        image_name = image_name[: -len(".jpg")]
    return train_labels[image_name]

In [None]:
# Sanity check: look up the label for a single file name.
label("train_30370.jpg")

In [None]:
# Build the train/validation loaders from the image folder: every file is
# labelled through `label`, 20% of the files are held out for validation with
# a fixed seed, and each item is resized to 224.
dls = ImageDataLoaders.from_name_func(
    image_path,
    get_image_files(image_path),
    valid_pct=0.2,
    seed=42,
    label_func=label,
    item_tfms=Resize(224),
)

# If a string is passed into the model argument, it will now use timm (if it is installed).
# The loss function is a Learner setting, so it is given to `vision_learner`;
# `from_name_func` does not document a `loss_func` parameter.
learn = vision_learner(dls, 'vit_tiny_patch16_224', loss_func=CrossEntropyLossFlat())

In [None]:
# Remove ProgressCallback from the learner's callbacks.
learn.remove_cb(ProgressCallback)

In [None]:
# learn.fit(1, cbs=ShortEpochCallback(short_valid=False, pct=0.000001))

In [None]:
# Export the learner to `<learn.path>/outputs/planets.pkl`.
# `learn.export` resolves a relative file name against `learn.path`, so the
# output directory is created under that same base and the file is reloaded
# from it, instead of assuming `learn.path` equals `image_path`.
export_name = "outputs/planets.pkl"
export_file = learn.path / export_name
export_file.parent.mkdir(parents=True, exist_ok=True)
learn.export(export_name)


# Load exported model.
# NOTE: load_learner unpickles the file; only load exports you trust.
learn_3 = load_learner(export_file)

In [None]:
# Read one training image as an array, drop its last channel, and run the
# in-memory learner on it.
# NOTE(review): presumably the JPEGs carry a 4th channel the model does not
# expect, hence the `:-1` slice — confirm against the data.
image_name = "train_0"
img = plt.imread(data_path / "train-jpg" / f"{image_name}.jpg")[:,:,:-1]
# img = PILImage.create(img)
learn.predict(img)


In [None]:
# Run the learner reloaded from the exported file on the same image.
learn_3.predict(img)

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably required before the push in the next cell — confirm.
notebook_login()

In [None]:
from huggingface_hub import push_to_hub_fastai

# Target repository on the Hugging Face Hub; reused when loading the model back.
repo_id = "davidmasip/deforestation_predatathon"

# Upload the in-memory learner to that repository.
push_to_hub_fastai(learner=learn, repo_id=repo_id)

In [None]:
from huggingface_hub import from_pretrained_fastai


# Load a learner back from the Hub repository identified by `repo_id`.
learner_2 = from_pretrained_fastai(repo_id)

In [None]:
# `img` already had its last channel removed when it was loaded, so it is
# passed unchanged; slicing it a second time would drop another channel.
pred_label, pred_idx, probs = learn.predict(img)
# Report the class the model actually predicted together with its probability,
# rather than a fixed class index with an unrelated message.
print(f"Predicted '{pred_label}' with probability {100 * probs[pred_idx].item():.2f}%")


In [None]:
# Show the pixel array that was passed to the predictions.
img