In [11]:
import pandas as pd
import numpy as np

import folium

In [12]:
def convert_lat_long(input):
    """Parse a raw latitude/longitude cell into a float, falling back to NaN.

    Passed to ``pd.read_csv(converters=...)`` in this notebook so that
    malformed coordinate cells become ``np.nan`` and can be removed with
    ``dropna`` afterwards.

    Parameters
    ----------
    input
        The raw cell value. (The name shadows the ``input`` builtin; it is
        kept unchanged so keyword callers keep working.)

    Returns
    -------
    float
        ``float(input)`` when the conversion succeeds, otherwise ``np.nan``.
    """
    try:
        return float(input)
    # Only conversion failures mean "bad coordinate". A bare ``except`` would
    # also swallow KeyboardInterrupt / SystemExit, making a long CSV parse
    # impossible to interrupt.
    except (TypeError, ValueError, OverflowError):
        return np.nan

In [13]:
# Each frame is read and then cleaned in one chain: rows lacking either
# coordinate are dropped and the index is renumbered from 0.
train_df = (
    pd.read_csv(
        "/home/vova/data/exps/birdclef_2024/birdclef_2024/train_metadata_extended_noduplv1.csv"
    )
    .dropna(subset=["latitude", "longitude"])
    .reset_index(drop=True)
)

# For the next two files the coordinate columns are parsed with
# convert_lat_long, so cells that cannot be read as floats turn into NaN and
# are removed by the dropna step.
prev_data_df = (
    pd.read_csv(
        "/home/vova/data/exps/birdclef_2024/dfs/full_noduplsV3_scored_meta_prev_comps_extended_2024SecLabels.csv",
        converters=dict(latitude=convert_lat_long, longitude=convert_lat_long),
    )
    .dropna(subset=["latitude", "longitude"])
    .reset_index(drop=True)
)

xc_data_df = (
    pd.read_csv(
        "/home/vova/data/exps/birdclef_2024/xeno_canto/dataset_2024_classes/train_metadata_noduplV3_extended_2024SecLabels.csv",
        converters=dict(latitude=convert_lat_long, longitude=convert_lat_long),
    )
    .dropna(subset=["latitude", "longitude"])
    .reset_index(drop=True)
)

In [4]:
def india_filter(input_df, pad=0.5, lat_bounds=(6.75, 35.5), lon_bounds=(68.2, 97.4)):
    """Keep the rows whose coordinates lie inside a padded lat/lon bounding box.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Must contain ``"latitude"`` and ``"longitude"`` columns.
    pad : float, default 0.5
        Margin subtracted from each lower bound and added to each upper bound
        (same units as the coordinate columns).
    lat_bounds : tuple of (min, max), default (6.75, 35.5)
        Unpadded latitude limits. The defaults are the values this notebook
        originally hard-coded (presumably India's extent, per the function
        name).
    lon_bounds : tuple of (min, max), default (68.2, 97.4)
        Unpadded longitude limits; same remark as ``lat_bounds``.

    Returns
    -------
    pandas.DataFrame
        The matching rows with a fresh 0..n-1 index. Bounds are inclusive;
        rows with NaN coordinates never match.
    """
    lat_min, lat_max = lat_bounds
    lon_min, lon_max = lon_bounds
    # Series.between is inclusive on both ends by default, i.e. the same as
    # (value >= low) & (value <= high).
    inside_box = (
        input_df["latitude"].between(lat_min - pad, lat_max + pad)
        & input_df["longitude"].between(lon_min - pad, lon_max + pad)
    )
    return input_df[inside_box].reset_index(drop=True)

In [5]:
# Row counts: all of train_df vs. the rows india_filter keeps
len(train_df), len(india_filter(train_df))

(23977, 4903)

In [6]:
# Distinct primary_label values: all of train_df vs. the rows india_filter keeps
tuple(len(set(frame["primary_label"])) for frame in (train_df, india_filter(train_df)))

(182, 179)

In [7]:
# Row counts: all of prev_data_df vs. the rows india_filter keeps
len(prev_data_df), len(india_filter(prev_data_df))

(2694, 26)

In [8]:
# Distinct primary_label values: all of prev_data_df vs. the rows india_filter keeps
tuple(len(set(frame["primary_label"])) for frame in (prev_data_df, india_filter(prev_data_df)))

(14, 6)

In [9]:
# Row counts: all of xc_data_df vs. the rows india_filter keeps
len(xc_data_df), len(india_filter(xc_data_df))

(21404, 2937)

In [10]:
# Distinct primary_label values: all of xc_data_df vs. the rows india_filter keeps
tuple(len(set(frame["primary_label"])) for frame in (xc_data_df, india_filter(xc_data_df)))

(182, 180)

In [None]:
# Create a map centered on the mean latitude/longitude of train_df
m = folium.Map(location=[train_df['latitude'].mean(), train_df['longitude'].mean()], zoom_start=5)

# Add one marker per row, labelled with its primary_label.
# Only three columns are needed, so iterate them directly instead of using
# DataFrame.iterrows(), which materialises a full Series for every row.
for lat, lon, label in zip(train_df['latitude'], train_df['longitude'], train_df['primary_label']):
    folium.Marker([lat, lon], popup=label).add_to(m)

# Display the map
m

# Prepare Data

In [14]:
# Run india_filter (default pad) over each of the three metadata frames
train_df_india, prev_data_df_india, xc_data_df_india = (
    india_filter(frame) for frame in (train_df, prev_data_df, xc_data_df)
)

In [16]:
# Write the filtered prev_data_df rows to CSV, omitting the index column
prev_data_df_india.to_csv(
    path_or_buf="/home/vova/data/exps/birdclef_2024/dfs/full_noduplsV3_scored_meta_prev_comps_extended_2024SecLabels_IndiaV1.csv",
    index=False,
)

In [18]:
# Write the filtered xc_data_df rows to CSV, omitting the index column
xc_data_df_india.to_csv(
    path_or_buf="/home/vova/data/exps/birdclef_2024/xeno_canto/dataset_2024_classes/train_metadata_noduplV3_extended_2024SecLabels_IndiaV1.csv",
    index=False,
)