In [None]:
import pandas as pd
import rasterio as rio
import geopandas as gpd
from pathlib import Path
from rasvec import tms_to_geotiff
from rasvec import patchify_raster

# Reading vector data

In [6]:
# Load the prediction shapefile and reproject it to EPSG:4326; its overall
# extent is passed to the imagery download further down as the bounding box.
vec_path = Path("../data/raw/vector/prediction_3857.shp")
gdf = gpd.read_file(vec_path).to_crs("EPSG:4326")
# total_bounds is [minx, miny, maxx, maxy] over all geometries.
bounds = gdf.total_bounds
bounds

array([79.84896457,  8.1853809 , 79.90514314,  8.20419389])

# Downloading raster data

In [7]:
# Destination GeoTIFF for the downloaded imagery (kept as a plain string, not a Path).
ras_path = "../data/raw/raster/prediction_3857.tif"

In [None]:
# Download "satellite" imagery for `bounds` at zoom 19 into `ras_path`.
# The recorded run fetched 2407 images, so expect this cell to be slow.
# NOTE(review): overwrite=True presumably replaces an existing file — confirm in rasvec.
tms_to_geotiff(
    output=ras_path, bbox=bounds.tolist(), source="satellite", zoom=19, overwrite=True
)

Downloaded image 0001/2407
Downloaded image 0002/2407
Downloaded image 0003/2407
Downloaded image 0004/2407
Downloaded image 0005/2407
Downloaded image 0006/2407
Downloaded image 0007/2407
Downloaded image 0008/2407
Downloaded image 0009/2407
Downloaded image 0010/2407
Downloaded image 0011/2407
Downloaded image 0012/2407
Downloaded image 0013/2407
Downloaded image 0014/2407
Downloaded image 0015/2407
Downloaded image 0016/2407
Downloaded image 0017/2407
Downloaded image 0018/2407
Downloaded image 0019/2407
Downloaded image 0020/2407
Downloaded image 0021/2407
Downloaded image 0022/2407
Downloaded image 0023/2407
Downloaded image 0024/2407
Downloaded image 0025/2407
Downloaded image 0026/2407
Downloaded image 0027/2407
Downloaded image 0028/2407
Downloaded image 0029/2407
Downloaded image 0030/2407
Downloaded image 0031/2407
Downloaded image 0032/2407
Downloaded image 0033/2407
Downloaded image 0034/2407
Downloaded image 0035/2407
Downloaded image 0036/2407
Downloaded image 0037/2407
D

# Patching the raster

In [8]:
# Directory that receives the raster patches produced by patchify_raster.
output_patched_ras = Path("../data/raw/patched_ras")

In [None]:
# Split the downloaded raster into patches of size 640; the recorded run
# reported a patch array of shape (12, 33, 640, 640, 3).
# NOTE(review): padding=True presumably pads edge patches to full size — confirm in rasvec.
patchify_raster(ras_path, output_path=output_patched_ras, patch_size=640, padding=True)

Patches shape: (12, 33, 640, 640, 3)
Saved the patched files in output dir: ../data/raw/patched_ras


# Patching Vector

In [10]:
from rasvec import clip_vector_by_raster

In [9]:
# Directory intended for the per-patch clipped shapefiles.
output_patched_vec = Path("../data/raw/patched_vec")

In [None]:
# Clip the source vector against every raster patch and write one shapefile per
# patch (same stem as the patch) into `output_patched_vec`.
# The directory is created up front because nothing earlier in the notebook does so.
output_patched_vec.mkdir(parents=True, exist_ok=True)
for i in Path(output_patched_ras).glob("*.tif"):
    filepath = output_patched_vec / (i.stem + ".shp")
    shp_clipped = clip_vector_by_raster(i, vec_path, filepath)

# Vector to txt

In [None]:
def vec_to_txt_add_bbox(ras_path, vec_path, output_path, bbox_size=5):
    """Write a space-separated label file for the point features over one raster.

    Every point in the vector file becomes one row
    ``class distx disty width height`` in ``<output_path>/<raster stem>.txt``
    (no header, no index). All values are fractions of the raster extent:

    - class: always 0.
    - distx: horizontal distance of the point from the raster's left edge.
    - disty: vertical distance of the point from the raster's top edge.
    - width: ``bbox_size`` divided by the raster width.
    - height: ``bbox_size`` divided by the raster height.

    The vector is reprojected to EPSG:3857 and compared directly with the raster
    bounds, so the raster is assumed to be in EPSG:3857 too (TODO confirm).
    ``None`` geometries are skipped; a geometry that raises while being read
    (for example one without ``x``/``y``) is reported with ``print`` and skipped.

    Args:
        ras_path (str): path to the raster file.
        vec_path (str): path to the vector file.
        output_path (str): directory of the output txt file; created if missing.
        bbox_size (float): side length given to every box, in the units of the
            raster bounds. Defaults to 5, the value that used to be hard-coded.
    """

    ras_path = Path(ras_path)
    vec_path = Path(vec_path)
    output_path = Path(output_path)
    output_path.mkdir(parents=True, exist_ok=True)

    with rio.open(ras_path) as src:
        ras_bounds = src.bounds
    top_left_corner = (ras_bounds.left, ras_bounds.top)
    bottom_right_corner = (ras_bounds.right, ras_bounds.bottom)

    vector = gpd.read_file(vec_path).to_crs("EPSG:3857")
    rows = []
    for feature in vector["geometry"]:
        if feature is None:
            continue
        try:
            ras_width = abs(bottom_right_corner[0] - top_left_corner[0])
            ras_height = abs(bottom_right_corner[1] - top_left_corner[1])
            # Build the complete row first and append it once, so a failure on
            # any single value can never leave the columns with unequal lengths.
            row = (
                0,
                abs(feature.x - top_left_corner[0]) / ras_width,
                abs(feature.y - top_left_corner[1]) / ras_height,
                bbox_size / ras_width,
                bbox_size / ras_height,
            )
        except Exception as e:
            print(e)
            continue
        rows.append(row)

    df = pd.DataFrame(rows, columns=["class", "distx", "disty", "width", "height"])
    df.to_csv(output_path / (ras_path.stem + ".txt"), index=False, sep=" ", header=None)

In [62]:
# Directory for the per-patch label .txt files; also passed as the labels
# directory to train_test_split below.
output_patched_txt = Path("../data/raw/patched_txt")

In [None]:
# Write one label file per raster patch into `output_patched_txt`.
# The directory is created here because no earlier cell creates it.
output_patched_txt.mkdir(parents=True, exist_ok=True)
for i in Path(output_patched_ras).glob("*.tif"):
    # Use a dedicated name so the source shapefile path held in the global
    # `vec_path` is not overwritten by the per-patch path.
    patch_vec_path = i.parents[1] / "patched_vec" / (i.stem + ".shp")
    # The third argument (output directory) is required by vec_to_txt_add_bbox.
    vec_to_txt_add_bbox(i, patch_vec_path, output_patched_txt)

#  train_test_val_split

In [3]:
import glob
import os
import random
import shutil

# Seed the global `random` generator that shuffle_data relies on.
random.seed(42)


def shuffle_data(data_list):
    """Shuffle ``data_list`` in place with the global ``random`` generator.

    Args:
        data_list (list): list to shuffle; it is modified in place.

    Returns:
        list: the same list object that was passed in, now shuffled.
    """
    random.shuffle(data_list)
    return data_list


def get_label_path(img_path):
    """Return the path of the same-named file in a sibling ``label`` directory.

    For ``<root>/<image_dir>/<name>`` the result is ``<root>/label/<name>``.
    The file name, including its extension, is kept unchanged. The path is
    built with ``os.path.join`` so it uses the separator of the current
    platform instead of hard-coded backslashes.

    Args:
        img_path (str): path to an image file.

    Returns:
        str: path of the matching file under the ``label`` directory.
    """
    image_dir, filename = os.path.split(img_path)
    dataset_root = os.path.dirname(image_dir)
    return os.path.join(dataset_root, "label", filename)


def train_test_split(
    images: Path | list,
    labels: Path | list,
    train_size: float,
    split_path: Path | None = None,
):
    """This split the dataset whcih consist of data and label into training and testing set.
        Note: This wont work if there are mutiple directories inside data_path or label_path
    Args:
        images (str): Path consisting the data.
        labels (str): Path consisting the labels.
        train_size (float): Ratio of the train_size. Range from 0 - 1.
        split_path (str, optional): If this is provided the data and label and splited and transfered to this directory.
                                    Defaults to None.

    Returns:
        list (str): training_data, training_label, testing_data, testing_label.
    """

    if isinstance(images, Path):
        img_list = [i for i in images.glob("*.tif")]
    elif isinstance(images, list):
        img_list = images

    label_list = [labels / (i.stem + ".txt") for i in img_list]

    img_list = shuffle_data(img_list)

    train_size = int(len(img_list) * train_size)
    trainx = img_list[:train_size]
    trainy = label_list[:train_size]
    testx = img_list[train_size:]
    testy = label_list[train_size:]

    print(f"Train Size: {train_size}, Test Size: {len(img_list) - train_size}")
    return trainx, trainy, testx, testy

NameError: name 'Path' is not defined

In [None]:
# First split: 80% of the patches for training, the remaining 20% held out.
trainx, trainy, testx, testy = train_test_split(
    output_patched_ras, output_patched_txt, 0.8
)
# Second split: halve the held-out images into validation and test
# (the recorded run produced 316 / 40 / 40).
valx, valy, testx, testy = train_test_split(testx, output_patched_txt, 0.5)

Train Size: 316, Test Size: 80
Train Size: 40, Test Size: 40


In [None]:
# Destination directories for the train / test / val image and label files.
trainx_path = Path("../data/train/images")
trainy_path = Path("../data/train/labels")
testx_path = Path("../data/test/images")
testy_path = Path("../data/test/labels")
valx_path = Path("../data/val/images")
valy_path = Path("../data/val/labels")

# Pair every list of files with its destination so one loop handles all six.
split_destinations = [
    (trainx, trainx_path),
    (trainy, trainy_path),
    (testx, testx_path),
    (testy, testy_path),
    (valx, valx_path),
    (valy, valy_path),
]

for files, dest_dir in split_destinations:
    # Create every destination, including the val directories, before copying;
    # shutil.copy needs the target directory to exist.
    dest_dir.mkdir(parents=True, exist_ok=True)
    for file in files:
        # Skip files copied by a previous run so the cell can be re-run safely.
        if (dest_dir / file.name).exists():
            continue
        shutil.copy(file, dest_dir)

# model training

In [1]:
from ultralytics import YOLO

# Load the yolo11s.pt checkpoint as the starting model.
model = YOLO("yolo11s.pt")


# NOTE(review): this guard looks carried over from a script — confirm it is
# still wanted in a notebook cell.
if __name__ == "__main__":
    # Train the model
    train_results = model.train(
        data="../configs/data.yaml",  # path to dataset YAML
        epochs=1000,  # number of training epochs
        imgsz=640,  # training image size
        device="cpu",  # device to run on, i.e. device=0 or device=0,1,2,3 or device=cpu
        save=True,
        # presumably the checkpoint interval in epochs — confirm in Ultralytics docs
        save_period=100,
        # the recorded run logged its results under ../models/v1/train3
        project="../models/v1",
    )

    # Export the model to TorchScript; `path` holds the value returned by export().
    path = model.export(format="torchscript")  # return path to exported model

New https://pypi.org/project/ultralytics/8.3.114 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.110 🚀 Python-3.11.12 torch-2.6.0 CPU (Apple M1)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolo11s.pt, data=../configs/data.yaml, epochs=1000, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=100, cache=False, device=cpu, workers=8, project=../models/v1, name=train3, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=Fa

[34m[1mtrain: [0mScanning /Users/nischal/projects/CocoDet/data/test_dataset/train/labels... 1 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1/1 [00:00<00:00,  9.11it/s]

[34m[1mtrain: [0mNew cache created: /Users/nischal/projects/CocoDet/data/test_dataset/train/labels.cache
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 1567.5±0.0 MB/s, size: 1201.3 KB)



[34m[1mval: [0mScanning /Users/nischal/projects/CocoDet/data/test_dataset/val/labels... 1 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1/1 [00:00<00:00, 1022.50it/s]

[34m[1mval: [0mNew cache created: /Users/nischal/projects/CocoDet/data/test_dataset/val/labels.cache





Plotting labels to ../models/v1/train3/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 81 weight(decay=0.0), 88 weight(decay=0.0005), 87 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1m../models/v1/train3[0m
Starting training for 1000 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     1/1000         0G      6.461      5.736      2.612        419        640: 100%|██████████| 1/1 [00:01<00:00,  1.05s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  1.64it/s]

                   all          1        300          0          0          0          0

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



     2/1000         0G      6.333      3.345      3.186        366        640: 100%|██████████| 1/1 [00:00<00:00,  1.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.13it/s]

                   all          1        300          0          0          0          0

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



     3/1000         0G      6.506      4.444      2.679        810        640: 100%|██████████| 1/1 [00:00<00:00,  1.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  2.94it/s]

                   all          1        300          0          0          0          0

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



     4/1000         0G      6.291      3.291      2.714        642        640: 100%|██████████| 1/1 [00:00<00:00,  1.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.15it/s]

                   all          1        300          0          0          0          0

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



     5/1000         0G      6.305      3.778       2.56        488        640: 100%|██████████| 1/1 [00:00<00:00,  1.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.13it/s]

                   all          1        300          0          0          0          0

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size



     6/1000         0G       6.09      3.062      2.276        668        640: 100%|██████████| 1/1 [00:00<00:00,  1.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  3.31it/s]

                   all          1        300          0          0          0          0





KeyboardInterrupt: 

# model testing

In [None]:
from shapely.geometry import Point


def txt_to_shp(txt_dir, img_dir, shp_dir):
    """Convert normalised label files back into georeferenced point shapefiles.

    For every ``<name>.txt`` in ``txt_dir`` the matching raster
    ``<img_dir>/<name>.tif`` supplies the bounds and CRS. The ``x``/``y``
    columns, read as fractions of the raster extent measured from its top-left
    corner, are scaled back to map coordinates and written as points to
    ``<shp_dir>/<name>.shp``. The ``class``, ``width`` and ``height`` columns
    are read but not written.

    Label files without any rows are skipped, since there is nothing to write.

    Args:
        txt_dir (str): directory containing the space-separated label files.
        img_dir (str): directory containing the matching ``.tif`` rasters.
        shp_dir (str): output directory for the shapefiles; created if missing.
    """
    os.makedirs(shp_dir, exist_ok=True)

    for txt_file in glob.glob(os.path.join(txt_dir, "*.txt")):
        file_name = os.path.splitext(os.path.basename(txt_file))[0]

        try:
            df = pd.read_csv(
                txt_file, delimiter=" ", names=["class", "x", "y", "width", "height"]
            )
        except pd.errors.EmptyDataError:
            # Raised by pandas for files that contain no parsable data.
            continue
        if df.empty:
            continue

        with rio.open(os.path.join(img_dir, f"{file_name}.tif")) as src:
            bounds = src.bounds
            crs = src.crs
        top_left_corner = (bounds.left, bounds.top)
        bottom_right_corner = (bounds.right, bounds.bottom)

        # x grows to the right from the left edge; y grows downwards from the top edge.
        xs = df["x"] * (bottom_right_corner[0] - top_left_corner[0]) + top_left_corner[0]
        ys = top_left_corner[1] - df["y"] * (top_left_corner[1] - bottom_right_corner[1])

        centroids = [Point(x, y) for x, y in zip(xs, ys)]
        gdf = gpd.GeoDataFrame(geometry=centroids, crs=crs)
        gdf.to_file(os.path.join(shp_dir, f"{file_name}.shp"), driver="ESRI Shapefile")

In [None]:
def concat(shp_dir, output_dir):
    """Merge every shapefile in ``shp_dir`` into ``<output_dir>/prediction.shp``.

    The shapefiles are read in sorted order and concatenated once, with the
    index reset, instead of growing a frame inside the loop.

    Args:
        shp_dir (str): directory containing the ``.shp`` files to merge.
        output_dir (str): directory for ``prediction.shp``; created if missing.

    Raises:
        FileNotFoundError: if ``shp_dir`` contains no ``.shp`` file.
    """
    shp_files = sorted(glob.glob(os.path.join(shp_dir, "*.shp")))
    if not shp_files:
        # Fail with a clear message rather than trying to write an empty frame.
        raise FileNotFoundError(f"No .shp files found in {shp_dir}")

    frames = [gpd.read_file(shp_file) for shp_file in shp_files]
    gdf = gpd.GeoDataFrame(pd.concat(frames, ignore_index=True))

    os.makedirs(output_dir, exist_ok=True)
    gdf.to_file(os.path.join(output_dir, "prediction.shp"), driver="ESRI Shapefile")

In [8]:
import folium
import geopandas as gpd

In [17]:
# Quick-look map of the predicted geometries on folium's default basemap.
m = folium.Map(location=[23.437, 94.012], zoom_start=4)
gdf = gpd.read_file("../tmp1c2h0gp3/image.shp").to_crs(epsg=4326)
folium.GeoJson(gdf).add_to(m)
# folium's documentation asks for LayerControl to be added last, after every
# layer it should list; adding it earlier can leave layers out of the control.
folium.LayerControl().add_to(m)
m

In [46]:
import folium
import geopandas as gpd

# Create base map with no default tiles; the start location is only a
# placeholder because fit_bounds at the end of the cell re-frames the map.
m = folium.Map(location=[23.437, 94.012], zoom_start=6, tiles=None)

# Add custom Google Satellite TileLayer as the (non-overlay) base layer
tile = folium.TileLayer(
    tiles="http://mt0.google.com/vt/lyrs=s&hl=en&x={x}&y={y}&z={z}",
    attr="Google Satellite",
    name="Satellite",
    overlay=False,
    control=True,
)
tile.add_to(m)

# Read the shapefile, reproject to EPSG:4326 and add it as a GeoJSON layer;
# a folium.Circle is passed as the marker for the features.
gdf = gpd.read_file("../tmp1c2h0gp3/image.shp").to_crs(epsg=4326)
folium.GeoJson(
    gdf,
    name="GeoData",
    marker=folium.Circle(
        radius=4,
        fill_color="orange",
        fill_opacity=0.1,
        color="red",
        weight=1,
    ),
    highlight_function=lambda x: {"fillOpacity": 0.8},
    zoom_on_click=True,
).add_to(m)

# Add layer control (after all layers have been added)
folium.LayerControl().add_to(m)

# Zoom the map to the data extent, then display it.
# fit_bounds takes [[south, west], [north, east]], hence the reordering.
bounds = gdf.total_bounds  # [minx, miny, maxx, maxy]
m.fit_bounds([[bounds[1], bounds[0]], [bounds[3], bounds[2]]])
m

In [22]:
import geopandas as gpd

# Load the prediction shapefile; this rebinds `gdf` used by earlier cells.
gdf = gpd.read_file("../data/preds.shp")

In [None]:
# Alias the loaded predictions as `centroids` (no copy, no reprojection) and
# display them.
centroids = gdf
centroids

Unnamed: 0,minx,miny,maxx,maxy
0,80.020237,7.552125,80.020237,7.552125
1,80.020632,7.552015,80.020632,7.552015
2,80.020048,7.551602,80.020048,7.551602
3,80.020156,7.552038,80.020156,7.552038
4,80.020030,7.552518,80.020030,7.552518
...,...,...,...,...
503,80.021773,7.556601,80.021773,7.556601
504,80.022078,7.556532,80.022078,7.556532
505,80.022563,7.556669,80.022563,7.556669
506,80.021081,7.556585,80.021081,7.556585


In [29]:
# Buffer every geometry by 15 units of EPSG:3857 with square caps.
centroid_buffer = gdf.to_crs(epsg=3857).buffer(15, cap_style="square")

In [41]:
# Reproject the buffers to EPSG:4326; their bounds are used as download bboxes below.
buffer = centroid_buffer.to_crs(4326)

In [36]:
import sys

sys.path.append(os.path.abspath("../"))
from src.download import TMStoGeoTIFF

In [46]:
# Bounds [minx, miny, maxx, maxy] of the first buffered geometry.
buffer.bounds.to_numpy().tolist()[0]

[80.0201019293402, 7.551991471294963, 80.02037142392544, 7.552258628204508]

In [48]:
# Set up a download for the first buffer's bounding box, targeting "img1.tiff".
# NOTE(review): TMStoGeoTIFF comes from src.download; its defaults are not visible here.
ras = TMStoGeoTIFF("img1.tiff", bbox=buffer.bounds.to_numpy().tolist()[0])

In [49]:
# Run the download configured in the previous cell.
ras.download()

In [21]:
# to_crs returns a new object instead of modifying `centroids` in place, so the
# result has to be kept. `centroids` must be a GeoDataFrame here; the recorded
# AttributeError came from running this cell while it was a plain DataFrame.
centroids_3857 = centroids.to_crs(epsg=3857)
# NOTE(review): this rebinds `buffer`, which earlier cells use for the buffered
# GeoSeries, to a plain list of offsets — confirm that is intended.
buffer = [-15, 15, 15, -15]

AttributeError: 'DataFrame' object has no attribute 'to_crs'

In [39]:
import sys

# sys.path.append("..")
# from src.database.connection import engine
from sqlmodel import create_engine
import geopandas as gpd
from pathlib import Path
# def get_bbox_count():

import os

# Read the connection string from the environment so real credentials do not
# have to be stored in the notebook; the fallback is the original local
# development default, kept so existing setups continue to work.
db_url = os.environ.get(
    "COCONUT_DB_URL", "postgresql://postgres:postgres@localhost/coconut_db"
)
engine = create_engine(db_url)
query = "SELECT * FROM pred"
# Geometries are declared as EPSG:3857 on read and reprojected to EPSG:4326.
pred = gpd.read_postgis(query, con=engine, crs=3857, geom_col="geometry").to_crs(4326)
cnt = len(pred)

country_shp = Path("..", "data", "countries", "world-administrative-boundaries.shp")
country = gpd.read_file(country_shp)

# Left join keeps every prediction; rows outside all countries get no name and
# therefore drop out of the per-country count below.
inter = pred.sjoin(country, how="left", predicate="within")

# Number of predictions per country name.
inter.groupby("name")["id"].count()

name
Sri Lanka    9619
Name: id, dtype: int64

In [59]:
# Display the centroid of every prediction geometry.
# NOTE(review): `pred` is in EPSG:4326 at this point; the truncated text in the
# recorded output looks like a geopandas warning — confirm whether the
# centroids should be computed in a projected CRS instead.
pred.centroid


  pred.centroid


0       POINT (80.01294 7.55432)
1       POINT (80.01293 7.55438)
2       POINT (80.01273 7.55388)
3       POINT (80.01292 7.55502)
4        POINT (80.0125 7.55401)
                  ...           
9614    POINT (80.01177 7.55373)
9615     POINT (80.01155 7.5535)
9616    POINT (80.01229 7.55317)
9617    POINT (80.01248 7.55362)
9618    POINT (80.01189 7.55368)
Length: 9619, dtype: geometry

In [42]:
# Number of distinct `id_bbox` values among the predictions (53 in the recorded run).
len(pred["id_bbox"].unique())

53

In [67]:
# data = (
#     np.random.normal(size=(1, 3)) * np.array([[1, 1, 1]]) + np.array([[48, 5, 1]])
# ).tolist()
# data

In [69]:
# geometry.x is the longitude and geometry.y the latitude (pred is in EPSG:4326);
# the two column names used to be swapped.
pred["longitude"], pred["latitude"] = pred.geometry.x, pred.geometry.y
# Rows are [latitude, longitude] — the same values the heat map received
# before, now taken from correctly named columns.
heat_data = pred[["latitude", "longitude"]].values.tolist()

In [None]:
import folium
from folium.plugins import HeatMap

# Heat map of the prediction coordinates on a default map; no start location is
# given and the view is not fitted to the data.
m = folium.Map(zoom_start=3)

HeatMap(heat_data, radius=10, blur=10).add_to(m)
m