## Data

In [1]:
import os, sys, re
import pandas as pd
# Project root, written once: its "code" sub-directory is appended to the import
# path and the root itself becomes the working directory, so relative paths used
# later in the notebook (e.g. "setup/fetch.json") resolve against it.
PROJECT_ROOT = "/pfs/work7/workspace/scratch/tu_zxobe27-master_thesis"
sys.path.append(PROJECT_ROOT + "/code")
os.chdir(PROJECT_ROOT)

In [2]:
from data.download import download_agent
from data.preprocess.misc import preprocess_boundaries
from data.preprocess.river_network import preprocess_rivers, river_network

# Fetch-instruction table, read from a path relative to the working directory
# set by os.chdir above; get_fetch_instructions filters it on its "id" column.
fetch_instructions = pd.read_json("setup/fetch.json")
def get_fetch_instructions(id, instructions=None):
    """Return the fetch-instruction record whose "id" field equals ``id``.

    Parameters
    ----------
    id : str
        Identifier of the data source to look up (e.g. "lc_glc_30").
    instructions : pandas.DataFrame, optional
        Table with an "id" column to search. Defaults to the notebook-level
        ``fetch_instructions`` table.

    Returns
    -------
    dict
        The first matching row as a ``{column: value}`` mapping.

    Raises
    ------
    IndexError
        If no row carries the requested id. The exception type matches the
        previous bare ``[0]`` lookup; the message now names the missing id.
    """
    table = fetch_instructions if instructions is None else instructions
    matches = table.loc[table["id"] == id].to_dict(orient="records")
    if not matches:
        raise IndexError(f"no fetch instructions found for id {id!r}")
    return matches[0]

# Shared download agent; every agent.fetch(...) cell below goes through it.
# NOTE(review): the arguments look like (project root, ISO3 country code, year)
# — confirm against data.download.download_agent.
agent = download_agent("/pfs/work7/workspace/scratch/tu_zxobe27-master_thesis/", "BRA", 2010)

### Land Cover

In [None]:
# Look up the instructions stored under id "igbe_lulc", then hand them to the agent.
fetch_spec = get_fetch_instructions("igbe_lulc")
agent.fetch(fetch_spec)

In [None]:
# Look up the instructions stored under id "lc_esacci_300", then hand them to the agent.
fetch_spec = get_fetch_instructions("lc_esacci_300")
agent.fetch(fetch_spec)

In [3]:
# Look up the instructions stored under id "lc_glc_30", then hand them to the agent.
fetch_spec = get_fetch_instructions("lc_glc_30")
agent.fetch(fetch_spec)

### Misc
#### Boundaries

In [None]:
# Look up the instructions stored under id "msc_gadm", then hand them to the agent.
fetch_spec = get_fetch_instructions("msc_gadm")
agent.fetch(fetch_spec)

In [5]:
# preprocess_boundaries is given the working directory (with a trailing slash)
# and the country code "BRA".
root_dir = os.getcwd() + "/"
preprocess_boundaries(root_dir, "BRA")

#### Rivers

In [3]:
# Look up the instructions stored under id "msc_rivers", then hand them to the agent.
fetch_spec = get_fetch_instructions("msc_rivers")
agent.fetch(fetch_spec)

In [None]:
# preprocess_rivers is given the working directory (with a trailing slash)
# and the country code "BRA".
root_dir = os.getcwd() + "/"
preprocess_rivers(root_dir, "BRA")

In [None]:
# geopandas is not imported by any earlier cell, so `gpd` would be undefined on
# a fresh kernel; import it here so this cell runs on its own.
import geopandas as gpd

# Load the preprocessed river lines and the raw GADM level-2 boundaries, and
# reproject both to the same CRS (EPSG code 5641); both layers are passed to
# river_network in the next cell.
rivers = gpd.read_feather("/pfs/work7/workspace/scratch/tu_zxobe27-master_thesis/data/misc/msc_rivers.feather").to_crs(5641)
boundaries = gpd.read_file("/pfs/work7/workspace/scratch/tu_zxobe27-master_thesis/data/misc/raw/gadm/gadm41_BRA_2.json", engine = "pyogrio").to_crs(5641)

In [None]:
# Build the river network for Brazil. river_network receives, in order: the
# river features whose NORIOCOMP is not "Linha de Costa" (Portuguese for
# "coastline"), the boundary layer, and the "Linha de Costa" features alone.
# Return values of the calls below are discarded, so each step presumably
# updates rivers_brazil in place — keep the calls in this order.
rivers_brazil = river_network(rivers[(rivers.NORIOCOMP != "Linha de Costa")],
                              boundaries,
                              rivers[(rivers.NORIOCOMP == "Linha de Costa")])
# explode multi-geometries
rivers_brazil.explode_rivers()
rivers_brazil.update_vertices()
# compute nodes
rivers_brazil.update_network_nodes()
rivers_brazil.update_border_nodes() # (multiprocessing-enabled: est. 4min)
rivers_brazil.update_end_nodes()
rivers_brazil.update_vertices()
rivers_brazil.update_nodes()
# break lines at nodes
rivers_brazil.break_lines_at_nodes() # (multiprocessing-enabled: est. 10min)
# explode_rivers / update_vertices are repeated here after the lines were broken
rivers_brazil.explode_rivers()
rivers_brazil.update_vertices()
# clean shapefile
rivers_brazil.update_estuary_nodes()
rivers_brazil.clean_shapefile()
# NOTE(review): "river_netowork" in the target path looks like a typo of
# "river_network" — confirm which directory downstream code reads before renaming.
rivers_brazil.store_network("/pfs/work7/workspace/scratch/tu_zxobe27-master_thesis/data/river_netowork/")

### Imagery

In [4]:
import sqlite3
import pandas as pd

# Open the imagery bookkeeping database (sqlite3.connect creates the file if it
# does not exist yet). The connection is left open under the name `conn`.
# NOTE(review): close it once no later cell needs it.
conn = sqlite3.connect('/pfs/work7/workspace/scratch/tu_zxobe27-master_thesis/data/imagery/imagery.db')

# Create tables.
# IF NOT EXISTS makes this cell safe to re-run: a plain CREATE TABLE raises
# sqlite3.OperationalError ("table ... already exists") on the second run.
cursor = conn.cursor()
cursor.execute('''
    CREATE TABLE IF NOT EXISTS GridCells (
        CellID INTEGER PRIMARY KEY,
        Row INTEGER,
        Column INTEGER,
        X REAL,
        Y REAL,
        Internal BOOLEAN
    )
''')
# SubGridCells.CellID refers to a GridCells row by name only; no foreign-key
# constraint is declared.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS SubGridCells (
        SubCellID INTEGER PRIMARY KEY,
        CellID INTEGER,
        X REAL,
        Y REAL,
        ShareValid REAL
    )
''')
cursor.execute('''
    CREATE TABLE IF NOT EXISTS RemoteImageFiles (
        RemoteImageFileID INTEGER PRIMARY KEY,
        CellID INTEGER,
        Year INTEGER,
        RemoteFilePath TEXT,
        RemoteFileSize INTEGER,
        Processed BOOLEAN
    )
''')
cursor.execute('''
    CREATE TABLE IF NOT EXISTS LocalImageFiles (
        LocalImageFileID INTEGER PRIMARY KEY,
        RemoteImageFileID INTEGER,
        CellID INTEGER,
        SubCellID INTEGER,
        Year INTEGER,
        LocalFilePath TEXT,
        LocalFileSize INTEGER
    )
''')
conn.commit()


In [3]:
# Look up the instructions stored under id "im_mapbiomas_30", then hand them to
# the agent. Per the recorded output of this cell, the call starts an export
# task (task_type 'EXPORT_IMAGE') and prints its status.
fetch_spec = get_fetch_instructions("im_mapbiomas_30")
agent.fetch(fetch_spec)

*** Started export task ***
--- Task status ---
{'state': 'READY', 'description': 'mapbiomas_2010', 'creation_timestamp_ms': 1708782512740, 'update_timestamp_ms': 1708782512740, 'start_timestamp_ms': 0, 'task_type': 'EXPORT_IMAGE', 'id': 'OMXIGF4N5BZ6FZCSL5IHMNRO', 'name': 'projects/master-thesis-414809/operations/OMXIGF4N5BZ6FZCSL5IHMNRO'}
--- Task status ---


In [5]:
import pandas as pd

In [31]:
# Names of the two hierarchy-level columns in the conversion sheet.
level_cols = ["Level " + str(level) for level in range(1, 3)]

# Load the land cover conversion sheet and forward-fill the level columns, so
# rows with a missing level take the last value given above them.
classes = pd.read_excel("data/land_cover/lulc_classes_conv.xlsx")
classes[level_cols] = classes[level_cols].ffill()

# The IBGE column holds comma-separated class names; split each cell into a list.
classes["igbe_class"] = classes["IBGE (1999; 2012) Classification "].str.split(",")

# One output row per listed class: explode the lists, drop rows with missing
# values, strip surrounding whitespace from the class names, rename the columns.
cov_table = (
    classes[level_cols + ["igbe_class"]]
    .explode("igbe_class")
    .dropna()
    .assign(igbe_class = lambda table: table["igbe_class"].str.strip())
    .set_axis(["level_1", "level_2", "igbe_class"], axis = 1)
)

# Export the lookup table.
cov_table.to_csv("data/land_cover/lulc_classes.csv", index = False)