In [1]:
import pandas as pd
import geopandas as gpd
import xarray as xr
import pyproj
from tqdm import tqdm
import numpy as np

In [2]:
import os

In [3]:
# check python version and all packages version
def check_python_version():
    """Print the interpreter version and the versions of the core libraries.

    The output is a sequence of label / value line pairs for Python, pandas,
    geopandas, xarray and pyproj, in that order. Nothing is returned.
    """
    import sys

    print("Python version")
    print(sys.version)
    # (label, module) pairs, listed in the order they are reported
    reported_modules = (
        ("Pandas version", pd),
        ("Geopandas version", gpd),
        ("Xarray version", xr),
        ("Pyproj version", pyproj),
    )
    for label, module in reported_modules:
        print(label)
        print(module.__version__)

check_python_version()

Python version
3.11.9 | packaged by Anaconda, Inc. | (main, Apr 19 2024, 16:40:41) [MSC v.1916 64 bit (AMD64)]
Pandas version
2.2.2
Geopandas version
0.14.2
Xarray version
2023.6.0
Pyproj version
3.6.1


In [22]:
# Load the raw vegetation lookup table. The file is read with header=None
# (no header row), so the column labels are attached right after reading.
file_path = '../Vegetation_Data/veg_mapping.csv'
cols = ['type', 'veg_type_details']
veg_mapping = pd.read_csv(file_path, header=None).set_axis(cols, axis=1)

In [23]:
# The labels shown in the table below look like "Agriculture - low":
# a vegetation name, a hyphen, then a fire attribute. Split each label once
# on its LAST hyphen, so any hyphen inside the vegetation name is kept intact.
label_parts = veg_mapping['veg_type_details'].str.rsplit('-', n=1)
# Strip the blanks around the separator; without this the values keep stray
# whitespace (" low", "Agriculture ") and break exact string comparisons,
# group-bys and joins on these columns later on.
veg_mapping['fire_attribute'] = label_parts.str[-1].str.strip()
veg_mapping['veg'] = label_parts.str[0].str.strip()

In [24]:
# Display the mapping table to check the two derived columns by eye.
veg_mapping

Unnamed: 0,type,veg_type_details,fire_attribute,veg
0,1,Agriculture - low,low,Agriculture
1,2,Agriculture - rarely,rarely,Agriculture
2,3,Barren - rarely,rarely,Barren
3,4,Native Chapparal - archetypical,archetypical,Native Chapparal
4,5,Native Chapparal - fire,fire,Native Chapparal
5,6,Native Coastal Sage Scrub - archetypical,archetypical,Native Coastal Sage Scrub
6,7,Native Coastal Sage Scrub - fire,fire,Native Coastal Sage Scrub
7,8,Native Coastal Sage Scrub - mesic,mesic,Native Coastal Sage Scrub
8,9,Native Conifer Alpine - fire,fire,Native Conifer Alpine
9,10,Native Conifer Alpine - low,low,Native Conifer Alpine


In [25]:
# Count how many rows of the mapping carry each fire_attribute value.
veg_mapping['fire_attribute'].value_counts()

fire_attribute
fire            12
low              8
rarely           7
archetypical     5
mesic            5
never            2
Name: count, dtype: int64

In [26]:
# Save the enriched mapping as CSV; index=False leaves the row index out of the file.
veg_mapping.to_csv('../Clean_Data/veg_mapping_cleaned.csv', index=False)

Merge the vegetation mapping with the cleaned vegetation data

In [27]:
# Load the vegetation table that the mapping is merged onto further below.
veg_data = pd.read_parquet('../Clean_Data/lon_lat_pair_weather_match_veg.parquet')

In [28]:
# Read the mapping back from the CSV file written earlier in this notebook.
mapping = pd.read_csv('../Clean_Data/veg_mapping_cleaned.csv')

In [29]:
# Inspect the column dtypes of veg_data; `type` is the key used in the merge below.
veg_data.dtypes

lon         float64
lat         float64
type        float32
distance    float64
dtype: object

In [30]:
# Inspect the column dtypes of mapping; `type` is the key used in the merge below.
mapping.dtypes

type                 int64
veg_type_details    object
fire_attribute      object
veg                 object
dtype: object

In [31]:
# fraction of missing values in each column of veg_data
veg_data.isna().mean()

lon         0.0
lat         0.0
type        0.0
distance    0.0
dtype: float64

In [32]:
# fraction of missing values in each column of mapping
mapping.isna().mean()

type                0.0
veg_type_details    0.0
fire_attribute      0.0
veg                 0.0
dtype: float64

In [34]:
# every `type` code should occur exactly once: the number of distinct codes
# must equal the number of rows in mapping
len(mapping) == mapping['type'].nunique()

True

In [36]:
# Attach the vegetation labels to every row of veg_data via the numeric `type` code.
# print shape before merge in a sentence
print(f'veg_data shape: {veg_data.shape}')
# Drop label columns left over from an earlier run of this cell, so re-running
# it does not pile up `_x` / `_y` suffixed duplicates.
label_cols = [col for col in mapping.columns if col != 'type']
merged = veg_data.drop(columns=label_cols, errors='ignore').merge(
    mapping,
    on='type',
    how='left',
    # raise a MergeError if a type code occurs more than once in `mapping`,
    # which would otherwise silently duplicate rows of veg_data
    validate='many_to_one',
    # adds a temporary `_merge` column telling which rows found a match
    indicator=True,
)
n_unmatched = (merged['_merge'] == 'left_only').sum()
veg_data = merged.drop(columns='_merge')
# print shape after merge in a sentence
print(f'veg_data shape: {veg_data.shape}')
# A left merge keeps rows whose code is absent from `mapping` and fills their
# labels with NaN; report them instead of letting them pass unnoticed.
if n_unmatched:
    print(f'WARNING: {n_unmatched} rows have a type code with no match in mapping')

veg_data shape: (17703, 4)
veg_data shape: (17703, 7)


In [37]:
# Preview the first rows of the merged table.
veg_data.head()

Unnamed: 0,lon,lat,type,distance,veg_type_details,fire_attribute,veg
0,-124.391667,40.441667,7.0,1.826642,Native Coastal Sage Scrub - fire,fire,Native Coastal Sage Scrub
1,-124.391667,40.4,25.0,1.827314,Native Oak Woodland - fire,fire,Native Oak Woodland
2,-124.35,40.566667,1.0,1.824622,Agriculture - low,low,Agriculture
3,-124.35,40.525,8.0,1.825296,Native Coastal Sage Scrub - mesic,mesic,Native Coastal Sage Scrub
4,-124.35,40.483333,25.0,1.825969,Native Oak Woodland - fire,fire,Native Oak Woodland


In [38]:
# Save the merged table under a new "_v2" file name, so the input file
# Clean_Data/lon_lat_pair_weather_match_veg.parquet is not overwritten.
veg_data.to_parquet('../Clean_Data/lon_lat_pair_weather_match_veg_v2.parquet')