In [117]:
import os
import subprocess
import shutil

import pandas as pd
import numpy as np

In [118]:
# Read the interim FIA remeasured-trees CSV and print a column/dtype summary.
DATA = "../../data/interim/FIA_remeasured_trees_for_training.csv"
df = pd.read_csv(filepath_or_buffer=DATA)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 290429 entries, 0 to 290428
Data columns (total 63 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   STATECD            290429 non-null  int64  
 1   UNITCD             290429 non-null  int64  
 2   COUNTYCD           290429 non-null  int64  
 3   PLOT               290429 non-null  int64  
 4   PLOT_ID            290429 non-null  int64  
 5   SUBPLOT            290429 non-null  int64  
 6   SUBPLOT_ID         290429 non-null  int64  
 7   PLOT_CN            290429 non-null  int64  
 8   PLOT_CN_NEXT       290429 non-null  int64  
 9   TREE_CN            220620 non-null  float64
 10  TREE_CN_NEXT       269952 non-null  float64
 11  TREE_ID            290429 non-null  int64  
 12  LAT                290429 non-null  float64
 13  LON                290429 non-null  float64
 14  ELEV               290429 non-null  float64
 15  VARIANT            290429 non-null  object 
 16  LO

In [119]:
# Build the ClimateNA input table: one row per distinct
# (PLOT_ID, LAT, LON, ELEV) combination found in the tree table, with the
# columns named ID1, lat, lon and el.
cna = pd.DataFrame(
    df.groupby(by=["PLOT_ID", "LAT", "LON", "ELEV"]).groups.keys(),
    columns=["ID1", "lat", "lon", "el"],
).assign(
    # Identifiers are written out as strings rather than integers.
    ID1=lambda plots: plots["ID1"].astype(str),
    # 3.28084 is the feet-per-metre factor; presumably ELEV is in feet and
    # ClimateNA wants metres (TODO confirm). astype(int) truncates toward zero.
    el=lambda plots: plots["el"].div(3.28084).astype(int),
)
# Empty secondary-ID column, placed directly after ID1.
cna.insert(1, "ID2", np.nan)
cna.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12156 entries, 0 to 12155
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   ID1     12156 non-null  object 
 1   ID2     0 non-null      float64
 2   lat     12156 non-null  float64
 3   lon     12156 non-null  float64
 4   el      12156 non-null  int64  
dtypes: float64(3), int64(1), object(1)
memory usage: 475.0+ KB


In [120]:
# Preview the first five rows of the ClimateNA input table.
cna.head(n=5)

Unnamed: 0,ID1,ID2,lat,lon,el
0,60101550679,,41.806228,-123.788726,761
1,60101551744,,41.980638,-124.193526,91
2,60101551969,,41.681432,-123.803842,701
3,60101552953,,41.938125,-123.870868,640
4,60101553315,,41.738938,-123.783382,1432


In [121]:
# Save a copy of the plot table alongside the other interim data, using
# CRLF line endings and a header row but no index column.
OUT_CSV = "../../data/interim/plot_info_for_climatena.csv"
cna.to_csv(
    path_or_buf=OUT_CSV,
    header=True,
    index=False,
    lineterminator="\r\n",
)

In [122]:
# Earliest first-measurement year and latest re-measurement year in the data.
df["MEASYEAR"].min(), df["MEASYEAR_NEXT"].max()

(1999, 2019)

In [123]:
# Run the ClimateNA executable once per year (1990-2020) on the plot table
# and collect each yearly output CSV into OUT_DIR.
CLIMNA_DIR = "/mnt/e/ecotrust/Climatena_v731/"
OUT_DIR = "../../data/interim/climatena_output"

# Without this, shutil.move() into a missing OUT_DIR would rename every
# yearly file to the OUT_DIR path itself, each one overwriting the last.
os.makedirs(OUT_DIR, exist_ok=True)

# The executable is given file names relative to its own folder, so the
# input table has to be written next to it.
cna.to_csv(
    os.path.join(CLIMNA_DIR, "plot_info_for_climatena.csv"),
    index=False,
    header=True,
    lineterminator="\r\n",
)

# Loop-invariant: the same input table is used for every year.
infile = "/plot_info_for_climatena.csv"

for year in range(1990, 2021):
    outfile = f"/climna_monthly_{year}.csv"
    yearfile = f"/Year_{year}.ann"
    # NOTE(review): "/M" presumably requests monthly variables (the outputs
    # are named climna_monthly_*) -- confirm against the ClimateNA manual.
    proc = subprocess.run(
        ["./ClimateNA_v7.31.exe", "/M", yearfile, infile, outfile],
        cwd=CLIMNA_DIR,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )

    produced = os.path.join(CLIMNA_DIR, outfile.lstrip("/"))
    destination = os.path.join(OUT_DIR, outfile.lstrip("/"))

    # Verify the run actually produced a file BEFORE deleting the previous
    # result, and surface whatever the executable printed if it did not.
    if not os.path.exists(produced):
        raise RuntimeError(
            f"ClimateNA produced no output for {year} "
            f"(exit code {proc.returncode}).\n"
            f"stdout: {proc.stdout.decode(errors='replace')}\n"
            f"stderr: {proc.stderr.decode(errors='replace')}"
        )

    # Remove any earlier result first so the move also works on platforms
    # where renaming onto an existing file fails.
    if os.path.exists(destination):
        os.remove(destination)
    shutil.move(produced, destination)
    print(year, end="... ")
print("Done.")

1990... 1991... 1992... 1993... 1994... 1995... 1996... 1997... 1998... 1999... 2000... 2001... 2002... 2003... 2004... 2005... 2006... 2007... 2008... 2009... 2010... 2011... 2012... 2013... 2014... 2015... 2016... 2017... 2018... 2019... 2020... Done.


In [124]:
# Read each yearly ClimateNA output, tag it with its year, and stack all
# years into one long table.
dfs = []
for year in range(1990, 2021):
    year_df = pd.read_csv(os.path.join(OUT_DIR, f"climna_monthly_{year}.csv"))
    year_df.insert(0, "YEAR", year)
    # rename() without `columns=` maps *index* labels, which silently left
    # the column named ID1; target the columns explicitly.
    year_df = year_df.rename(columns={"ID1": "PLOT_ID"})
    # ID2 was written as an empty placeholder column, so drop it.
    year_df = year_df.drop(columns=["ID2"])
    dfs.append(year_df)
clim = pd.concat(dfs, axis=0, ignore_index=True)
clim.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 376836 entries, 0 to 376835
Columns: 185 entries, YEAR to CMI12
dtypes: float64(62), int64(123)
memory usage: 531.9 MB


In [125]:
# Write the combined 1990-2020 monthly climate table to the interim data
# folder, with a header row and no index column.
clim.to_csv(
    path_or_buf="../../data/interim/climatena_1990-2020_monthly.csv",
    header=True,
    index=False,
)