# **Compile pre-processing outputs into one file per POU**

By Bridget Bittmann

Date created: 04/04/2022

Date modified: 04/04/2022

In [1]:
import pandas as pd
import os 
import glob


In [2]:
# Mount Google Drive inside the Colab VM so the datasets folder can be read.
from google.colab import drive

# Mount location; force_remount=True is passed on every run.
gdrive_root = '/content/gdrive'
drive.mount(gdrive_root, force_remount=True)

Mounted at /content/gdrive


In [3]:
## NAVIGATE TO YOUR DIRECTORY ##
%cd gdrive/MyDrive/spatial_colab/datasets/
%ls

/content/gdrive/MyDrive/spatial_colab/datasets
[0m[01;34m2021_clip[0m/      [01;34mclimate_stats[0m/         [01;34mIrrMapper[0m/    [01;34moutput_files[0m/
[01;34mBoise_CDL[0m/      [01;34mdiversion_timeseries[0m/  [01;34mLBRB_shp[0m/     [01;34msubset_test_shp[0m/
[01;34mCDL_reproject[0m/  [01;34mirrigation_companies[0m/  [01;34mlcmap_files[0m/
[01;34mCDL_subsets[0m/    [01;34mirrig_lbrb[0m/            [01;34mmasked[0m/


In [6]:
## ------------------- ##
## IMPORT ALL DATASETS ##
## ------------------- ##

# Each folder holds one CSV per unit. The lists are sorted because the merge
# step further down pairs entry i of each list by position.
div_files = sorted(glob.glob('diversion_timeseries/final_stats/*.csv'))
land_files = sorted(glob.glob('lcmap_files/final_metrics/*.csv'))
climate_files = sorted(glob.glob('climate_stats/final/*.csv'))

# Lookup table relating the raw names used by each dataset
# ('DiversionName', 'POU_Name') to one shared 'NewName'.
common_name = pd.read_csv('diversion_timeseries/relates/name_dictionary.csv')

# Rows removed from the top of every land-cover and climate table.
# NOTE(review): presumably these tables start two years before the diversion
# records -- confirm against the input files.
N_LEADING_ROWS = 2


def lookup_common_name(raw_name, key_column):
  """Translate a dataset-specific name into the shared 'NewName'.

  Parameters
  ----------
  raw_name : str
      Name exactly as it appears in a dataset file.
  key_column : str
      Column of `common_name` to search ('DiversionName' or 'POU_Name').

  Returns
  -------
  str or None
      'NewName' of the matching dictionary row, or None when nothing matches.
      An exact match is preferred; otherwise the first dictionary entry that
      contains `raw_name` as a literal substring is used.
  """
  raw_name = str(raw_name)
  keys = common_name[key_column]

  exact = common_name.loc[keys == raw_name, 'NewName']
  if not exact.empty:
    return exact.iloc[0]

  # regex=False: names are plain text, so characters such as '(' or '.' must
  # not be interpreted as regular-expression syntax.
  # na=False: dictionary rows with a missing key simply do not match.
  partial = common_name.loc[keys.str.contains(raw_name, regex=False, na=False), 'NewName']
  if partial.empty:
    return None
  if partial.nunique() > 1:
    print('WARNING: "' + raw_name + '" matches several entries in ' + key_column
          + '; using "' + str(partial.iloc[0]) + '"')
  return partial.iloc[0]


## Clean the dataframes to match by renaming to create a common name among all three datasets

div_data = []
for path in div_files:
  div = pd.read_csv(path)
  new_name = lookup_common_name(div['Diversion Name'].iloc[0], 'DiversionName')
  if new_name is None:
    # Diversions without a dictionary entry are left out of the analysis;
    # report it so a mismatch with the other two lists is easy to trace.
    print('Skipping ' + path + ': diversion name not found in name dictionary')
    continue
  div['Diversion Name'] = new_name
  div_data.append(div.drop(columns='Unnamed: 0'))

land_data = []
for path in land_files:
  land = pd.read_csv(path)
  new_name = lookup_common_name(land['Div_name'].iloc[0], 'POU_Name')
  if new_name is None:
    print('WARNING: ' + path + ': name not found in name dictionary; keeping original name')
  else:
    land['Div_name'] = new_name
  land = land.drop(columns='Unnamed: 0').iloc[N_LEADING_ROWS:].reset_index(drop=True)
  land_data.append(land)

climate_data = []
for path in climate_files:
  clim = pd.read_csv(path)
  new_name = lookup_common_name(clim['DIV_NAME'].iloc[0], 'POU_Name')
  if new_name is None:
    print('WARNING: ' + path + ': name not found in name dictionary; keeping original name')
  else:
    clim['DIV_NAME'] = new_name
  # 'Year' is dropped here; NOTE(review): presumably because the diversion
  # table already supplies a Year column for the merged output -- confirm.
  clim = (clim.drop(columns=['Unnamed: 0', 'Year'])
              .iloc[N_LEADING_ROWS:]
              .reset_index(drop=True))
  climate_data.append(clim)

## Check to make sure all the same length
print(len(div_data), len(land_data), len(climate_data))

  from ipykernel import kernelapp as app


11 11 11


In [13]:
## --------------------------------------- ## 
## MERGE THREE FILES INTO ONE FILE PER POU ##
## --------------------------------------- ## 

# The three lists are paired by position, so they must have the same length;
# otherwise files would be merged with the wrong partner or silently ignored.
if not (len(div_data) == len(land_data) == len(climate_data)):
  raise ValueError(
      'Dataset lists differ in length (diversion=%d, land=%d, climate=%d); '
      'they cannot be merged by position.'
      % (len(div_data), len(land_data), len(climate_data)))

os.makedirs('output_files', exist_ok=True)

for div_df, clim_df, land_df in zip(div_data, climate_data, land_data):
  # Both names were mapped to the shared naming scheme upstream, so a
  # difference here suggests the sorted file lists are not aligned.
  div_label = div_df['Diversion Name'].iloc[0]
  pou_label = clim_df['DIV_NAME'].iloc[0]
  if div_label != pou_label:
    print('WARNING: merging diversion "' + str(div_label)
          + '" with climate data for "' + str(pou_label) + '"')

  # Column-wise merge (rows are matched on the index); the redundant date and
  # name columns are removed so that only 'DIV_NAME' identifies the unit.
  df = (pd.concat([div_df, clim_df, land_df], axis=1)
          .drop(columns=['dates', 'Diversion Name', 'Div_name']))

  out_path = os.path.join('output_files', pou_label + '.csv')
  # The index is written as the first column (pandas default); it appears as
  # 'Unnamed: 0' when the file is read back with pd.read_csv.
  df.to_csv(out_path)

# Show the last merged table as a sanity check.
df

Unnamed: 0,Year,Total,Reservoir_Fill,StartDate,StartDayofYear,EndDate,EndDayofYear,Range,DIV_NAME,Precip_mm,Max_temp,class1_urban,class2_crops,class3_grass-shrub,class4_treecover,class5_water,class6_wetland,contagion,largest_patch_index,shannon_diversity_index
0,1987,3253.69342,777106.3,1987-04-04,94,1987-10-15,288,194,Siebenberg Canal,206.065673,30.516265,2.62697,95.061296,0.315236,0.0,1.611208,0.385289,85.836824,95.061296,0.246267
1,1988,3194.387499,550214.71,1988-04-06,97,1988-10-17,291,194,Siebenberg Canal,223.747133,32.37524,2.62697,95.096322,0.350263,0.0,1.541156,0.385289,85.941653,95.096322,0.245277
2,1989,3424.470671,848992.87,1989-04-16,106,1989-10-16,289,183,Siebenberg Canal,221.055183,31.44753,2.62697,94.956217,0.455342,0.0,1.576182,0.385289,85.52316,94.956217,0.252734
3,1990,3395.710275,706666.06,1990-04-04,94,1990-10-31,304,210,Siebenberg Canal,230.766539,31.529519,2.697023,94.781086,0.630473,0.0,1.471103,0.420315,84.893233,94.781086,0.262075
4,1991,3445.297166,519360.8,1991-04-05,95,1991-10-14,287,192,Siebenberg Canal,267.748252,31.212955,2.942207,94.255692,0.875657,0.0,1.50613,0.420315,83.598214,94.255692,0.283753
5,1992,3127.346022,376107.62,1992-04-08,99,1992-10-12,286,187,Siebenberg Canal,216.306115,31.25399,2.802102,94.360771,0.910683,0.0,1.50613,0.420315,83.8325,94.360771,0.28033
6,1993,2628.105207,966269.81,1993-05-09,129,1993-10-15,288,159,Siebenberg Canal,330.919648,26.900783,2.732049,94.255692,1.015762,0.0,1.576182,0.420315,83.470704,94.255692,0.28534
7,1994,3085.296339,616003.82,1994-04-06,96,1994-10-15,288,192,Siebenberg Canal,257.219772,32.593446,2.381786,94.290718,1.120841,0.0,1.78634,0.420315,83.551393,94.290718,0.286288
8,1995,2500.369376,979861.16,1995-04-16,106,1995-10-24,297,191,Siebenberg Canal,390.328508,29.717255,2.276708,92.95972,1.401051,0.0,2.942207,0.420315,81.185147,92.924694,0.332635
9,1996,3293.164585,977950.63,1996-04-23,114,1996-10-22,296,182,Siebenberg Canal,370.203182,31.916625,2.241681,92.714536,1.50613,0.0,3.187391,0.350263,80.894239,92.67951,0.3408
