# **Compile pre-processing outputs into one file**

By Bridget Bittmann

Date created: 04/04/2022

Date modified: 04/04/2022

In [1]:
import pandas as pd
import os 
import glob


In [2]:
from google.colab import drive 
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [3]:
## NAVIGATE TO YOUR DIRECTORY ##
%cd gdrive/MyDrive/spatial_colab/datasets/
%ls

/content/gdrive/MyDrive/spatial_colab/datasets
[0m[01;34mclimate_stats[0m/         [01;34mirrig_lbrb[0m/   [01;34mmasked[0m/        seb_change.png
[01;34mdiversion_timeseries[0m/  [01;34mIrrMapper[0m/    ny_change.png  set_change.png
fu_change.png          [01;34mLBRB_shp[0m/     [01;34moutput_files[0m/  [01;34msubset_test_shp[0m/
[01;34mirrigation_companies[0m/  [01;34mlcmap_files[0m/  [01;34mPOUs[0m/


In [23]:
## ------------------- ##
## IMPORT ALL DATASETS ##
## ------------------- ##

div_files = sorted(glob.glob('diversion_timeseries/final_stats/*.csv'))
land_files = sorted(glob.glob('lcmap_files/proportions/longform_proportions/*.csv'))
climate_files = sorted(glob.glob('climate_stats/final/*.csv'))
common_name = pd.read_csv('diversion_timeseries/relates/name_dictionary.csv')

## Clean the dataframes to match by renaming to create a common name among all three datasets

div_data = []
for i in div_files:
  div = pd.read_csv(i)
  name = div['DiversionName_x'][0]
  if common_name['DiversionName'].str.contains(div['DiversionName_x'][0]).any():
    div['DiversionName_x'] = common_name['NewName'][common_name['DiversionName'].str.contains(div['DiversionName_x'][0])].to_string(index=False)
    print(div['DiversionName_x'][0])
    div = div.drop(labels='Unnamed: 0', axis=1)
    div_data.append(div)
  else:
    None

div = pd.concat(div_data)

land_data = []
for i in land_files:
  land = pd.read_csv(i)
  land['DivName'] = common_name['NewName'][common_name['WaterRight'].str.contains(land['DivName'][0])].to_string(index=False)
  land = land.drop(labels='Unnamed: 0', axis=1)
  land = land.drop(axis=0, index=[0,1]).reset_index().drop(labels=['index'], axis=1)
  land_data.append(land)

land = pd.concat(land_data)


climate_data = []
for i in climate_files:
  clim = pd.read_csv(i)
  new_name = common_name['NewName'][common_name['WaterRight'].str.contains(clim['DIV_NAME'][0])].to_string(index=False)
  clim['DIV_NAME'] = new_name
  clim = clim.drop(labels=['Unnamed: 0', 'Year'], axis=1).drop(axis=0, index=[0,1]).reset_index().drop(labels=['index'], axis=1)
  climate_data.append(clim)

clim = pd.concat(climate_data)

# ## Check to make sure all the same length
print(len(common_name), len(div_data), len(land_data), len(climate_data))


Andrews
Atwell
Ballentyne Canal
Barber pumps
Baxter
Bates
Boise City Canal
Boise City Parks
Bowman and Swisher
Bubb Canal
Caldwell Highline Canal
Campbell Canal
Canyon County Canal
Conway-Hamming Canal
Crawforth
Caldwell Lowline Canal
Eagle Island State Park
Ester Simplot
Eureka No2 Canal
Fairview Acres
Farmers Union Canal
Golden Gate Canal
Graham-Gilbert Canal
Haas
Hart-Davis Canal
Island Highline Canal
Lemp Canal
Little Pioneer Canal
Lower Center Point
Mace-Catlin Canal
Mace-Mace Canal


  app.launch_new_instance()


Mammon
McConnel Island
McManus and Teater Canal
Middleton Canal
McCurry Pump
New Dry Creek Canal
New York Canal
Parma Ditch
Penitentiary Canal
Phyllis Canal
Quinns Pond
Ridenbaugh Canal
River Run
Riverside Canal
Riverside Village
Rossi Mill Canal
Sebree Canal
Settlers Canal
Seven Suckers Canal
Shakespeare
Shipley and Wagner Pumps
Siebenberg Canal
Stutheit
Surprise Valley and Micron
Thomas Aiken Canal
Thurman Mill Canal
Suez
Upper Center Point Canal
Shipley and Wagner Pumps
Warm Springs Canal
Andrews
65 62 64 64


In [5]:
display(land_name)

0                                               Andrews
1                                                Atwell
2                                      Ballentyne Canal
3                                          Barber pumps
4                                                 Bates
                            ...                        
57                                         Series([], )
58                           Surprise Valley and Micron
59                             Upper Center Point Canal
60    Shipley and Wagner Pumps\nShipley and Wagner P...
61                                   Warm Springs Canal
Length: 62, dtype: object

In [6]:
## --------------------------------------- ## 
## MERGE THREE FILES INTO ONE FILE PER POU ##
## --------------------------------------- ## 

for i in range(len(div_data)):
  df = pd.concat([div_data[i], climate_data[i], land_data[i]], axis=1)
  df.drop(labels=['dates', 'Diversion Name', 'Div_name'], axis=1, inplace=True)
  out_path = os.path.join('output_files/'+df['DIV_NAME'][0]+'.csv')
  df.to_csv(out_path)

df

KeyError: ignored