# **Compile Data for Model in R**

Date created: 07.21.2022

Date modified:

Purpose: This script compiles land-use data, climate data, and surface-water flow data into a single dataframe that is exported for use in R as the input to a Generalized Linear Mixed-Effects Model (GLMM).

In [1]:
## --------------- ##
## IMPORT PACKAGES ## 
## --------------- ##

import pandas as pd # to work with dataframe
import os # for file paths
import glob # read in a folder of csv
import numpy as np # basic statistics


In [2]:
# Mount Google Drive inside the Colab runtime at /content/gdrive so the
# project folders on Drive can be read and written like local files.
# force_remount=True requests a fresh mount even if Drive is already
# mounted, so this cell can be re-run.
from google.colab import drive 
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [3]:
## NAVIGATE TO YOUR DIRECTORY ##
%cd gdrive/MyDrive/Drainage_analysis
%ls

/content/gdrive/MyDrive/Drainage_analysis
datasets/  figures/  scripts/


In [19]:
## IMPORT ALL DATASETS ##
## ------------------- ##


def _concat_csv_files(paths, label):
    """Read each CSV in *paths* and stack the rows into one dataframe.

    Parameters
    ----------
    paths : list of str
        CSV file paths, read in the order given.
    label : str
        Human-readable name of the dataset, used only in the error message.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all files. Each file keeps its own row
        index, so index values may repeat across files.

    Raises
    ------
    FileNotFoundError
        If *paths* is empty.
    """
    if not paths:
        # pd.concat([]) would fail with "No objects to concatenate", which
        # gives no hint about which folder was empty.
        raise FileNotFoundError(f"No {label} CSV files were found to import")
    return pd.concat([pd.read_csv(path) for path in paths])


# Annual flow values live in a single file.
div = pd.read_csv('datasets/output_files/annual_flow_vals.csv')

# Land cover: one folder of CSVs, read in sorted path order.
land_files = sorted(glob.glob('datasets/output_files/land_cover/*.csv'))
land = _concat_csv_files(land_files, 'land cover')

# Climate statistics: one folder of CSVs, read in sorted path order.
clim_files = sorted(glob.glob('datasets/output_files/climate_stats/final/*.csv'))
clim = _concat_csv_files(clim_files, 'climate statistics')

In [20]:
## Use a dictionary to help align ##
## ------------------------------ ##

# Relate table: pairs each 'Spatial Name' with the 'NewName' to use instead.
relate = pd.read_csv('datasets/relates/DrainRelates.csv')

# Only rows that actually have a spatial name can be used as lookup keys.
spatial = relate[relate['Spatial Name'].notna()]
newnames = {
    spatial_name: new_name
    for spatial_name, new_name in zip(spatial['Spatial Name'], spatial['NewName'])
}

# Translate the climate names, then discard the old name column and the
# 'Unnamed: 0' column (presumably a saved row index -- confirm against the CSVs).
clim = (
    clim
    .assign(NewName=clim['NAME'].map(newnames))
    .drop(columns=['Unnamed: 0', 'NAME'])
)

# Same treatment for land cover; its translated column is called 'NewName2'.
land = (
    land
    .assign(NewName2=land['DrainName'].map(newnames))
    .drop(columns=['Unnamed: 0', 'DrainName'])
)

In [22]:
## Merge the three files together ##
## ------------------------------ ##

# Step 1: flow values + climate, matched on year and name.
# 'inner' is the pandas default: only rows with a match in both tables remain.
merge2 = pd.merge(
    div,
    clim,
    how='inner',
    left_on=['Year', 'Name'],
    right_on=['Year', 'NewName'],
)

# Step 2: add land cover, whose year column is called 'dates'.
merge_final = pd.merge(
    merge2,
    land,
    how='inner',
    left_on=['Year', 'Name'],
    right_on=['dates', 'NewName2'],
)

# Remove the right-hand join keys, which repeat 'Year'/'Name', along with
# the remaining 'Unnamed: 0' column.
merge_final = merge_final.drop(
    columns=['Unnamed: 0', 'NewName', 'dates', 'NewName2']
)

In [23]:
## Export the file ##
## --------------- ##

out_path = 'datasets/output_files/R_input/model_input.csv'

# Create the output folder first: to_csv does not create missing
# directories, so the export would otherwise fail when R_input is absent.
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# The dataframe index is written as the first (unnamed) column, exactly as
# before, so the file layout seen by the R script is unchanged.
merge_final.to_csv(out_path)