In [1]:
import arcpy
import os
import numpy as np
import pandas as pd
from simpledbf import Dbf5

# Input Parameters 

In [2]:
# Geoprocessing environment and dataset/field names used by every cell below.
arcpy.env.overwriteOutput = True
# Windows paths are written as raw strings so the backslashes stay literal.
# A plain 'C:\Users...' literal is a "\U" unicode-escape error on Python 3;
# the raw form yields exactly the same text on Python 2.
# arcpy.env.workspace = r'F:\SW023129 Los Pen WMP\900-GIS\930-Flood Control\932 Delineation\932.1 Carroll Canyon\932.1.1 East of 15\QGIS_Sandbox\GDB.gdb'
arcpy.env.workspace = r'C:\Users\cfang\Desktop\SHAPEFILES\ARCHYDRO.GDB'
# catchment = 'catchment'
catchment = 'Lagoon_Final_Subwat_NoID'
drainageline = r'Layers\DrainageLineC'
# Name of the feature class written by the catchment/drainage-line intersect.
dl_cat_int = 'dl_cat_int_all'

# Field names: catchment ID, downstream catchment ID (renamed into the final
# table), drainage-line ID and downstream drainage-line ID.
cat_usid = 'Sub_ID'
cat_dsid = 'Dwn_Sub_ID' #to be filled here
dl_usid = 'HydroID'
dl_dsid = 'NextDownID'

# Helper Functions

In [3]:
def read_at(input_data):
    """Load the attribute table of an ArcGIS dataset into a pandas DataFrame.

    The table is exported to a temporary DBF file in the ``Output`` folder
    (relative to the current working directory), read with ``Dbf5`` and the
    temporary DBF is removed again.

    Parameters
    ----------
    input_data : name or path of the table / feature class handed to
        ``arcpy.TableToTable_conversion`` as ``in_rows``.

    Returns
    -------
    pandas.DataFrame with the exported attribute rows.
    """
    out_dir = 'Output'
    dbf_name = '_temp_input.dbf'
    dbf_path = out_dir + '/' + dbf_name

    # The export fails if the target folder is missing, so create it on demand.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    arcpy.TableToTable_conversion(in_rows=input_data,
                                  out_path=out_dir,
                                  out_name=dbf_name)
    try:
        df_at = Dbf5(dbf_path).to_dataframe()
    finally:
        # Remove the temporary DBF even when reading it fails, so a stale
        # file is not left behind for the next call.
        if os.path.exists(dbf_path):
            os.remove(dbf_path)
    return df_at

def Con_in_cat(df_dl):
    """Summarise drainage-line connectivity inside one catchment.

    Every unique (``dl_usid``, ``dl_dsid``) pair in ``df_dl`` is traced
    upstream with ``trace_upstream``. Lines whose trace contains only the
    line itself are collected as upstream ends; the line with the largest
    trace is taken as the catchment's downstream line (first one wins on ties).

    Parameters
    ----------
    df_dl : pandas.DataFrame
        Drainage-line rows belonging to a single catchment; must contain the
        ``dl_usid`` and ``dl_dsid`` columns.

    Returns
    -------
    (DataFrame, DataFrame)
        * a frame with column ``us`` (IDs of lines with nothing upstream) and
          column ``ds`` (ID of the line with the largest upstream trace);
        * the largest upstream trace itself, as returned by ``trace_upstream``.

    Raises
    ------
    ValueError
        If ``df_dl`` yields no traceable drainage line (e.g. it is empty).
        Previously this surfaced as an opaque IndexError / unbound local.
    """
    df_dl_id_unique = df_dl.drop_duplicates(subset=[dl_usid, dl_dsid])

    max_row = 0
    df_lgst_flpth = None
    # Collect plain values in a list and build the frame once; growing a
    # Series with Series.append is quadratic and was removed in pandas 2.0.
    headwater_ids = []
    for _, row in df_dl_id_unique.iterrows():
        _us_list = trace_upstream(row[dl_usid], df_dl_id_unique)
        if _us_list.size == 1:
            headwater_ids.append(_us_list[dl_usid].iloc[0])
        if _us_list.size > max_row:
            max_row = _us_list.size
            df_lgst_flpth = _us_list

    if df_lgst_flpth is None:
        raise ValueError('Con_in_cat: no drainage line found in the given catchment subset')

    # trace_upstream puts the traced line itself last, so the last entry of
    # the largest trace is the most downstream line.
    ds_of_max = df_lgst_flpth[dl_usid].iloc[-1]

    # convert upstream list and downstream catchment ID into a dataframe
    _df_con_in_cat = pd.DataFrame({'us': headwater_ids})
    _df_con_in_cat['ds'] = ds_of_max
    return _df_con_in_cat, df_lgst_flpth

def trace_upstream(target_ID, df, us_col=None, ds_col=None):
    """Return the IDs of every line draining to ``target_ID``, plus the target.

    The network is walked level by level: first the rows whose downstream ID
    equals ``target_ID``, then the rows draining to those, and so on. The
    target's own row(s) are appended last.

    Parameters
    ----------
    target_ID : ID of the line to trace from.
    df : pandas.DataFrame
        Connectivity table holding the ID and downstream-ID columns.
    us_col, ds_col : str, optional
        Names of the ID and downstream-ID columns. Default to the
        module-level ``dl_usid`` / ``dl_dsid`` settings.

    Returns
    -------
    pandas.DataFrame
        Single-column frame (``us_col``) with a fresh 0..n-1 index; upstream
        lines first (nearest level first), the target itself last.
    """
    if us_col is None:
        us_col = dl_usid
    if ds_col is None:
        ds_col = dl_dsid

    # IDs already collected. Excluding them keeps a cyclic or self-referencing
    # network from looping forever; on a loop-free network nothing is excluded.
    visited = {target_ID}
    levels = []
    frontier = df.loc[(df[ds_col] == target_ID) & ~df[us_col].isin(visited), [us_col]]
    while not frontier.empty:
        levels.append(frontier)
        visited.update(frontier[us_col])
        frontier = df.loc[df[ds_col].isin(frontier[us_col]) & ~df[us_col].isin(visited),
                          [us_col]]

    #Remove the line below if one does not want to include the catchment itself
    levels.append(df.loc[df[us_col] == target_ID, [us_col]])

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    return pd.concat(levels, ignore_index=True)

def lgst_flpth(df_flpth,org_shp,catID=cat_usid, dlID=dl_usid):
    """Write a feature class holding only the drainage lines listed in ``df_flpth``.

    Steps: dump ``df_flpth`` to ``Output/a_lgst_flpth.csv``, import that CSV
    into the current workspace as table ``a_lgst_flpth``, join it onto a
    temporary copy of ``org_shp`` on ``dlID``, export the matched features to
    ``lgs_flpth`` and drop the temporary copy and the helper join field.

    Parameters
    ----------
    df_flpth : pandas.DataFrame
        Table of lines to keep; must contain the ``dlID`` column.
    org_shp : feature class to filter. It is copied first so the original
        is not modified.
    catID : str
        Currently unused; kept for interface compatibility.
    dlID : str
        Name of the ID field shared by ``df_flpth`` and ``org_shp``.

    Returns
    -------
    None. Outputs are written to ``arcpy.env.workspace`` and ``Output/``.
    """
    # Define output variables
    temp_input = '_temp_input_lgst_flpth'
    table_gdb = 'a_lgst_flpth'
    table_csv = 'Output/' + table_gdb + '.csv'
    fc_output = 'lgs_flpth'
    # The code relies on the joined copy of the ID field being named
    # "<dlID>_1". Future improvement: avoid hard-coding this suffix.
    joined_field = dlID + '_1'

    df_flpth.to_csv(table_csv, index=False)
    arcpy.conversion.TableToTable(table_csv,
                                  arcpy.env.workspace,
                                  table_gdb)

    #Create a temp duplicate shapefile in prep for join field - avoid modifying the original input
    arcpy.conversion.FeatureClassToFeatureClass(org_shp,
                                                arcpy.env.workspace,
                                                temp_input)
    try:
        arcpy.management.JoinField(in_data=temp_input,
                                   in_field=dlID,
                                   join_table=table_gdb,
                                   join_field=dlID)

        # Only features that found a match in the joined table are exported.
        expression = joined_field + ' IS NOT NULL'
        arcpy.conversion.FeatureClassToFeatureClass(temp_input,
                                                    arcpy.env.workspace,
                                                    fc_output,
                                                    expression)
    finally:
        # Drop the temporary copy even if the join or export fails.
        arcpy.management.Delete(temp_input)
    arcpy.management.DeleteField(fc_output, joined_field)

# Main Running Block 

In [4]:
# Split the drainage lines by catchment; the result is written to dl_cat_int
# and carries the attributes of both inputs.
_intersect_inputs = [catchment, drainageline]
arcpy.analysis.Intersect(
    in_features=_intersect_inputs,
    out_feature_class=dl_cat_int,
    join_attributes="ALL",
)

# Load the three attribute tables (catchments, drainage lines, intersected
# drainage lines) in the same order as before.
df_cat, df_dl, df_dl_int = [
    read_at(_source) for _source in (catchment, drainageline, dl_cat_int)
]

### Table 1 - df_dl_con (global drainage line connectivity correlation)
### Table 2 - df_con_in_cats (drainage line connectivity correlation within each catchment)
### Table 3 - df_cat_dl_cor (catchment - d/s drainage line correlation)

In [5]:
# Table 1: drainage line -> downstream drainage line, indexed by line ID.
# set_index returns a new frame, so the column slice of df_dl is never
# modified in place (the in-place form triggers SettingWithCopyWarning).
df_dl_con = df_dl[[dl_usid,dl_dsid]].set_index(dl_usid)

# Per-catchment results are gathered in lists and combined once after the
# loop; concatenating inside the loop copies the accumulated frame each time.
# The empty seed frames keep the expected columns even when df_cat has no rows.
_con_in_cat_parts = [pd.DataFrame([],columns=['us','ds'])]
_lgst_flpth_parts = [pd.DataFrame([],columns=[cat_usid,dl_usid])]
_cat_dl_records = []
#Beginning of Loop
for index,row in df_cat.iterrows():
    # Find all u/s and d/s nodes of drainage line within each catchment
    sample_cat = row[cat_usid]
    print(sample_cat)
    _temp_con_in_cat,_temp_lgst_flpth = Con_in_cat(df_dl_int[df_dl_int[cat_usid]==sample_cat])
    _temp_lgst_flpth[cat_usid]=sample_cat
    _con_in_cat_parts.append(_temp_con_in_cat)
    _lgst_flpth_parts.append(_temp_lgst_flpth)
    # Assign d/s drainage line ID to CatID ('ds' holds the same value on every row)
    _cat_dl_records.append([sample_cat, _temp_con_in_cat['ds'].iloc[0]])
#End of Loop

# Table 2: upstream-end line -> downstream line of its catchment.
df_con_in_cats = pd.concat(_con_in_cat_parts).set_index('us')
# Largest upstream trace of every catchment, tagged with the catchment ID.
df_lgst_flpth = pd.concat(_lgst_flpth_parts)
# Table 3: downstream line -> catchment ID.
df_cat_dl_cor = pd.DataFrame(_cat_dl_records,columns=[cat_usid,dl_usid]).set_index(dl_usid)
# df_lgst_flpth.to_clipboard()
#Create longest flowpath shapefile based on the list


# print(df_con_in_cats[0:3])
# print(df_cat_dl_cor)

2011020000
4011010001
4011010002
4011010003
4011010004
4011010005
4011010006
4011010007
4011010008
4011010009
4011010010
4011010011
4011010012
4011010013
4011010014
4011010015
4011010016
4011010017
4011010018
4011010019
4011010020
4011010021
4011010022
4011010023
4011010024
4011010025
4011010026
4011010027
4011010028
4011010029
4011010030
4011010031
4011010032
4011010033
4011010034
4011010035
4011010036
4011010037
4011010038
4011010039
4011010040
4011010041
4011010042
4011010043
4011010044
4011010045
4011010046
4011010047
4011010048
4011010049
4011010050
4011010051
4011010052
4011010053
4011010054
4011010055
4011010056
4011010057
4011010058
4011010059
4011010060
4011010061
4011010062
4011010063
4011010064
4011010065
4011010066
4011010067
4011010068
4011010069
4011010070
4011010071
4011010072
4011010073
4011010074
4011010075
4011010076
4011010077
4011010078
4011010079
4011010080
4011010081
4011010082
4011010083
4011010084
4011010085
4011010086
4011010087
4011010088
4011010089
4011010090

In [None]:
# lgst_flpth(df_lgst_flpth, drainageline)

## Generate final output table based on Tables 1 - 3

In [6]:
#Prepare final output table based on Table 1-3
_cat_ds_col = cat_usid+'_ds'

# iteration 1
_temp_t12 = df_dl_con.join(df_con_in_cats)
# Lines without a within-catchment 'ds' fall back to their own downstream line.
# Assign the filled column back: column.fillna(..., inplace=True) acts on a
# temporary and is silently lost under pandas Copy-on-Write.
_temp_t12['ds'] = _temp_t12['ds'].fillna(_temp_t12[dl_dsid])
_temp_t123 = _temp_t12.join(df_cat_dl_cor,on='ds') #this table now correlates upstream dl id with downstream catch ID

# join already returns a new frame, so no explicit copy of df_cat_dl_cor is needed.
df_final = df_cat_dl_cor.join(_temp_t123,rsuffix = '_ds',how='left')
# Drop rows where the downstream catchment is the catchment itself.
df_final = df_final[df_final[cat_usid]!=df_final[_cat_ds_col]]

# iteration 2
_temp_t122 = _temp_t12.join(df_con_in_cats,on='ds',rsuffix='_ds')
_temp_t123 = _temp_t122.join(df_cat_dl_cor,on='ds_ds')
df_final2 = df_final.join(_temp_t123,rsuffix = '_ds2',how='left')
df_final2 = df_final2.drop_duplicates([cat_usid,_cat_ds_col])
# Fill downstream catchments still missing after iteration 1 with the
# iteration-2 result.
df_final2 = df_final2.assign(
    **{_cat_ds_col: df_final2[_cat_ds_col].fillna(df_final2[cat_usid+'_ds2'])}
)

# Index labels become strings; the downstream column gets its final name.
df_final2 = df_final2.rename(index=str,columns={_cat_ds_col: cat_dsid})
#export df_final2 to excel for post-processing
df_final2.to_clipboard()

In [31]:
# Write the final table without its index. index=False is the documented
# spelling; the previous index=None only worked because None is falsy.
# NOTE(review): the index holds the drainage-line ID - confirm it is meant to be dropped.
df_final2.to_csv('Output/LosPen_v2.csv',index=False)

In [26]:
# Preview the first ten rows of the final table.
df_final2.head(10)

Unnamed: 0,Sub_Ids,NextDownID,ds,Sub_Ids_ds,NextDownID_ds2,ds_ds2,ds_ds,Sub_Ids_ds2
7.0,4011622126,1,1.0,,1,1.0,,


In [25]:
# Inspect the column names of the final table.
df_final2.columns

Index([       u'Sub_Ids',     u'NextDownID',             u'ds',
           u'Sub_Ids_ds', u'NextDownID_ds2',         u'ds_ds2',
                u'ds_ds',    u'Sub_Ids_ds2'],
      dtype='object')

In [28]:
# Show the final table without the helper connectivity columns. The result is
# only displayed, not assigned. The column name comes from the dl_dsid setting
# instead of a hard-coded 'NextDownID'.
df_final2.drop([dl_dsid,'ds'],axis=1)

MemoryError: 

# Debugging block

In [None]:
# Debugging aid: run the per-catchment step for one hand-picked catchment and
# print the resulting connectivity tables.
df_con_in_cats_d = pd.DataFrame([],columns=['us','ds'])
df_cat_dl_cor_d = pd.DataFrame([],columns=[cat_usid,dl_usid])

# NOTE(review): this ID must exist in the cat_usid field of the loaded
# catchment layer - confirm it matches the current workspace.
sample_cat = 'CarE1105'
# Con_in_cat returns (connectivity frame, largest upstream trace); unpack both
# so the frame - not the tuple - is concatenated and indexed below.
_temp_con_in_cat,_temp_lgst_flpth = Con_in_cat(df_dl_int[df_dl_int[cat_usid]==sample_cat])
df_con_in_cats_d = pd.concat([df_con_in_cats_d,_temp_con_in_cat])
# Assign one d/s to CatID, and find DwnID based on the drainageline connectivity correlation
#Add a new row to the new catchment dataframe, which will contain the catchment connectivity info eventually
# The row position comes from the debug frame itself, not from df_cat_dl_cor.
df_cat_dl_cor_d.loc[len(df_cat_dl_cor_d)]=[sample_cat, _temp_con_in_cat['ds'].iloc[0]]

df_cat_dl_cor_d = df_cat_dl_cor_d.set_index(dl_usid)
df_con_in_cats_d = df_con_in_cats_d.set_index('us')

df_con_in_cats_d = df_con_in_cats_d.join(df_cat_dl_cor_d,on='ds')

print(df_con_in_cats_d)
print(df_cat_dl_cor_d)

In [None]:
# Debugging aid: export the drainage-line table to Output/_temp_input.dbf
# without reading or deleting it (the first step of read_at, run on its own).
arcpy.TableToTable_conversion(in_rows=drainageline,out_path='Output', out_name='_temp_input.dbf')

In [None]:
# Debugging aid: read Output/_temp_input.dbf (the path the preceding export
# cell writes to) into a DataFrame.
df = Dbf5('Output/_temp_input.dbf').to_dataframe()

In [None]:
# Debugging aid: display the catchment attribute table loaded with read_at.
df_cat