## TITLE: Random Locs Same Params

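**Purpose:** Move existing barrier-island breaches to random viable locations while keeping each breach's other parameters (width, depth, trigger, time factor) unchanged, then write the result to new `breach.data` files.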

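**Dependencies:** `pandas`, `numpy`, `clawpack.geoclaw` (`topotools`), and the project-local modules `breach_randomization` (from `BarrierBreach/src`) and `waveforms` (from `claw_code/src/claw_code/post`).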

---


In [1]:
import pandas as pd
import numpy as np
import sys
import os
import glob
from clawpack.geoclaw import topotools
sys.path.insert(0, '/home/catherinej/BarrierBreach/src/')
import breach_randomization as br

In [2]:
# Import waveforms from claw_code's post-processing package
sys.path.insert(0, '/home/catherinej/claw_code/src/claw_code/post')
import waveforms

In [101]:
class RandomBreach:
    """Re-locate existing breaches at random viable positions.

    On construction the object loads the topography, the gauge table, the
    gauge time series and every existing ``breach.data`` file found under
    ``breach_data_path``. The methods then draw random breach locations,
    check them against the gauge water levels, merge the new locations into
    the loaded breach tables and write them back out as ``breach.data``.

    Most of the heavy lifting is delegated to the project-local
    ``breach_randomization`` module (imported as ``br``).
    """

    # Fraction of the maximum dune height the water level has to reach at a
    # nearby gauge for a location to count as viable.
    DUNE_FRACTION = .20
    # Value added to the breach start time to obtain the stop time.
    BREACH_DURATION = 7200.0
    # Search distance handed to br.find_nearest_gauges (units are defined by
    # that helper).
    GAUGE_SEARCH_RADIUS = 1000
    # Run directories whose breach.data is never loaded.
    SKIPPED_DIRECTORIES = ('_output', 'no_breach', '15m')

    def __init__(self, breach_data_path, gauge_location_path, gauge_data_path, topo_path, masked_path):
        """Store the input paths and load all data needed for randomization.

        Args:
            breach_data_path: directory searched recursively for ``breach.data``.
            gauge_location_path: CSV file describing the gauges.
            gauge_data_path: location passed to ``br.load_gauge_data``.
            topo_path: topography file passed to ``br.load_topography``.
            masked_path: passed unchanged to ``br.get_random_location``.
        """
        self.breach_data_path = breach_data_path
        self.gauge_location_path = gauge_location_path
        self.gauge_data_path = gauge_data_path
        self.topo_path = topo_path
        self.topo_data = br.load_topography(self.topo_path)
        self.masked_island = masked_path
        # The gauge table must be loaded first: its index selects which gauge
        # time series are read.
        self.gauge_names = self.load_gauge_names()
        self.gauge_data = br.load_gauge_data(self.gauge_names.index.values, self.gauge_data_path)
        self.breach_data = self.load_existing_breach()

    def load_gauge_names(self):
        """Read the gauge CSV and drop its 'Unnamed: 0' and 'dist' columns.

        Returns:
            pandas.DataFrame: the remaining gauge columns.
        """
        df = pd.read_csv(self.gauge_location_path)
        return df.drop(['Unnamed: 0', 'dist'], axis=1)

    @staticmethod
    def _to_numeric_or_keep(column):
        """Convert a column to numbers, returning it unchanged if that fails."""
        # Same result as the deprecated pd.to_numeric(..., errors='ignore').
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    def load_existing_breach(self):
        """Load every ``breach.data`` below ``breach_data_path``.

        Files whose parent directory is listed in ``SKIPPED_DIRECTORIES`` are
        ignored. In each file the first line (breach count) is discarded, the
        second line supplies the parameter names and every following line
        holds the space-separated values of one parameter. The ``sigma`` row
        is not loaded. A ``Distance`` column with ``abs(West - East)`` is
        added to each table.

        Returns:
            dict: parent directory name -> pandas.DataFrame with one row per
            breach and title-cased column names.
        """
        pattern = os.path.join(self.breach_data_path, '**', 'breach.data')
        breach_data = {}
        for file in glob.glob(pattern, recursive=True):
            directory = os.path.basename(os.path.dirname(file))
            if directory in self.SKIPPED_DIRECTORIES:
                continue
            with open(file) as f:
                # Blank lines (e.g. the one after a trailing newline) carry
                # no parameter values.
                rows = [line.split() for line in f.read().split('\n') if line.strip()]
            rows.pop(0)  # first line: number of breaches
            names = [name.replace(',', '') for name in rows.pop(0)]
            # Files written by write_breach_data already name the depth row
            # in their header; only add the label when it is missing.
            if 'depth' not in [name.lower() for name in names]:
                names.append('Depth')
            columns = {name: values for name, values in zip(names, rows)
                       if name != 'sigma'}
            df = pd.DataFrame(columns)
            df = df.apply(self._to_numeric_or_keep)
            df.columns = [col.title() for col in df.columns]
            df['Distance'] = [abs(west - east) for west, east
                              in zip(df['West'], df['East'])]
            breach_data[directory] = df
        return breach_data

    def check_location_viability(self, max_dune, gauge_no):
        """Check whether the water level reaches the required share of the dune height.

        Args:
            max_dune: maximum dune height at the candidate breach location.
            gauge_no: iterable of gauge numbers; the columns named
                ``"<number>_eta"`` of ``self.gauge_data`` are inspected.

        Returns:
            tuple: ``(breach_start, breach_stop)`` with
            ``breach_stop = breach_start + BREACH_DURATION`` when at least one
            selected gauge reaches ``DUNE_FRACTION * max_dune``; otherwise
            ``(False, False)``.
        """
        x_percent = max_dune * self.DUNE_FRACTION
        column_names_to_keep = [f"{num}_eta" for num in gauge_no]
        gauge = self.gauge_data.filter(items=column_names_to_keep, axis=1)

        cols_greater = (gauge >= x_percent).any()
        if not cols_greater.any():
            print('This location is not viable')
            return False, False

        time_to_exceed_x_percent = [br.first_greater(gauge, x_percent, col)
                                    for col in cols_greater.index]
        # Only np.float64 results are treated as exceedance times; anything
        # else returned by br.first_greater is ignored.
        valid_times = [t for t in time_to_exceed_x_percent
                       if isinstance(t, np.float64)]
        if not valid_times:
            # Nothing usable came back, so min() would fail: treat as non-viable.
            print('This location is not viable')
            return False, False

        breach_start = min(valid_times)
        if breach_start == 27000.0:
            print('these columns suck:', cols_greater, 'dune height is: ', x_percent)
        breach_stop = breach_start + self.BREACH_DURATION
        return breach_start, breach_stop

    def randomize_location(self):
        """Draw random breach locations until one passes the viability check.

        Returns:
            dict: keys ``south``, ``north``, ``mu``, ``start``, ``stop`` for
            the accepted location, plus ``bad_breach``, the list of locations
            rejected before it.
        """
        bad_breach = []
        # Loop rather than recurse so rejected locations accumulate and a long
        # run of rejections cannot hit the recursion limit.
        while True:
            breach_loc = br.get_random_location(self.masked_island)
            lat = (breach_loc['south'][1] + breach_loc['north'][1]) / 2

            max_dune = br.max_dune_height(self.topo_data, breach_loc['south'][0],
                                          breach_loc['north'][0], breach_loc['lon'][0])
            if max_dune == 0.0:
                print('Why is the dune at 0.0?,', breach_loc)

            tide_gauges = br.find_nearest_gauges(self.gauge_names, breach_loc['lon'][1],
                                                 lat, self.GAUGE_SEARCH_RADIUS)
            # The index of the nearest-gauge table supplies the gauge numbers,
            # as in the notebook cell that calls check_location_viability
            # directly.
            breach_start, breach_stop = self.check_location_viability(
                max_dune, tide_gauges.index.values)
            # NOTE: this is a truthiness test, so a start time of exactly 0.0
            # is rejected just like the (False, False) result.
            if breach_start:
                return {'south': breach_loc['south'][1],
                        'north': breach_loc['north'][1],
                        'mu': breach_loc['lon'][1],
                        'start': breach_start,
                        'stop': breach_stop,
                        'bad_breach': bad_breach}
            bad_breach.append(breach_loc)

    def arrange_data(self, new_breach, key):
        """Turn a list of new breach dicts into column lists.

        Args:
            new_breach: list of dicts as returned by ``randomize_location``.
            key: name of the existing breach table whose ``Distance`` column
                provides the width of each breach.

        Returns:
            dict: column name -> list, with ``West``/``East`` placed half a
            width either side of ``Mu``.
        """
        # Read the widths from this instance, not from a notebook-level global.
        widths = self.breach_data[key]['Distance']
        return {'South': [b['south'] for b in new_breach],
                'North': [b['north'] for b in new_breach],
                'Mu': [b['mu'] for b in new_breach],
                'Start_Time': [b['start'] for b in new_breach],
                'End_Time': [b['stop'] for b in new_breach],
                'West': [b['mu'] - w / 2 for b, w in zip(new_breach, widths)],
                'East': [b['mu'] + w / 2 for b, w in zip(new_breach, widths)]}

    def combine_data(self, key, data):
        """Replace the columns named in ``data`` inside ``self.breach_data[key]``.

        Args:
            key: name of the breach table to update.
            data: dict of column name -> values, e.g. from ``arrange_data``.
        """
        df = pd.DataFrame(data)
        remaining = self.breach_data[key].drop(columns=list(data.keys()))
        self.breach_data[key] = pd.concat([remaining, df], axis=1)

    def write_breach_data(self, num_breaches, write_path, key):
        """Write ``self.breach_data[key]`` to ``<write_path>/breach.data``.

        The file holds the breach count, a header line naming the parameters
        and then one space-separated line per parameter. The ``Sigma`` line is
        always written as ``1``.

        Args:
            num_breaches: value written on the first line of the file.
            write_path: existing directory that receives ``breach.data``.
            key: name of the breach table to write.
        """
        comment_str = 'breach_trigger, south, north, west, east, mu, sigma, time_factor, start_time, end_time depth'
        write_order = ['Breach_Trigger', 'South', 'North', 'West', 'East',
                       'Mu', 'Sigma', 'Time_Factor', 'Start_Time', 'End_Time',
                       'Depth']
        table = self.breach_data[key]
        with open(os.path.join(write_path, 'breach.data'), 'w') as f:
            f.write(f'{num_breaches}' + '\n')
            f.write(comment_str + '\n')
            for param in write_order:
                if param == 'Sigma':
                    f.write('1' + '\n')
                else:
                    f.write(' '.join(map(str, table[param])) + '\n')
                    
        

In [102]:
# Build the breach randomizer for the 300 km no-breach run. The positional
# arguments are, in order: breach_data_path, gauge_location_path,
# gauge_data_path, topo_path, masked_path (paths are machine-specific).
randb = RandomBreach('/home/catherinej/300km_breach/no_breach_300km',
                 '/home/catherinej/BarrierBreach/data/ocean_gauges.csv',
                 '/home/catherinej/300km_breach/no_breach_300km/_output',
                 '/home/catherinej/bathymetry/moriches.nc',
                 '/home/catherinej/BarrierBreach/data/')

# Collect every breach.data below PATH, leaving out the copies that sit
# directly inside an `_output` directory.
filename = 'breach.data'
PATH = '/home/catherinej/width_depth/'
breach_data_files = glob.glob(os.path.join(PATH, '**', filename), recursive=True)
data_files = [d for d in breach_data_files if not d.split('/')[-2] == '_output']

for file in data_files:
    # The parent directory name identifies the run.
    directory = file.split('/')[-2]
    if directory not in ['no_breach', '15m']:
        # NOTE(review): `load_existing_breach` and `randomize_location` are
        # called as free functions here, but the only definitions visible in
        # this notebook are RandomBreach methods (and that method takes no
        # file argument). Presumably leftovers from an earlier version that
        # relied on kernel state; confirm before re-running this cell.
        old_breach_params = load_existing_breach(file)
        breach_loc = [randomize_location() for row in zip(old_breach_params['Breach_Trigger'], old_breach_params['Distance'] )]
        
       

In [103]:

# Work on the breach table that was loaded for the 300 km no-breach run.
breach = randb.breach_data['no_breach_300km']

# Look up the nearest bathymetry value for the south and north end of every
# breach; element [1] of the helper's result goes into 's' / 'n' and element
# [0] of the north lookup into 'lon'.
breach['s'] = [
    br.find_nearest_bathy_val(randb.topo_data, south, center)[1]
    for south, center in zip(breach['South'], breach['Mu'])
]
breach['n'] = [
    br.find_nearest_bathy_val(randb.topo_data, north, center)[1]
    for north, center in zip(breach['North'], breach['Mu'])
]
breach['lon'] = [
    br.find_nearest_bathy_val(randb.topo_data, north, center)[0]
    for north, center in zip(breach['North'], breach['Mu'])
]

# Maximum dune height for each breach, using the values found above.
breach['max_dune'] = [
    br.max_dune_height(randb.topo_data, lower, upper, lon)
    for lower, upper, lon in zip(breach['s'], breach['n'], breach['lon'])
]

# Index of the gauges returned for each breach centre (mid-point of South/North).
breach['tide_gauges'] = [
    br.find_nearest_gauges(randb.gauge_names, center, (south + north) / 2, 1000).index
    for center, south, north in zip(breach['Mu'], breach['South'], breach['North'])
]

# (breach_start, breach_stop) per breach, or (False, False) when not viable.
breach['breach_timing'] = [
    randb.check_location_viability(dune, gauge_ids.values)
    for dune, gauge_ids in zip(breach['max_dune'], breach['tide_gauges'])
]

In [107]:
# Save only the breach start/end time columns to a pickle in the working directory.
breach[['Start_Time', 'End_Time']].to_pickle('6_breach_times.pkl')

In [108]:
# Reload the pickled breach times and display them as the cell result.
times = pd.read_pickle('6_breach_times.pkl')
times

Unnamed: 0,Start_Time,End_Time
0,214219.9,221419.9
1,213647.4,220847.4
2,214603.3,221803.3
3,214556.6,221756.6
4,216243.2,223443.2
5,215135.5,222335.5


In [54]:
# NOTE(review): bare, incomplete identifier. Presumably an abandoned
# tab-completion of `check_location_viability`; no visible cell defines
# `check_loca`, so this cell is expected to raise NameError.
check_loca

In [58]:
# `re` is not imported by the import cells at the top of this notebook.
import re

# Split the run key around its digit groups; the capturing group keeps the
# digits in the result (presumably a key such as 'd16' -> ['d', '16', ''],
# judging by the 'd_rand_16' label built from it in the next cell).
# NOTE(review): `key` must already exist in the kernel; no visible cell
# defines it at notebook level.
fldr_label = re.split(r'(\d+)', key)

In [60]:
# Build the folder label "<prefix>_rand_<digits>" from the first two split parts.
f'{fldr_label[0]}_rand_{fldr_label[1]}'

'd_rand_16'

In [61]:
# Preview the space-separated 'West' line the same way write_breach_data formats it.
print(' '.join(map(str, randb.breach_data[key]['West'])) + '\n')

-72.64312666295108 -72.65393242221057 -72.81251799628774 -72.77394255184257 -72.71010210739686 -72.76479301480535



In [None]:
def runit(write_path):
    """Randomize the breach locations of every loaded run and write them out.

    For each breach table loaded from '/home/catherinej/width_depth', one new
    random location is drawn per existing breach, merged into the table and
    written with ``RandomBreach.write_breach_data``.

    Args:
        write_path: directory that receives ``breach.data``.
    """
    randb = RandomBreach('/home/catherinej/width_depth',
                         '/home/catherinej/BarrierBreach/data/ocean_gauges.csv',
                         '/home/catherinej/width_depth/no_breach/_output',
                         '/home/catherinej/bathymetry/moriches.nc',
                         '/home/catherinej/BarrierBreach/data/')
    for key in randb.breach_data:
        breach = randb.breach_data[key]
        # One new random location per existing breach.
        new_breach = [randb.randomize_location() for _ in breach.Breach_Trigger]
        data = randb.arrange_data(new_breach, key)
        randb.combine_data(key, data)
        # Take the count from the table instead of assuming six breaches, so
        # the first line of the file always matches the rows written.
        num_breaches = len(breach)
        # NOTE(review): every key is written to the same
        # <write_path>/breach.data, so each iteration overwrites the previous
        # file. Presumably a per-key sub-folder was intended; confirm.
        randb.write_breach_data(num_breaches, write_path, key)

In [8]:
# Display the breach table for `key`.
# NOTE(review): the recorded output of this cell is a NameError because `key`
# was not defined when it ran.
randb.breach_data[key]

NameError: name 'key' is not defined

In [69]:
# List the entries of the current working directory.
os.listdir(os.getcwd())

['load_all_waveforms.py',
 '.ipynb_checkpoints',
 'random_locs.py',
 '__init__.py',
 'random_locs_same_params.ipynb']

In [74]:
# Print the full path of every entry in the current working directory.
source_dir = os.getcwd()
for file in os.listdir(source_dir):
    print(os.path.join(source_dir , file))

/home/catherinej/BarrierBreach/src/tools/load_all_waveforms.py
/home/catherinej/BarrierBreach/src/tools/.ipynb_checkpoints
/home/catherinej/BarrierBreach/src/tools/random_locs.py
/home/catherinej/BarrierBreach/src/tools/__init__.py
/home/catherinej/BarrierBreach/src/tools/random_locs_same_params.ipynb


In [None]:
# `shutil` is not imported by the import cells at the top of this notebook.
import shutil

# Copy every regular file (sub-directories are skipped) from source_folder
# into destination_folder.
# NOTE(review): `source_folder` and `destination_folder` are not defined in
# any visible cell; they must already exist in the kernel.
for file_name in os.listdir(source_folder):
    # os.path.join inserts the separator when the folder name lacks a
    # trailing slash; plain string concatenation would build a wrong path.
    source = os.path.join(source_folder, file_name)
    destination = os.path.join(destination_folder, file_name)
    # copy only files
    if os.path.isfile(source):
        shutil.copy(source, destination)
        print('copied', file_name)
