# File Convert Raw
**Description:** Function for Extracting Relevant Data from Raw .pro File and Converting to .csv File
**Input Data:** Ascii file of .pro (snowpack profile) output from SNOWPACK model  
**Output Data:** csv file of snow depth, max density, water content at max density, season, max ice volume, and max density index
**Creator:** Emma Perkins  
**Date:** November 2023

In [1]:
#import necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.colors as colors
import datetime
import glob

In [2]:
def file_convert_raw(folder, model_run):
    #define supporting functions-------------------------------------------------------------------------------
    def extract_all(result):
        first_data_idx = result.index[result['[STATION_PARAMETERS]'] == '[DATA]'][0] + 1

        #save header separately from data
        header = result['[STATION_PARAMETERS]'].iloc[0:first_data_idx-1]
        data = result['[STATION_PARAMETERS]'].iloc[first_data_idx:-2]

        #find the rows with date
        date = pd.to_datetime(data.loc[data.str.startswith('0500', na=False)].str.slice(5), format='%d.%m.%Y %H:%M:%S').to_numpy()

        #find rows with depth
        depth = data.loc[data.str.startswith('0501', na=False)].str.slice(5).str.rsplit(',').str[-1].astype(float).to_numpy()

        #find indices of date, depth, density, water content, and ice volume
        date_idx = data.loc[data.str.startswith('0500', na=False)].index.values
        depth_idx = data.loc[data.str.startswith('0501', na=False)].index.values
        den_idx = data.loc[data.str.startswith('0502', na=False)].index.values
        wc_idx = data.loc[data.str.startswith('0506', na=False)].index.values
        ice_vol_idx = data.loc[data.str.startswith('0515', na=False)].index.values

        #create dataframe with date and depth
        full_data = pd.DataFrame({'date': date, 'depth': depth})

        #initialize columns of max_den and WC_dmax
        full_data['max_den'] = np.nan 
        full_data['WC_dmax'] = np.nan
        full_data['max_ice_vol'] = np.nan
        full_data['den_max_idx'] = np.nan

        #populate with max densities and ice volumes for dates with depth greater than 0
        sorter = np.argsort(date_idx)
        full_date_pd_ids = sorter[np.searchsorted(date_idx, den_idx - 2, sorter=sorter)]
        full_data.loc[full_date_pd_ids, 'max_den'] = [np.array(i[1:]).astype(float).max() for i in data[den_idx].str.slice(5).str.rsplit(',')]
        full_data.loc[full_date_pd_ids, 'max_ice_vol'] = [np.array(i[1:]).astype(float).max() for i in data[ice_vol_idx].str.slice(5).str.rsplit(',')]

        #find water content corresponding to layer of maximum density at each timestep------------------------------------------------------------------
        #find index of max density layer for each timestep
        full_data.loc[full_date_pd_ids, 'den_max_idx'] = [np.array(i[1:]).astype(float).argmax() for i in data[den_idx].str.slice(5).str.rsplit(',')]

        #find dates that have full data (depth greater than 0 aka max density has a value, not na)
        full_dates = full_data[~full_data['den_max_idx'].isna()]

        #get index (integer) of maximum density for dates that have full data
        full_dates['den_max_idx'] = full_dates.den_max_idx.astype(int)

        #calculate water content at index of max density
        WCs = np.array([])
        d = 0
        den_max_ids = full_dates.den_max_idx.reset_index().den_max_idx
        for i in data[wc_idx].str.slice(5).str.rsplit(','):
            WCs = np.append(WCs,float(np.array(i[den_max_ids[d]+1])))
            d = d + 1
        full_dates['WC_dmax'] = WCs

        #populate full_data dataframe with full_dates data
        full_data[~full_data['den_max_idx'].isna()] = full_dates

        #drop den_max_idx since we no longer need it
        #full_data = full_data.drop('den_max_idx')

        return full_data
    
    #supporting function for assigning season to dates
    def assign_season(row):
        year = row.date.year
        if datetime.datetime(year, 7, 15) < row.date < datetime.datetime(year + 1, 7, 15):
            season = year
        else:
            season = year - 1
        return season
    
    #END OF SUPPORTING FUNCTIONS-----------------------------------------------------------------------------------------------------------------------
    # Get data file names
    main_path = '/glade/work/eperkins/SNOWPACK_data/Results/pro_files/GE_alts/'+folder+'/'+model_run+'/' #change to appropriate path
    filenames = sorted(glob.glob(main_path + "*.pro"))
    
    #read data for all files
    results = []
    for filename in filenames:
        results.append(pd.read_csv(filename, sep = '\n')) #read each file into separate pandas dataframe
        
    #extract data for all points
    full_data = list(range(len(filenames)))
    for i in range(len(filenames)):
        full_data[i] = extract_all(results[i])
        
    #assign seasons
    for i in range(len(filenames)):
        full_data[i]['season'] = full_data[i].apply(assign_season, axis = 1)
        full_data[i].to_csv('/glade/work/eperkins/SNOWPACK_data/Results/analysis/GE_alts/csv_files/'+folder+'/'+model_run+'/'+'Point'+str(i+1)+'.csv')
    
    return full_data

In [3]:
%%time

folder = '1950_2014'
full_runs = ['LE1','LE2','LE3','LE4','LE5','LE6','LE7','LE8','LE9','LE10']
for model_run in full_runs:
    full_data = file_convert_raw(folder, model_run)
    #print(full_data[0].head(10))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


CPU times: user 12min 38s, sys: 30.2 s, total: 13min 9s
Wall time: 13min 32s
