### How it works:

Here the data is imported from the csv and analytics files into pandas dataframes so that it can be plotted in another notebook. The dataframes are saved using ```pandas.DataFrame.to_pickle('file_name.pkl')```. They can then be loaded into the plotting notebook using ```pandas.read_pickle('file_name.pkl')``` (note that ``read_pickle`` is a top-level pandas function, not a ``DataFrame`` method). 

In [5]:
import pandas as pd
import numpy as np

import csv
import os

In [6]:
# Names of the species whose output files can be imported. Each entry is used
# verbatim as a file-name stem, so the spelling must match the files on disk.
bulk_list = [
    'be-',
    'bO', 'bO2', 'bO2-', 'bO2*', 'bO2+',
    'bO3', 'bO3-', 'bO3*', 'bO3+',
    'bO-', 'bO*', 'bO+',
]

# Full list: the bulk entries first, followed by every other species.
species_list = bulk_list + [
    'e-', 'G0', 'G-', 'ge-',
    'gO', 'gO2', 'gO2-', 'gO2*', 'gO2+',
    'gO3', 'gO3-', 'gO3*', 'gO3+',
    'gO-', 'gO*', 'gO+',
    'O', 'O2', 'O2-', 'O2+',
    'O3', 'O3-', 'O3+',
    'O-', 'O+',
    'total_ice_O', 'total_ice_O2', 'total_ice_O3',
]

bulk_list_woions = [
    'bO', 'bO*',
    'bO2', 'bO2*',
    'bO3', 'bO3*',
]
all_list_woions = [
    'bO', 'bO2', 'bO3',
    'gO', 'gO2', 'gO3',
]

# bulk_list without the starred entries.
# The suprathermal species disappeared from my folder
bulk_list_2 = [name for name in bulk_list if '*' not in name]

bulk_ion_list = [
    'be-',
    'bO2-', 'bO2+',
    'bO3-', 'bO3+',
    'bO-', 'bO+',
]

# for testing code
ion_list = ['bO+', 'bO2+', 'bO3+']

## Define filepath

This is the filepath from the current directory to the directory where your data is stored. This directory should contain the subdirectories: csv, analytics, pickle_dataframes

In [7]:
# Relative path of the data directory. The cells below prepend it to
# "/csv/", "/analytics/" and "/pickle_dataframes/" when building file names.
version = 'w_ions/22_02_07_fit'

## Importing csv files

Imports the csv files for all species in ``wk_lst`` into a single pandas dataframe. (Also does a small amount of data analysis.)

In [8]:
## For new code outputs
#
# Reads one csv file per species in wk_lst, merges them on the 'Fluence'
# column into a single dataframe, adds a few derived columns, and pickles the
# result to <version>/pickle_dataframes/csv_dataframe.pkl.

# Change to the list you want the data for
wk_lst = bulk_list_2

# Rewrite each csv file in order to modify the header; this must be done before
# reading it into a dataframe. I used code from
# https://stackoverflow.com/questions/16306819/python-edit-csv-headers
for species in wk_lst:
    inputFileName = version + "/csv/" + species + ".csv"
    outputFileName = os.path.splitext(inputFileName)[0] + "_modified.csv"

    with open(inputFileName, newline='') as inFile, open(outputFileName, 'w', newline='') as outfile:
        r = csv.reader(inFile)
        w = csv.writer(outfile)

        next(r, None)  # skip the first row from the reader, the old header
        # write new header
        w.writerow(['Fluence', species])

        # copy the rest
        w.writerows(r)

# Read and merge data.
# The per-species frame must NOT be called `csv`: that would rebind the name of
# the csv module imported at the top of the notebook, and re-running this cell
# would then fail at csv.reader(...).
# Starting from None (rather than relying on a first-iteration check) also
# guarantees that a merged_data left over from an earlier run is never reused.
merged_data = None
for species in wk_lst:
    species_df = pd.read_csv(version + "/csv/" + species + "_modified.csv")
    if merged_data is None:
        merged_data = species_df
    else:
        merged_data = merged_data.merge(species_df, on=["Fluence"])

# Some data analysis
merged_data['Ion volume density'] = merged_data[bulk_ion_list].sum(axis=1)
merged_data['Total volume density'] = merged_data[bulk_list_2].sum(axis=1)
# Note: despite the column name this is the plain ratio (a fraction); it is
# not multiplied by 100.
merged_data['Percent Ion'] = merged_data['Ion volume density']/merged_data['Total volume density']

# Save dataframe
merged_data.to_pickle(version + '/pickle_dataframes/csv_dataframe.pkl')
print(merged_data.head(n=25))

     Fluence           be-            bO       bO2          bO2-  \
0   0.000039  7.982000e-12  7.471000e-14  0.004559  7.432000e-18   
1   0.000048  9.851000e-12  7.471000e-14  0.004559  9.408000e-18   
2   0.000059  1.216000e-11  7.471000e-14  0.004559  3.260000e-17   
3   0.000073  1.500000e-11  7.471000e-14  0.004559  6.252000e-17   
4   0.000090  1.851000e-11  7.471000e-14  0.004559  9.946000e-17   
5   0.000111  2.285000e-11  7.471000e-14  0.004559  1.450000e-16   
6   0.000138  2.819000e-11  7.471000e-14  0.004559  2.013000e-16   
7   0.000170  3.479000e-11  7.471000e-14  0.004559  2.707000e-16   
8   0.000209  4.293000e-11  7.471000e-14  0.004559  3.563000e-16   
9   0.000258  5.298000e-11  7.471000e-14  0.004559  4.621000e-16   
10  0.000319  6.538000e-11  7.471000e-14  0.004559  5.925000e-16   
11  0.000394  8.069000e-11  7.471000e-14  0.004559  7.535000e-16   
12  0.000486  9.957000e-11  7.471000e-14  0.004559  9.854000e-16   
13  0.000599  1.229000e-10  7.471000e-14  0.0045

In [23]:
## For old code outputs
## This is only for the version of the code from the Mullikin paper. 

# import csv
# import os

# # Change to the list you want the data for
# wk_lst = all_list_woions
# #version = 'wo_ions/old_output'

# # Rewrite csv file in order to modify the header, must be done to then read into a dataframe. I used code from  https://stackoverflow.com/questions/16306819/python-edit-csv-headers 
# i = 0
# while i < len(wk_lst) :
#     inputFileName = version + "/csv/" + wk_lst[i] + ".csv"
#     outputFileName = os.path.splitext(inputFileName)[0] + "_modified.csv"
    
#     with open(inputFileName, newline='') as inFile, open(outputFileName, 'w', newline='') as outfile:
#         r = csv.reader(inFile)
#         w = csv.writer(outfile)
        
#         next(r, None)  # skip the first row from the reader, the old header
#         # write new header
#         w.writerow(['Time', wk_lst[i]])
        
#         # copy the rest
#         for row in r:
#             w.writerow(row)
        
#     i += 1

# # read and merge data
# i=0

# while i < len(wk_lst) :
#     csv = pd.read_csv(version + "/csv/" + wk_lst[i] + "_modified.csv")
#     if i == 0:
#         merged_data = csv
#     else:
#         merged_data = merged_data.merge(csv, on=["Time"])
#     i += 1
    
# flux = 2.33e14

# merged_data['Fluence'] = merged_data['Time'] * flux
# merged_data['total_O2'] = merged_data['bO2'] + merged_data['gO2']
# merged_data['total_O3'] = merged_data['bO3'] + merged_data['gO3']
    
# # Save dataframe
# merged_data.to_pickle(version + '/pickle_dataframes/csv_dataframe.pkl')
# print(merged_data.head(n=25))

        Time      bO           bO2      bO3       gO       gO2  gO3  \
0   0.000039  0.1471  4.559000e+09    408.3   0.0817  912000.0  0.0   
1   0.000048  0.1471  4.559000e+09    503.9   0.1008  912000.0  0.0   
2   0.000059  0.1471  4.559000e+09    621.8   0.1244  912000.0  0.0   
3   0.000073  0.1471  4.559000e+09    767.4   0.1535  912000.0  0.0   
4   0.000090  0.1471  4.559000e+09    947.1   0.1895  912000.0  0.0   
5   0.000111  0.1471  4.559000e+09   1169.0   0.2338  912000.0  0.0   
6   0.000138  0.1471  4.559000e+09   1442.0   0.2886  912000.0  0.0   
7   0.000170  0.1471  4.559000e+09   1780.0   0.3561  912000.0  0.0   
8   0.000209  0.1471  4.559000e+09   2197.0   0.4395  912000.0  0.0   
9   0.000258  0.1471  4.559000e+09   2711.0   0.5423  912000.0  0.0   
10  0.000319  0.1471  4.559000e+09   3345.0   0.6692  912000.0  0.0   
11  0.000394  0.1471  4.559000e+09   4128.0   0.8259  912000.0  0.0   
12  0.000486  0.1471  4.559000e+09   5095.0   1.0190  912000.0  0.0   
13  0.

## Importing and formatting analytics files

``ana_list`` and ``num_rxn`` must be updated to match the analytics files. For each species in ``ana_list``, the entry at the same position in ``num_rxn`` should store the number of reactions in that species' analytics file.

In [5]:
# Values needed to read the analytics files.
# ana_list[n] names a species and num_rxn[n] is the number of reactions listed
# for it, so the two lists must stay aligned.

# with ions
ana_list = [
    'be-',
    'bO', 'bO-', 'bO+',
    'bO2', 'bO2-', 'bO2+',
    'bO3', 'bO3-', 'bO3+',
]
num_rxn = [
    9,
    24, 13, 11,
    36, 14, 15,
    15, 13, 15,
]

# #w/o ions
# ana_list = ['bO', 'bO2', 'bO3']
# num_rxn = [19, 24, 8]


# Fixed-width layout of a fluence row: two columns.
flu_col_lbl = ['NA', 'Fluence']
flu_specs = [(0, 22), (22, -1)]

# Fixed-width layout of a reaction row. The columns are contiguous, so the
# (start, end) pairs are built from the list of column edges.
column_label = [
    'Index', 'Rxn',
    'R1', 'R2',
    'P1', 'P2', 'P3',
    'D1', 'D2', 'D3', 'D4', 'D5',
]
_col_edges = [0, 4, 10, 20, 30, 40, 50, 80, 91, 98, 103, 116, -1]
col_specs = list(zip(_col_edges[:-1], _col_edges[1:]))

def logic1(index):
    """Return True if row `index` (0-based) is not a fluence row.

    Every `flu_row`-th row is a fluence row; `flu_row` is a global that must
    be set before this function is called.
    """
    return (index + 1) % flu_row != 0


def logic2(index):
    """Return True if row `index` (0-based) is a fluence row.

    Exact complement of logic1; reads the same global `flu_row`.
    """
    return (index + 1) % flu_row == 0

# For every species in ana_list: read its analytics file, build a dataframe
# with one row per fluence and one column per reaction (values taken from the
# D2 column), drop the reactions that never reach 5, and pickle the result.
for species_idx, species in enumerate(ana_list):
    n_rxn = num_rxn[species_idx]
    filename = version + "/analytics/analytics_" + species
    # logic1/logic2 read this global to decide which rows are fluence rows
    flu_row = n_rxn + 1

    # Fluence rows only (every non-fluence row is skipped)
    fluence_rows = pd.read_fwf(
        filename,
        skiprows=logic1,
        names=flu_col_lbl,
        colspecs=flu_specs,
    )
    # Repeat each fluence once per reaction, then sort so that the repeats of
    # one fluence sit next to each other
    flu_df = pd.concat(
        [fluence_rows] * n_rxn, ignore_index=True
    ).sort_values(by=['Fluence'], ignore_index=True)

    # Reaction rows only (every fluence row is skipped)
    df_rxn = pd.read_fwf(
        filename,
        skiprows=logic2,
        colspecs=col_specs,
        names=column_label,
    )

    # Pair the rows up by position and keep only the columns used below
    merged_df = flu_df.join(df_rxn)[['Fluence', 'Rxn', 'D2']]

    plot_rxn_list = []
    for rxn_idx in range(n_rxn):
        # The first n_rxn rows hold one entry per reaction
        rxn_num = merged_df['Rxn'].values[rxn_idx]
        plot_rxn_list.append(rxn_num)

        # Rows of this reaction only, with D2 renamed to the reaction number
        single_rxn = (
            merged_df[merged_df['Rxn'] == rxn_num]
            .rename(columns={"D2": rxn_num})
            .drop(columns='Rxn')
        )

        if rxn_idx == 0:
            rxn_data = single_rxn
        else:
            rxn_data = rxn_data.merge(single_rxn, on=["Fluence"])

    # Drop all reactions that contribute less than 5% to the rate at all times.
    # (Invert the condition to keep only the low-contributing reactions.)
    for rxn_num in plot_rxn_list:
        if (abs(rxn_data[rxn_num]) < 5).all():
            #print('Reaction', rxn_num, 'never contributes more than 5% to the rate.')
            rxn_data = rxn_data.drop(rxn_num, axis=1)

    #print(plot_rxn_list)
    rxn_data.to_pickle(version + '/pickle_dataframes/' + species + '_rxn_dataframe.pkl')
    print(species)
    print(rxn_data.head(n=5))

#rxn_data
#print(plot_rxn_list)

be-
    Fluence  177.0  69.0  81.0  61.0  71.0  72.0
0  0.000091  100.0  -0.0  -0.0  -0.0  -0.0  -0.0
1  0.000112  100.0  -0.0  -0.0  -0.0  -0.0  -0.0
2  0.000138  100.0  -0.0  -0.0  -0.0  -0.0  -0.0
3  0.000170  100.0  -0.0  -0.0  -0.0  -0.0  -0.0
4  0.000210  100.0  -0.0  -0.0  -0.0  -0.0  -0.0
bO
    Fluence    156    169  158   64  181
0  0.000091 -66.57  31.44  1.7  0.0  0.0
1  0.000112 -66.57  31.44  1.7  0.0  0.0
2  0.000138 -66.57  31.44  1.7  0.0  0.0
3  0.000170 -66.57  31.44  1.7  0.0  0.0
4  0.000210 -66.57  31.44  1.7  0.0  0.0
bO-
    Fluence    171   70  183   82   62   63   73   74   85
0  0.000091  100.0 -0.0  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0
1  0.000112  100.0 -0.0  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0
2  0.000138  100.0 -0.0  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0
3  0.000170  100.0 -0.0  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0
4  0.000210  100.0 -0.0  0.0 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0
bO+
    Fluence    171   64  179   62   63   67   68   65   66
0  0.000091  100.0 -0.0  0.0 -0.