In [52]:
'''
Code for generating table of R^2 values for my writeup

'''

import numpy as np
import pandas as pd
import sys
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
import seaborn as sns
from sklearn.metrics import r2_score
from scipy.optimize import curve_fit
import matplotlib.ticker as ticker
sys.path.append('..\\custom_libraries')

# Custom library to analyze NC rate of deterioration
import rate_analysis

df = pd.read_csv('../../spreadsheet_data/i16WB_tag_replaced.csv')
nc_distance_from_damaged_by_year = {}

In [53]:
################################################################################
# General Purpose Functions
################################################################################

def exponential_function(x, a, b):
        return a * np.exp(-b * x)

def calc_R2(x_data, y_data):
    params, covariance = curve_fit(exponential_function, x_data, y_data)
    y_pred = exponential_function(x_data, *params)
    return r2_score(y_data, y_pred)


In [54]:
# https://stackoverflow.com/questions/1482308/how-to-get-all-subsets-of-a-set-powerset
def powerset(s):
    x = len(s)
    masks = [1 << i for i in range(x)]
    for i in range(1 << x):
        yield [ss for mask, ss in zip(masks, s) if i & mask]
damaged_set = {'L1', 'T1', 'T2', 'L2', 'CC', 'SS', 'R'}
damaged_subsets = list(powerset(damaged_set))


damaged_states = []
r2_list = []
len_list = []
# Generate a list of R^2 values for each possible definition of a "damaged" slab when using the definition to calculate the distance to nearest damaged slab
for subset in damaged_subsets:
    nc_df = rate_analysis.construct_nc_deterioration_rate_table(df, 2014, 2018, 1, set(subset))
    nc_df = nc_df[nc_df['distance'] <= 20]
    x_data = nc_df['distance'].to_numpy().astype(np.float32)
    y_data = nc_df['deterioration_rate'].to_numpy().astype(np.float32)
    r2 = calc_R2(x_data, y_data)
    damaged_states.append(str(subset))
    r2_list.append(r2)
    len_list.append(len(subset))

r2_df = pd.DataFrame({'Damaged States': damaged_states, 'R^2': r2_list, 'len': len_list})
r2_df = r2_df.sort_values(by=['R^2'], ascending=False)
#r2_df.to_csv('r2_table.csv', index=False)




In [55]:
r2_df_1 = r2_df[r2_df['len'] == 1]
print(r2_df_1.to_latex(columns=['Damaged States', 'R^2']))


\begin{tabular}{llr}
\toprule
{} & Damaged States &  R\textasciicircum 2 \\
\midrule
2  &         ['SS'] & 0.44 \\
8  &         ['T2'] & 0.33 \\
16 &          ['R'] & 0.14 \\
64 &         ['CC'] & 0.10 \\
32 &         ['L2'] & 0.09 \\
4  &         ['L1'] & 0.08 \\
1  &         ['T1'] & 0.02 \\
\bottomrule
\end{tabular}



  print(r2_df_1.to_latex(columns=['Damaged States', 'R^2']))


In [56]:

pd.options.display.float_format = '{:.2f}'.format
r2_df = r2_df.reset_index(drop=True)
print(r2_df.to_latex(columns=['Damaged States', 'R^2']))

\begin{tabular}{llr}
\toprule
{} &                             Damaged States &   R\textasciicircum 2 \\
\midrule
0   &                   ['SS', 'L1', 'L2', 'CC'] &  0.63 \\
1   &                   ['T1', 'SS', 'L1', 'CC'] &  0.63 \\
2   &             ['T1', 'SS', 'L1', 'L2', 'CC'] &  0.62 \\
3   &             ['SS', 'L1', 'T2', 'L2', 'CC'] &  0.61 \\
4   &                    ['SS', 'L1', 'R', 'L2'] &  0.61 \\
5   &              ['SS', 'L1', 'R', 'L2', 'CC'] &  0.61 \\
6   &       ['T1', 'SS', 'L1', 'T2', 'L2', 'CC'] &  0.60 \\
7   &              ['T1', 'SS', 'L1', 'R', 'L2'] &  0.60 \\
8   &                    ['T1', 'L1', 'T2', 'R'] &  0.59 \\
9   &              ['SS', 'L1', 'T2', 'R', 'L2'] &  0.59 \\
10  &                   ['SS', 'L1', 'T2', 'L2'] &  0.58 \\
11  &                   ['T1', 'L1', 'T2', 'L2'] &  0.58 \\
12  &             ['T1', 'L1', 'T2', 'L2', 'CC'] &  0.58 \\
13  &        ['T1', 'SS', 'L1', 'R', 'L2', 'CC'] &  0.58 \\
14  &                    ['SS', 'T2', 'R', 'L2

  print(r2_df.to_latex(columns=['Damaged States', 'R^2']))
