# C A L I P E R

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import plotly.express as px



In [2]:
def find_linear_fit(x, y):
    """
    Fit a straight line y = m*x + b through the given points and print it.

    Parameters:
    - x: sequence of x-coordinates
    - y: sequence of y-coordinates (must have the same length as x)

    Returns:
    Tuple (slope, intercept) of the degree-1 polynomial fit from np.polyfit.

    Raises:
    ValueError: if x and y do not contain the same number of points.
    """
    # Reject mismatched inputs before any conversion takes place
    if len(x) != len(y):
        raise ValueError("Number of points in x and y must be the same.")

    # Degree-1 fit; np.polyfit returns the coefficients highest power first
    coefficients = np.polyfit(np.array(x), np.array(y), 1)
    slope, intercept = coefficients

    # Report the raw coefficients, then the rounded equation
    report = (
        f"Slope (m): {slope}",
        f"Intercept (b): {intercept}",
        f"Linear Fit: y = {slope:.2f}x + {intercept:.2f}",
    )
    for report_line in report:
        print(report_line)

    return slope, intercept

def options_color_scheme(option):
    """
    Look up one of the predefined color palettes by name.

    Parameters:
    - option (str): Palette name, either 'mariana' or 'grays'. Any other
      value prints a notice and falls back to the 'mariana' palette.

    Returns:
    List[str]: Hex color codes of the selected palette (a new list per call).
    """
    # Default palette: groups of reds, yellows/oranges, greens, blues, grays
    mariana_palette = [
        '#990000', '#ff0000', '#cc0000', '#ff3333', '#ff6666',
        '#ffff00', '#ffcc00', '#ff9900', '#ff6600', '#ff4500',
        '#00ff00', '#009900', '#00cc00', '#33cc33', '#66ff66',
        '#313695', '#4575b4', '#599ac8', '#74add1', '#abd9e9',
        '#000000', '#333333', '#666666', '#808080', '#999999', '#cccccc'
    ]
    if option == 'mariana':
        return mariana_palette

    if option == 'grays':
        # Light-to-dark gray ramp; the last three entries are all black
        return [
            '#f0f0f0', '#e6e6e6', '#d9d9d9', '#cccccc', '#b3b3b3',
            '#999999', '#8c8c8c', '#808080', '#737373', '#666666',
            '#595959', '#4d4d4d', '#404040', '#333333', '#262626',
            '#1a1a1a', '#0d0d0d', '#000000', '#000000', '#000000'
        ]

    # Unrecognized name: tell the user and use the default palette
    print(f"Unknown color scheme option: {option}. Defaulting to 'mariana'.")
    return mariana_palette

def plot_scatter_all(df, x_column, y_column, color_scheme=None, color_column=None, symbol_column=None, title=None, name=None, width=800, height=600):
    """
    Generate a Plotly scatter plot with the y-values negated and display it.

    The y-column is multiplied by -1 for plotting only. The negation is done
    on a copy, so the caller's DataFrame is left unchanged and calling this
    function repeatedly always produces the same figure.

    Parameters:
    - df: DataFrame holding the data to plot (not modified)
    - x_column: str, column name for x-axis
    - y_column: str, column name for y-axis (plotted multiplied by -1)
    - color_scheme: list of color codes used as the discrete color sequence
      (optional, default=None)
    - color_column: str, column name for color (optional, default=None)
    - symbol_column: str, column name for marker symbol (optional, default=None)
    - title: str, title of the plot (optional, default=None)
    - name: str, label for the DataFrame (optional, default=None). Currently
      unused: HTML export is disabled, the figure is only shown.
    - width: int, figure width passed to the layout (default=800)
    - height: int, figure height passed to the layout (default=600)
    """
    # Negate y on a copy so the input DataFrame keeps its original values
    plot_df = df.copy()
    plot_df[y_column] = plot_df[y_column] * -1

    fig = px.scatter(plot_df, x=x_column, y=y_column, color=color_column, title=title,
                     labels={x_column: x_column.capitalize(), y_column: y_column.capitalize()},
                     color_discrete_sequence=color_scheme,
                     symbol=symbol_column)

    # Set width and height
    fig.update_layout(width=width, height=height)

    # White background and integer tick values spanning the negated y data
    # NOTE(review): tickvals is normally only honoured with tickmode='array';
    # confirm which tick spacing is actually intended here.
    y_min = int(plot_df[y_column].min())
    y_max = int(plot_df[y_column].max())
    fig.update_layout(
        plot_bgcolor='white',
        yaxis=dict(tickvals=list(range(y_min, y_max + 1)), tickmode='linear'),
    )

    # Light gray grid, black zero lines and fixed axis ranges
    # (x: 0 to 40, y: -37 to 0 are hard-coded)
    fig.update_layout(
        xaxis=dict(showgrid=True, gridwidth=0.1, gridcolor='lightgray', zeroline=True, zerolinewidth=2, zerolinecolor='black', range=[0, 40]),
        yaxis=dict(showgrid=True, gridwidth=0.1, gridcolor='lightgray', zeroline=True, zerolinewidth=2, zerolinecolor='black', range=[-37, 0]),
    )

    # Show the plot in the notebook or script
    fig.show()

# Read data

In [3]:
# Folder that is searched for the raw caliper *.LAS files (absolute, machine-specific path)
folder_path = Path(r'C:\Users\Mariana\Documents\freshwater_lens\data\caliper\raw')

In [4]:
# Column names given to the fields read from each raw caliper file
columns = ['Depth_ft', 'Caliper_in']

In [5]:
# Caliper raw files, sorted so the row order of the combined table is
# reproducible (glob order depends on the filesystem)
files = sorted(folder_path.glob('*.LAS'))
if not files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate"
    raise FileNotFoundError(f"No .LAS files found in {folder_path}")

# List to save dataframes
dataframes = []

# Load files
for file_path in files:
    # Tab-separated data; the first 34 lines of each file are skipped
    # (presumably the LAS header block, assumed the same length in every file)
    df = pd.read_csv(file_path, sep='\t', skiprows=34, names=columns)
    df['source_file'] = file_path.name  # to know file origin
    dataframes.append(df)

# Concatenate all dataframes into one table with a fresh 0..n-1 index
concatenate_caliper = pd.concat(dataframes, ignore_index=True)

concatenate_caliper.head()


Unnamed: 0,Depth_ft,Caliper_in,source_file
0,84.8563,2.99235,AW1D_caliper_20210910.LAS
1,84.7566,3.9187,AW1D_caliper_20210910.LAS
2,84.6569,4.99492,AW1D_caliper_20210910.LAS
3,84.5572,5.135,AW1D_caliper_20210910.LAS
4,84.4575,5.14982,AW1D_caliper_20210910.LAS


In [6]:
# Information about the auger with which each well was drilled
auger_diameter = pd.read_csv(r'C:\Users\Mariana\Documents\freshwater_lens\data\caliper\caliper_diameter_auger.csv')

# Attach the auger diameter to every caliper row by matching the file name.
# The left join keeps all caliper rows; the redundant 'file' key is dropped.
auger_lookup = auger_diameter[['file', 'Diameter_auger_in']]
concatenate_caliper = concatenate_caliper.merge(
    auger_lookup,
    how='left',
    left_on='source_file',
    right_on='file',
).drop(columns='file')

concatenate_caliper.head()

Unnamed: 0,Depth_ft,Caliper_in,source_file,Diameter_auger_in
0,84.8563,2.99235,AW1D_caliper_20210910.LAS,6
1,84.7566,3.9187,AW1D_caliper_20210910.LAS,6
2,84.6569,4.99492,AW1D_caliper_20210910.LAS,6
3,84.5572,5.135,AW1D_caliper_20210910.LAS,6
4,84.4575,5.14982,AW1D_caliper_20210910.LAS,6


# Calibration and SI units

In [7]:
# Calibration points provided by Natan (caliper technician) by email,
# written as (x, y) pairs so that each pairing is explicit
calibration_pairs = [
    (1.826, 3.06),
    (3.246, 4.5),
    (5.602, 7),
]
x_points, y_points = (list(axis) for axis in zip(*calibration_pairs))

# Straight-line fit through the calibration points
slope, intercept = find_linear_fit(x_points, y_points)

Slope (m): 1.0452194222927949
Intercept (b): 1.1344426288155678
Linear Fit: y = 1.05x + 1.13


In [8]:
# Apply the linear calibration to the raw caliper reading
calibrated_in = (concatenate_caliper['Caliper_in'] * slope) + intercept
concatenate_caliper['calibrated_in'] = calibrated_in

# Same diameter expressed in centimetres (factor 2.54)
concatenate_caliper['calibrated_cm'] = calibrated_in * 2.54

# Relative difference between calibrated diameter and auger diameter, in percent
auger_in = concatenate_caliper['Diameter_auger_in']
deviation_fraction = (calibrated_in - auger_in) / auger_in
concatenate_caliper['Well_Diameter_Deviation_Percentage'] = deviation_fraction * 100

# Depth converted from feet to metres (factor 0.3048)
concatenate_caliper['Depth [m]'] = concatenate_caliper['Depth_ft'] * 0.3048

# Plots

In [9]:
# Diameter deviation (%) on x against depth on y, colored by source file
fig = px.scatter(
    concatenate_caliper,
    x=concatenate_caliper.Well_Diameter_Deviation_Percentage,
    y=concatenate_caliper['Depth [m]'],
    color=concatenate_caliper['source_file'],
    title="Well_Diameter_Deviation_Percentage",
    color_continuous_scale=px.colors.sequential.Jet,
)

# Marker size
fig.update_traces(marker={'size': 10})

# y-axis settings applied together: reversed autorange, range and tick count
fig.update_yaxes(autorange="reversed", range=[0, 10], nticks=40)

# Figure size
fig.update_layout(autosize=True, width=1400, height=1800)

fig.show()

In [10]:
# Palette used as the discrete color sequence for the combined caliper plot
color_scheme = options_color_scheme('mariana')

In [11]:
# Calibrated diameter (cm) against depth (m) for all files:
# color by source file, marker symbol by auger diameter
plot_scatter_all(
    concatenate_caliper,
    'calibrated_cm',
    'Depth [m]',
    title='caliper_all',
    color_column='source_file',
    color_scheme=color_scheme,
    symbol_column='Diameter_auger_in',
    name='caliper_all_cm',
    width=800,
    height=1000,
)

In [12]:
# Write the combined table to the current working directory.
# The DataFrame index is written as the first, unnamed column (pandas default).
concatenate_caliper.to_csv('concatenate_caliper_all.csv')