In [1]:
import datetime as dt
import pandas as pd
import numpy as np
import altair as alt
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
files = [
    "/data2/elilouis/sublimationofsnow/thermistor_harp/202302032000.log",
    "/data2/elilouis/sublimationofsnow/thermistor_harp/202302042000.log",
    "/data2/elilouis/sublimationofsnow/thermistor_harp/screen20230306184500.log",
    "/data2/elilouis/sublimationofsnow/thermistor_harp/screen20230307184400.log",
]
start_times = []

Parse time info from file name

In [3]:
# start_datetime = filename
for filename in files:
    date_string = filename.split('/')[-1].split('.log')[0].removeprefix("screen")
    time = dt.datetime.strptime(
        date_string,
        '%Y%m%d%H%M%S'
    )
    print(f"Parsing: {date_string}...\t into...\t {time}")
    start_times.append(time)

Parsing: 202302032000...	 into...	 2023-02-03 20:00:00
Parsing: 202302042000...	 into...	 2023-02-04 20:00:00
Parsing: 20230306184500...	 into...	 2023-03-06 18:45:00
Parsing: 20230307184400...	 into...	 2023-03-07 18:44:00


For each file...
- open data
- clean text
- read the two faces of sensors separately
- smush them together

In [4]:
combined_df = pd.DataFrame()

for filename, start_time in zip(files, start_times):
    with open(filename) as f:
        # open data
        lines = f.readlines()
        
        # clean text
        cleaned_lines = [f for f in lines if f.startswith('TS302') or f.startswith('TS301')]
        delimited_lines = [line.split() for line in cleaned_lines]
        temp_only_lines = [[token for token in line if token.startswith('TS') or '.' in token] for line in delimited_lines]
        df = pd.DataFrame(temp_only_lines).drop(columns=5)

        # read the two faces of sensors separately
        low_df = df[df[0]=='TS302']
        low_df.loc[:, 'time_seconds'] = 5*np.arange(0, len(low_df))
        low_df = low_df.rename(columns={1:5, 2:15, 3:25, 4:35})
        low_df = low_df.drop(columns=0)
        low_df['side'] = 'low'

        high_df = df[df[0]=='TS301']
        high_df.loc[:, 'time_seconds'] = 5*np.arange(0, len(high_df))
        high_df = high_df.rename(columns={1:10, 2:20, 3:30, 4:40})
        high_df = high_df.drop(columns=0)
        high_df['side'] = 'high'

        # smush them together
        df = pd.concat([
            low_df.melt(id_vars=['time_seconds', 'side']),
            high_df.melt(id_vars=['time_seconds', 'side'])
        ]).sort_values(['time_seconds', 'variable'])
        df['height'] = df['variable'] - 5
        df = df.drop(columns='variable')
        df = df.rename(columns={'value': 'temperature'})
        df['time'] = df['time_seconds'].apply(lambda x: start_time + dt.timedelta(seconds = x))

        combined_df = pd.concat([combined_df, df])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  low_df.loc[:, 'time_seconds'] = 5*np.arange(0, len(low_df))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  high_df.loc[:, 'time_seconds'] = 5*np.arange(0, len(high_df))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  low_df.loc[:, 'time_seconds'] = 5*np.arange(0, len(low_df))
A value is trying to be

In [5]:
combined_df.time.dt.date.unique()

array([datetime.date(2023, 2, 3), datetime.date(2023, 2, 4),
       datetime.date(2023, 3, 6), datetime.date(2023, 3, 7)], dtype=object)

In [15]:
src = combined_df.copy(deep=True)
src['date'] = src.time.dt.date.apply(str)

alt.Chart(src).transform_window(
    rolling_mean='mean(temperature)',
    frame=[1,1],
    groupby=['height']
).mark_line().encode(
    alt.X('time:T'),
    alt.Y('rolling_mean:Q', scale=alt.Scale(zero=False), title="Temperature (˚C)"),
    alt.Color('height:O', scale = alt.Scale(scheme='viridis'), title=['Height above', 'snow (mm)']),
    alt.Facet("date:O", columns=2)
).properties(
    width=300, 
    height=150, 
    title = src['date'].iloc[0]
).resolve_scale(
    x='independent'
)

In [25]:
src.groupby("date").time.min()

date
2023-02-03   2023-02-03 20:00:00
2023-02-04   2023-02-04 20:00:00
2023-03-06   2023-03-06 18:45:00
2023-03-07   2023-03-07 18:44:00
Name: time, dtype: datetime64[ns]

In [26]:
src.groupby("date").time.max()

date
2023-02-03   2023-02-03 20:00:50
2023-02-04   2023-02-04 20:03:30
2023-03-06   2023-03-06 20:40:55
2023-03-07   2023-03-07 19:44:25
Name: time, dtype: datetime64[ns]

In [21]:
combined_df.to_csv("thermistor_harp.csv", index=False)

In [22]:
ls -lah | grep "thermistor_harp.csv"

-rw-rw-r--. 1 elilouis elilouis 718K Sep 18 10:30 thermistor_harp.csv
