In [15]:
import pandas as pd
import datetime
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm
from loguru import logger
logger.add(sys.stderr, format="{time} {level} {message}", filter="", level="INFO")
logger.debug('Logger started.')

def extract_data(file):

    df = pd.read_csv(file)
    df.loc[:, 'date'] = pd.to_datetime(df['date'])

    return df

def get_time(key):

        return {
            'pre': '00:00',
            'am': '06:00',
            'mid': '12:00',
            'pm': '18:00',
            }.get(key)
            

def build_INT_df(df, category):
    
    df_category = df[(df['category'] == category)]
    df_category.loc[:, 'time'] = df.apply(lambda x: get_time(x['time of day']), axis=1)

    # hours of sleep need to be converted to floating point.
    if category == 'Sleep':             
        df_category.loc[:, 'rating/amount'] = df_category['rating/amount'] + ":0"
        df_category.loc[:, 'rating/amount'] = df_category.apply(lambda x: pd.to_timedelta(x['rating/amount']), axis=1)
        df_category.loc[:, 'rating/amount'] = df_category['rating/amount'] / datetime.timedelta(minutes=1) / 60
        
    # always end up with a float for rating/amount
    df_category.loc[:, 'rating/amount'] = pd.to_numeric(df_category['rating/amount'], downcast='float')
        
    return df_category


class pawprint_data:

    "This is a basic ETL process wrapped up in a class."
    def __init__(self, file):
        self.file = file
        self.STA_df = extract_data(file)
        
        self.INT_symptoms = build_INT_df(self.STA_df, 'Symptom')
        self.INT_energy= build_INT_df(self.STA_df, 'Energy')
        self.INT_sleep = build_INT_df(self.STA_df, 'Sleep')
        self.INT_sleep_quality = build_INT_df(self.STA_df, 'Sleep quality')
        self.INT_mood = build_INT_df(self.STA_df, 'Mood')
        
        self.REP_dates = pd.DataFrame(self.STA_df['date'].unique(), columns=['date'])
        self.REP_symptoms = build_REP_symptoms(self.INT_symptoms)

def build_REP_symptoms(df):
    # REP_symptoms = pd.DataFrame(columns=['date'])
    # REP_symptoms.loc[:, 'date'] = df['date'].unique()
    df['detail'] = df['detail'].str.extract(r'(.*(?=\ \())')
    REP_symptoms_grouped = df.groupby(['date', 'category', 'detail']).agg('mean').reset_index()
    REP_symptoms_summed = REP_symptoms_grouped.groupby(['date', 'category']).agg('sum').reset_index()
        
    return REP_symptoms_summed

def transform_average(data_obj):

    INT_dataframes = {
    'Symptoms': data_obj.INT_symptoms, 
    'Energy': data_obj.INT_energy, 
    'Sleep quality': data_obj.INT_sleep_quality, 
    'Mood': data_obj.INT_mood
    }
    for df in INT_dataframes.values():
        window = 3
        df['average'] = df['rating/amount'].rolling(window).mean().round(1)


data = pawprint_data('../bearable_data.csv')

print(data.REP_symptoms.head(10))

2021-12-01 10:51:52.022 | DEBUG    | __main__:<module>:8 - Logger started.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



        date category  rating/amount
0 2020-07-23  Symptom       8.000000
1 2020-07-24  Symptom       5.500000
2 2020-07-25  Symptom       4.000000
3 2020-07-26  Symptom       4.000000
4 2020-07-27  Symptom       5.000000
5 2020-07-28  Symptom       3.000000
6 2020-07-29  Symptom       5.000000
7 2020-07-30  Symptom       5.000000
8 2020-07-31  Symptom       6.833333
9 2020-08-01  Symptom       5.000000




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [16]:
def REP_graph(data_obj):
    fig = go.Figure()
    dates = data_obj.REP_dates['date']
    symp_trend = px.scatter(x=data.REP_symptoms['date'], y=data.REP_symptoms['rating/amount'], trendline="rolling", trendline_options={'window': 7}, name='trend')
    symp_trend = symp_trend.data[1]

    fig.add_trace(go.Scatter(x=dates, y=data_obj.REP_symptoms['rating/amount'],
                        mode='lines',
                        name='Symptoms',
                        line_shape='spline',
                        connectgaps=True))
    fig.add_trace(go.Scatter(symp_trend, name='Symptoms Trend', line={'color' : '#fd7776'}))
    fig.add_trace(go.Scatter(x=dates, y=data_obj.INT_energy['rating/amount'],
                        mode='lines',
                        name='Energy',
                        line_shape='spline',
                        connectgaps=True))
    fig.add_trace(go.Scatter(x=dates, y=data_obj.INT_sleep['rating/amount'],
                        mode='lines',
                        name='Sleep',
                        line_shape='spline',
                        connectgaps=True))
                        

    fig.update_layout(
        width=900,
        height=400,
        autosize=False,
        # margin=dict(t=40, b=10, l=10, r=10),
        template="plotly",
        )

    return fig



In [17]:
fig = REP_graph(data)

fig.show()

In [18]:
dates = data.REP_dates['date']
fig = px.scatter(x=data.REP_symptoms['date'], y=data.REP_symptoms['rating/amount'], trendline="rolling", trendline_options={'window': 7})
fig.show()


In [23]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=dates, y=data.REP_symptoms['rating/amount'],
                mode='lines',
                name='Symptoms',
                line_shape='spline',
                connectgaps=True))
fig.add_trace(go.histogram(x=dates, y=data.REP_symptoms['rating/amount'], histfunc='avg'))  

fig.show()
    

TypeError: 'module' object is not callable