In [None]:
import json

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Apply matplotlib's 'ggplot' style sheet (grey theme); the setting is global,
# so every figure drawn after this cell uses it
plt.style.use('ggplot')

In [None]:
# Load the raw activity CSV into `df`; the path is relative, so it is resolved
# against the working directory the kernel was started in
df = pd.read_csv('../data/raw/activity.csv')

In [None]:
# Preview the first rows of the freshly loaded frame (before any dtype fixes)
df.head()

In [None]:
# Fix data types.
# 'Belopp' is read as text with ',' as the decimal mark, so the comma is
# swapped for a point before casting to float. The dtype guard keeps the cell
# idempotent: once the column is numeric it has no `.str` accessor, and
# re-running the unguarded conversion would raise AttributeError.
if not pd.api.types.is_numeric_dtype(df['Belopp']):
    df['Belopp'] = (
        df['Belopp']
        .str.replace(',', '.', regex=False)  # literal comma, not a regex
        .astype(float)
    )

# Parsing an already-datetime column is a no-op, so this line is safe to re-run.
df['Datum'] = pd.to_datetime(df['Datum'])

In [None]:
# Display the frame again to check the result of the dtype conversion above
df

In [None]:
# Daily spend as a bar chart.
# Sum 'Belopp' per 'Datum', then resample to daily frequency so that days
# without any row are included; summing an empty daily bin yields 0, so no
# extra fill step is needed.
data_to_plot = (
    df.groupby('Datum')['Belopp']
    .sum()
    .resample('D')
    .sum()
    .reset_index()
)

# 'MM-DD' labels keep the categorical x axis compact.
# NOTE(review): the label drops the year, so if the data spans more than one
# year, identical month-day values would be merged into one bar by seaborn's
# estimator -- confirm the date range or include the year in the label.
data_to_plot['Month_Day'] = data_to_plot['Datum'].dt.strftime('%m-%d')

# Plot the summed spend for each day
fig, ax = plt.subplots(figsize=(12, 5))
sns.barplot(data=data_to_plot, x='Month_Day', y='Belopp', color='C1', ax=ax)

ax.set_xlabel('')
ax.set_ylabel('Spend (kr)')
ax.set_title('')
ax.grid(True)
# One tick per day: rotate the labels so they do not overlap
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Function to preprocess data for plotting
def preprocess_data(df):
    """Aggregate 'Belopp' to one row per calendar day.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-typed 'Datum' column and a numeric 'Belopp'
        column. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns 'Datum' and 'Belopp', one row for every day between the first
        and last 'Datum' in `df`; days without any row have 'Belopp' == 0.
    """
    # Sum all amounts that share the same timestamp.
    per_date = df.groupby('Datum')['Belopp'].sum()
    # Resample to daily bins. `.sum()` is the aggregation that fills the gaps:
    # an empty bin sums to 0. (`Resampler.fillna` expects a fill *method* such
    # as 'ffill', not a fill value, so `.resample('D').fillna(0)` raises.)
    per_day = per_date.resample('D').sum()
    # Back to a flat frame with 'Datum' as a regular column for plotting.
    return per_day.reset_index()

# Function to plot daily and cumulative spend as two lines on the same figure
def plot_combined(data_to_plot):
    """Draw daily spend and its running total as two line plots on one figure.

    Parameters
    ----------
    data_to_plot : pandas.DataFrame
        Frame with a 'Datum' column (x axis) and a 'Belopp' column (amount per
        row). The cumulative sum runs in the frame's existing row order.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Notes
    -----
    The cumulative column is added to a copy of the input, so the caller's
    frame is not modified.
    """
    # `assign` returns a new frame; the argument itself stays untouched.
    plot_frame = data_to_plot.assign(
        Cumulative_Belopp=data_to_plot['Belopp'].cumsum()
    )

    _, ax = plt.subplots(figsize=(12, 5))

    # Line plot for the per-row (daily) 'Belopp'
    sns.lineplot(data=plot_frame, x='Datum', y='Belopp',
                 color='C0', marker='o', label='Daily spend', ax=ax)

    # Line plot for the cumulative sum
    sns.lineplot(data=plot_frame, x='Datum', y='Cumulative_Belopp',
                 color='C1', marker='o', label='Cumulative spend', ax=ax)

    # Customize labels and grid
    ax.set_xlabel('')
    ax.set_ylabel('Spend (kr)')
    ax.set_title('Daily Spend and Cumulative Spend Over Time')
    ax.grid(True)
    # Both series share one y axis, so the legend is what tells them apart.
    ax.legend()

    # Show the plot
    plt.show()

# Main code
# Requires `df` from the cells above with 'Datum' already converted to
# datetime, because preprocess_data resamples on that column.
# Note: this rebinds `data_to_plot`, replacing the frame built for the bar chart.
data_to_plot = preprocess_data(df)
plot_combined(data_to_plot)

In [None]:
# Serialise the 'Beskrivning' column to a JSON string
json_data = df['Beskrivning'].to_json()

# Round-trip through the json module so the text can be re-emitted with
# 4-space indentation and with non-ASCII characters left unescaped
formatted_json = json.dumps(
    json.loads(json_data),
    ensure_ascii=False,
    indent=4,
)

# Output the readable JSON
print(formatted_json)