# Year in Pixels Visualizations

In [1]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap, to_rgba, Normalize
import seaborn as sns
import json
import datetime

In [2]:
# load externally defined environment variables
from dotenv import dotenv_values
env_vars = dotenv_values()

In [3]:
# load color palette from environment variables
palette_list = [env_vars["COLOR_PALETTE_{}".format(i)] for i in range(1,6)]
palette_list

['ff2700', '88220d', '151518', '213788', '00bfff']

In [None]:
# data extraction into Pandas DataFrame
year = env_vars['YEAR']

with open('./data/data_{}.json'.format(year)) as fp:
    data = json.load(fp)
data = pd.DataFrame(data)
data = data.drop(columns=['type'])

In [5]:
# data cleaning
data['scores'] = data['scores'].apply(lambda x : int(x[0])) # convert scores from lists to integers
data['date'] = data['date'].apply(lambda x : datetime.datetime.strptime(x, '%Y-%m-%d')) # convert dates from strings to datetime objects

# make a column for each tag
tags = {}
for index, row in data.iterrows():
    row_data = {}
    tags_list = row['tags']
    for tag in tags_list:
        if tag['type'] not in row_data:
            row_data = {**row_data, tag['type'] : tag['entries']}
        else:
            row_data[tag['type']].extend(tag['entries'])
    tags = {**tags, index : row_data}

tags = pd.DataFrame(tags).T

data = pd.concat([data, tags], axis=1).drop(columns=['tags'])

data['Productivity Rating'] = data['Productivity Rating'].apply(lambda x : int(x[0]) if isinstance(x, list) else 0) # convert productivity ratings from list to int

# Filter for this year's and last year's data
def check_year(year_to_check : datetime.date, reference : int):
    return True if year_to_check.year == reference else False

data_all = data

mask_previous_year = data_all['date'].apply(lambda x : check_year(x, int(year)-1))
data_previous_year = data_all[mask_previous_year]

mask_current_year = data_all['date'].apply(lambda x : check_year(x, int(year)))
data_current_year = data_all[mask_current_year]

# Pixels (Github-like viz)

In [6]:
# Pixels viz function (each row is one week, squares are a little spaced out and rounded, month labels are visible)
def generate_pixels_heatmap(df : pd.DataFrame, palette : list[str], output_file : str):
    """
    Generates a GitHub-style heatmap chart.

    Parameters:
        df (pd.DataFrame): DataFrame with 'date' (datetime) and 'scores' columns.
        palette (list): List of hexadecimal color strings defining the color palette.
        output_file (str): Path to save the resulting PNG image.

    Returns:
        None
    """
    # Ensure 'date' column is datetime
    df['date'] = pd.to_datetime(df['date'])

    # Create a full year of dates for the current year
    year = df['date'].dt.year.min()
    start_date = pd.Timestamp(f'{year}-01-01')
    end_date = pd.Timestamp(f'{year}-12-31')
    all_dates = pd.date_range(start=start_date, end=end_date, freq='D')

    # Create a DataFrame for all days of the year
    all_days = pd.DataFrame({'date': all_dates})

    # Merge with the input DataFrame to align data
    df = all_days.merge(df, on='date', how='left').fillna({'rating': 0})

    # Add week and day of the week for plotting
    df['week'] = df['date'].dt.isocalendar().week
    df['day_of_week'] = df['date'].dt.weekday

    # Handle the edge case where week 53 exists in the data
    if df['week'].max() == 53:
        df.loc[df['week'] == 53, 'week'] = 52

    # Ensure unique entries for pivot table by averaging ratings for duplicate days
    df = df.groupby(['week', 'day_of_week'], as_index=False)['scores'].mean()

    # Create a pivot table for heatmap data
    heatmap_data = df.pivot(index='day_of_week', columns='week', values='scores')

    # Convert hexadecimal palette to RGBA
    rgba_palette = [to_rgba('#'+color) for color in palette]

    # Create a custom colormap
    cmap = LinearSegmentedColormap.from_list("custom_palette", rgba_palette, N=256)

    # Plot the heatmap
    plt.figure(figsize=(12, 8))
    sns.heatmap(
        heatmap_data,
        cmap=cmap,
        linewidths=0.5,
        linecolor='black',
        cbar=False,
        square=True
    )

    # TODO: rounded squares, padding among squares
    # TODO: xticks are months, yticks are weekdays
    # TODO: add possibility to rotate 90° clockwise

    # Style the plot to look like GitHub
    plt.gca().invert_yaxis()
    plt.axis('off')

    # Save the heatmap to a PNG file with a transparent background
    plt.savefig(output_file, dpi=600, bbox_inches='tight', pad_inches=0, transparent=True)
    plt.close()

In [7]:
# Execution (2024)
generate_pixels_heatmap(data_current_year.copy(), palette_list, 'pixels_grid.png')

# Average Rating YoY

In [8]:
# Avg rating viz function, YoY increments if existing
def plot_average_rating(data : pd.DataFrame, palette_list : list[str], output_file : str):
    """
    Plot a colorbar representing the ratings range and draw the average rating on it.

    Parameters:
        data (pd.DataFrame): DataFrame with 'scores' column.
        palette_list (list): List of hexadecimal color strings defining the color palette.
        output_file (str): Path to save the resulting PNG image.

    Returns:
        None.
    """
    # Define the ratings range
    ratings_range = (0, 5)

    # Calculate the average rating
    average_rating = data['scores'].mean()

    # Create a LinearSegmentedColormap from the palette
    rgba_palette = [to_rgba('#'+color) for color in palette_list]
    cmap = LinearSegmentedColormap.from_list("custom_palette", rgba_palette, N=256)

    # Create a figure and axis
    fig, ax = plt.subplots(figsize=(10, 1))

    range_x_axis = np.linspace(ratings_range[0], ratings_range[1], len(palette_list)*256)

    # Draw the colorbar
    norm = Normalize(vmin=ratings_range[0], vmax=ratings_range[1])
    thinness = 20
    aspect = 1/thinness
    cb = ax.imshow(range_x_axis.reshape((1,len(palette_list)*256)), cmap=cmap, aspect=aspect, norm=norm, extent=[ratings_range[0], ratings_range[1],0, thinness])
    ax.set_yticks([])
    ax.set_xticks([])

    # TODO: Draw the rating milestones
    # milestones = range_x_axis
    # for milestone in milestones:
    #     ax.scatter(milestone * 256 / ratings_range[1], 0.5, color=cmap(norm(milestone)), s=800, edgecolors='black', zorder=20)

    # Draw the average rating indicator
    ax.axvline(average_rating, color='white', linewidth=5, zorder=10)
    ax.text(ratings_range[0] - 0.1, 0.5, f'{average_rating:.2f}', color='white', ha='right', va='bottom', fontsize=48, fontweight='bold', fontname='Helvetica')
    fig.set_facecolor('black')

    plt.savefig(output_file, dpi=600, bbox_inches='tight', pad_inches=0, transparent=True)
    plt.close()

In [9]:
# Show the plot
plot_average_rating(data_current_year.copy(), palette_list, 'avg_rating_curr_year.png')

In [43]:
# Line chart of avg rating per year
def plot_avg_ratings_per_year(data_all : pd.DataFrame, label : str, palette_list : list[str], output_file : str):
    """
    Plot a line chart with value averages per year.

    Parameters:
        data_all (pd.DataFrame): DataFrame containing timed ratings.
        label (str): The column of ratings to be averaged.
        palette_list (list): List of hexadecimal color strings defining the color palette.
        output_file (str): Path to save the resulting PNG image.

    Returns:
        None.
    """

    years = list(range(
        data_all.loc[0,['date']].to_numpy()[0].year,
        data_all.loc[data_all.shape[0]-1, ['date']].to_numpy()[0].year+1,
        1))

    avgs = []
    for year in years:
        filter = data_all['date'].apply(lambda x : check_year(x, year))
        data_filtered = data_all[filter]
        avgs.append(data_filtered[label].mean())

    fig, ax = plt.subplots(figsize=(5,4))

    for data_point in range(len(avgs)):
        ax.annotate(f'{avgs[data_point]:.2f}', (years[data_point]+0.1,avgs[data_point]), color='white', fontname='Helvetica', fontsize=15, fontweight='bold')
        if data_point > 0 :
            yoy_diff = avgs[data_point] - avgs[data_point - 1]
            sign = '-' if yoy_diff < 0 else '+'
            color = '#'+palette_list[0] if yoy_diff < 0 else '#'+palette_list[-1]
            lateral_offset = years[data_point - 1] + 0.6
            vertical_offset = (avgs[data_point] + avgs[data_point - 1]) / 2.0
            ax.annotate(f'{sign}{yoy_diff:.2f}', (lateral_offset, vertical_offset), color=color, fontname='Helvetica', fontsize=12, fontweight='bold')
        ax.axvline(years[data_point], ymin=0.0, ymax=(avgs[data_point]-min(avgs))/(max(avgs)-min(avgs)) - 0.025, color='white', linestyle='--')

    ax.plot(years, avgs, 'o-', color='white', linewidth=3.5, markersize=10)
    ax.set_yticks([])
    ax.set_xticks(years, labels=years, fontname='Helvetica', fontweight='bold', fontsize=17, color='white')
    fig.set_facecolor('black')
    ax.set_facecolor('black')

    ax.set_frame_on(False)

    plt.savefig(output_file, dpi=600, bbox_inches='tight', pad_inches=0, transparent=True)
    plt.close()

In [44]:
# Execution (2024)
plot_avg_ratings_per_year(data_all.copy(), 'scores', palette_list, 'avg_ratings_per_year.png')

# Rating Frequency

In [12]:
# Rating frequency pie chart (with YoY increments if existing) viz function

In [13]:
# Execution (2024)

# Avg Rating per Weekday

In [143]:
# (Normalized) Avg rating/productivity level per weekday/tag viz function, with YoY increments if existing
def plot_avg_ratings_per_weekday(data_all : pd.DataFrame, label : str, palette_list : list[str], output_file : str):
    """
    Plot a bar chart with value averages per weekday.

    Parameters:
        data_all (pd.DataFrame): DataFrame containing timed ratings.
        label (str): The column of ratings to be averaged.
        palette_list (list): List of hexadecimal color strings defining the color palette.
        output_file (str): Path to save the resulting PNG image.

    Returns:
        None.
    """

    assert label in data_all.columns, f"No column named '{label}'"
    # assert data_all[label].dtype not a string and not a list

    curr_year = data_all.loc[data_all.shape[0]-1, ['date']].to_numpy()[0].year

    total_avg = 0

    yearly_avgs = []
    for year in range (curr_year-1, curr_year+1):
        mask_year = data_all['date'].apply(lambda x : check_year(x, year))
        data_year = data_all[mask_year]
        data_year['weekday'] = data_year['date'].apply(lambda x : datetime.date.weekday(x))
        yearly_avgs.append(data_year.groupby('weekday', axis=0)[label].mean().to_numpy())
        if year == curr_year:
            total_avg = float(data_year[label].mean())

    fig, ax = plt.subplots(figsize=(9,4))

    yoy_diffs = [yearly_avgs[-1][i] - yearly_avgs[0][i] for i in range(7)]
    for j in range(7):
        yoy_diffs[j] = '+'+f'{yoy_diffs[j]:.2f}' if yoy_diffs[j] > 0 else f'{yoy_diffs[j]:.2f}'
        if '0.00' in yoy_diffs[j] : yoy_diffs[j] = '='

    # TODO: rounded corners
    container = ax.bar(range(7), yearly_avgs[-1], color='white', width=0.3)
    ax.bar_label(container, labels=[f"{yearly_avgs[-1][i]:.2f}({yoy_diffs[i]})" for i in range(7)], fontname='Helvetica', fontsize=11, color='white', padding=18)
    ax.axhline(total_avg, xmin=0.04, color='#'+palette_list[0], linestyle='--')
    ax.annotate('AVG', (-0.45, total_avg), fontname='Helvetica', fontsize=8, color='#'+palette_list[0], va='center')
    ax.set_yticks([])
    ax.set_xticks(range(7), labels=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'], fontname='Helvetica', fontweight='bold', fontsize=17, color='white')
    ax.set_ylim(0, 5)
    fig.set_facecolor('black')
    ax.set_facecolor('black')

    ax.set_frame_on(False)

    plt.savefig(output_file, dpi=600, bbox_inches='tight', pad_inches=0, transparent=True)
    plt.close()

In [144]:
# Execution (2024, rating)
plot_avg_ratings_per_weekday(data_all.copy(), 'scores', palette_list, 'avg_ratings_per_weekday.png')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_year['weekday'] = data_year['date'].apply(lambda x : datetime.date.weekday(x))
  yearly_avgs.append(data_year.groupby('weekday', axis=0)[label].mean().to_numpy())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_year['weekday'] = data_year['date'].apply(lambda x : datetime.date.weekday(x))
  yearly_avgs.append(data_year.groupby('weekday', axis=0)[label].mean().to_numpy())


# Emotion Frequency

In [217]:
# Tag frequency viz function (descending sort by frequency)
def plot_tag_frequency(data_current_year : pd.DataFrame, tag_category : str, palette_list : list[str], output_file : str):
    """"""

    assert tag_category in data_current_year.columns, f"No tag category called {tag_category}"

    # collect all possible tag values
    masked_data = data_current_year[data_current_year[tag_category].notna()] # masked DataFrame
    tag_values = masked_data[tag_category].sum() # list with duplicates
    tag_values = list(dict.fromkeys(tag_values)) # remove duplicates

    # count tag appearances
    tag_counters = {tag : 0 for tag in tag_values}
    for index, row in masked_data.iterrows(): # iterate over masked DataFrame to avoid null values
        tags_of_the_day = list(row[tag_category])
        for tag in tags_of_the_day:
            tag_counters[tag] += 1

    # sort for number of appearances (descending)
    tag_counters = pd.DataFrame(tag_counters.values(), index=tag_counters.keys(), columns=['count'])
    tag_counters.sort_values(by='count', ascending=True, inplace=True) # plotting function will flip the order anyways

    # draw horizontal bar chart
    fig, ax = plt.subplots(figsize=(9,4)) 

    data = tag_counters['count'].to_numpy(dtype=np.int32)

    container = ax.barh(y=range(0,len(tag_counters.index)), data=data, width=data, height=0.5, color='white')
    ax.bar_label(container, data, color='white', fontname='Helvetica', fontsize=9, padding=6)

    ax.set_yticks(range(0,len(tag_counters.index)), labels=tag_counters.index, color='white', fontname='Helvetica', fontsize=11)
    ax.set_xticks([])
    fig.set_facecolor('black')
    ax.set_facecolor('black')

    ax.set_frame_on(False)

    plt.savefig(output_file, dpi=600, bbox_inches='tight', pad_inches=0, transparent=True)
    plt.close()    

In [218]:
# Execution (2024, emotions)
plot_tag_frequency(data_current_year, 'Emotions', palette_list, 'emotion_frequency.png')

# Activity Frequency

In [219]:
# Execution (2024, activities)
plot_tag_frequency(data_current_year, 'Activities', palette_list, 'activity_frequency.png')

# Activity-Emotion Correlation

In [19]:
# Pairwise tag-tag correlation (Pearson) heatmap function

In [20]:
# Execution (2024, activity v. emotion)

# Location-Emotion Correlation

In [21]:
# Execution (2024, location v. emotion)

# Sick Days 🤒

In [22]:
# Number of symptoms + number of pharmaceuticals calendar viz function, ratings in semi-transparency, skull 💀 emoji if bad day + sick day

In [23]:
# Total number of sick (💀) days per year line chart

In [24]:
# Execution (2024)

# Productivity

In [25]:
# Productivity calendar viz function, ratings in semi-transparency

In [146]:
# Average productivity per year line chart
plot_avg_ratings_per_year(data_all.copy(), 'Productivity Rating', palette_list, 'avg_productivity_per_year.png')

In [145]:
# Avg productivity per weekday execution (2024)
plot_avg_ratings_per_weekday(data_all.copy(), 'Productivity Rating', palette_list, 'avg_productivity_per_weekday.png')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_year['weekday'] = data_year['date'].apply(lambda x : datetime.date.weekday(x))
  yearly_avgs.append(data_year.groupby('weekday', axis=0)[label].mean().to_numpy())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_year['weekday'] = data_year['date'].apply(lambda x : datetime.date.weekday(x))
  yearly_avgs.append(data_year.groupby('weekday', axis=0)[label].mean().to_numpy())


In [28]:
# Execution (2024)

# Weighted Avg Rating per Tag

In [None]:
# (Normalized) avg rating per tag viz function

In [29]:
# (Normalized) Avg rating per productivity level execution (2024)

In [30]:
# (Normalized) Avg rating per location execution (2024)

In [31]:
# (Normalized) Avg rating per emotion execution (2024)

In [32]:
# (Normalized) Avg rating per activity execution (2024)

In [33]:
# (Normalized) Avg rating per medication execution (2024)

In [34]:
# (Normalized) Avg rating per symptom execution (2024)

# Locations Visited 🛫

In [35]:
# 3D Map with visit frequency and average rating per location viz function

In [36]:
# Execution (2024)

# Word Clouds ☁️

In [37]:
# Word cloud per daily notes (filtered by rating) viz function

In [38]:
# Execution (2024, all days)

In [39]:
# Execution (2024, bad days: rating 1,2)

In [40]:
# Execution (2024, good days: rating 4,5)