Requires the todoist-python library. This notebook also uses [calmap](https://pythonhosted.org/calmap/), which creates GitHub-style calendar heatmaps.

In [None]:
# Uncomment and run these lines once if the two libraries are not yet installed.
#!pip install todoist-python
#!pip install calmap

# Todoist analysis 

Todoist is a web-based to-do list app. This notebook provides some analysis of a private Todoist subscription using the Todoist API. You will need to add your API token to the `token` variable in the next section.

In [None]:
from todoist.api import TodoistAPI
import pandas as pd 
from IPython.core.display import display, HTML
import matplotlib.pyplot as plt
import datetime as dt
import numpy as np
import calmap

import os

# Prefer a token supplied through the TODOIST_API_TOKEN environment variable so
# the secret does not have to be saved inside the notebook. When the variable
# is not set, fall back to the placeholder, which can still be edited by hand.
token = os.environ.get('TODOIST_API_TOKEN', 'ADD TODOIST TOKEN HERE')

# Create the client and sync; the cells below read their data from `api.state`.
api = TodoistAPI(token)
api.sync()

## Raw Data

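Data is pulled into three dataframes. The analysis later in this notebook is built on `df_activities`; `df_projects` supplies the project names, and `df_tasks` is shown only as an example.

- `df_projects`: all existing projects
- `df_tasks`: all current tasks
- `df_activities`: all activities (current and historic)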

"Activities" are the key to the analysis - this shows when items have been added, updated, completed (it also tracks undo), and deleted.

### Projects.
First, let's pull in the projects

In [3]:

# Each project in the synced state keeps its raw fields in `.data`;
# collect those records and load them into a dataframe in one step.
projects = [entry.data for entry in api.state['projects']]
df_projects = pd.DataFrame(data=projects)

df_projects.head()

### Existing tasks

Then let's pull in existing tasks.

This dataframe is not used later in this notebook, but is provided for an example of what data exists. 

In [None]:
# Current tasks live under the 'items' key of the synced state; as with the
# projects, the raw fields of each one are exposed through `.data`.
tasks = [entry.data for entry in api.state['items']]
df_tasks = pd.DataFrame(data=tasks)

df_tasks.head()

In [None]:
# Timezone the UTC timestamps are converted to.
local_tz = 'US/Eastern'  # my current timezone

# Mark tasks without due information with 0. The result is assigned back to
# the column because a chained `fillna(..., inplace=True)` on a column
# selection has no effect once pandas copy-on-write is active.
df_tasks['due'] = df_tasks['due'].fillna(0)

# Rows that have due information hold a dict in `due`; extract the two fields
# of interest from it. Every other row keeps the '' placeholder. Checking the
# type explicitly replaces a bare `except: pass`, so unrelated errors are no
# longer hidden.
df_tasks['due_date'] = pd.Series(
    [due.get('date') if isinstance(due, dict) else '' for due in df_tasks['due']],
    index=df_tasks.index,
    dtype=object)
df_tasks['recurring'] = pd.Series(
    [due.get('is_recurring') if isinstance(due, dict) else '' for due in df_tasks['due']],
    index=df_tasks.index,
    dtype=object)

# Convert Date strings (in UTC by default) to timezone-naive local datetimes:
# parse as UTC, shift to the local zone, drop the zone information and
# truncate to whole seconds. Unparseable/empty values become NaT.
for column in ('date_added', 'due_date', 'date_completed'):
    df_tasks[column] = (pd.to_datetime(df_tasks[column], utc=True)
                        .dt.tz_convert(local_tz)
                        .dt.tz_localize(None)
                        .dt.floor('s'))

# Lookup table project id -> project name, also used by the activity cells.
map_project = dict(zip(df_projects['id'], df_projects['name']))
df_tasks['project_name'] = df_tasks['project_id'].map(map_project)

### All activities.

This puts all of the activities into a single dataframe.

The API returns at most about 100 activities per request, but it is possible to loop through multiple 'pages' of activities to download the entire set.


In [None]:
# The API limits 100 activities to be retrieved per call, so a loop is needed:
# keep requesting the next page until one comes back without events.

# Items are retrieved in descending order by date.
activity_list = []
page = 1

while True:
    activities = api.activity.get(limit=100, page=page)
    events = activities['events']
    if not events:
        break
    for activity in events:
        # Flatten the nested `extra_data` fields into the event itself.
        # Events without (or with empty) extra data are left as they are.
        extra = activity.get('extra_data') or {}
        activity.update(extra)
        activity.pop('extra_data', None)
    activity_list.extend(events)
    page += 1

# Build the dataframe once from all collected events. `DataFrame.append` was
# removed in pandas 2.0 and copied the whole frame for every page. The result
# has a plain 0..n-1 index instead of one that restarts on every page.
df_activities = pd.DataFrame(activity_list)


In [None]:

# Work on a copy of the raw activities. Converting the columns of
# `df_activities` itself would make this cell non-repeatable: on a second run
# the already-local timestamps would be read as UTC and shifted again.
df_activity = df_activities.copy()

# Timezone the UTC timestamps are converted to.
local_tz = 'US/Eastern'

# Convert Date strings (in UTC by default) to timezone-naive local datetimes:
# parse as UTC, shift to the local zone, drop the zone information and
# truncate to whole seconds.
for column in ('due_date', 'event_date', 'last_due_date'):
    df_activity[column] = (pd.to_datetime(df_activity[column], utc=True)
                           .dt.tz_convert(local_tz)
                           .dt.tz_localize(None)
                           .dt.floor('s'))

# Set DataFrame index as the EVENT_DATE (will make it easier to plot later)
df_activity = df_activity.set_index('event_date')

# Add project name to DataFrame, using the id -> name mapper built earlier
df_activity['project_name'] = df_activity['parent_project_id'].map(map_project)

df_activity.head(10)

## Analysis

Now that everything is in a single dataframe, let's compute some overall stats.

In [None]:
# Count events per timestamp and event type once (one column per event type);
# the daily and weekly averages below are both derived from this table.
events_by_type = (df_activity.groupby([df_activity.index, 'event_type'])
                  .size()
                  .unstack())

# Get DAILY AVERAGE of each event type
df_daily_event_avgs = events_by_type.resample('D').sum().mean()

# Get WEEKLY AVERAGE of each event type
df_weekly_event_avgs = events_by_type.resample('W').sum().mean()

# Get SUM of each event type
df_event_sums = df_activity.groupby('event_type').size()

#--------------------------------------------------
# Profile info
premium = api.state['user']['premium_until']
karma = api.state['user']['karma']
daily_goal = api.state['user']['daily_goal']
weekly_goal = api.state['user']['weekly_goal']

# Dates - taken from the earliest and latest event so the result does not
# depend on the order of the rows in the dataframe.
first_event = df_activity.index.min()
last_event = df_activity.index.max()
start_date = first_event
duration = str(last_event - first_event)

# Averages - `.get(..., 0)` reports 0 for an event type that never occurred
# instead of failing with a KeyError.
daily_avg_adds = df_daily_event_avgs.get('added', 0)
daily_avg_completes = df_daily_event_avgs.get('completed', 0)
daily_avg_updates = df_daily_event_avgs.get('updated', 0)
daily_avg_deletes = df_daily_event_avgs.get('deleted', 0)

weekly_avg_adds = df_weekly_event_avgs.get('added', 0)
weekly_avg_completes = df_weekly_event_avgs.get('completed', 0)
weekly_avg_updates = df_weekly_event_avgs.get('updated', 0)
weekly_avg_deletes = df_weekly_event_avgs.get('deleted', 0)

# Sums
sum_adds = df_event_sums.get('added', 0)
sum_completes = df_event_sums.get('completed', 0)
sum_updates = df_event_sums.get('updated', 0)
sum_deleted = df_event_sums.get('deleted', 0)

# The placeholders are filled positionally, in the order the values are
# listed in the `.format(...)` call below.
display(HTML('''<h1>My Todoist Stats</h1>
<br>

Started using at............................: {:%B %d, %Y} <br>
Used so far..................................: {}<br>
Premium until...............................: {}<br>
Karma points................................: {:.0f}<br>



Daily tasks goal............................: {}<br>
Weekly tasks goal...........................: {}<br>


<br>_____________________________________________________<br><br>

Total added tasks...........................: {:.0f}<br>
Total completed tasks.......................: {:.0f}<br>
Total updated (re-scheduled) tasks..........: {:.0f}<br>
Total deleted ..............................: {:.0f}<br>
<br>_____________________________________________________<br><br>

Average tasks added per day.................: {:.1f}<br>
Average tasks completed per day.............: {:.1f}<br>
Average tasks updated (re-scheduled) per day: {:.1f}<br>
Average tasks deleted per day...............: {:.1f}<br>
<br>_____________________________________________________<br><br>
Average tasks added per week.................: {:.1f}<br>
Average tasks completed per week.............: {:.1f}<br>
Average tasks updated (re-scheduled) per week: {:.1f}<br>
Average tasks deleted per week: {:.1f}<br>
<br>           
             
             '''.format(
    start_date, duration, premium, karma,
    daily_goal, weekly_goal,
    sum_adds, sum_completes, sum_updates, sum_deleted,
    daily_avg_adds, daily_avg_completes, daily_avg_updates, daily_avg_deletes,
    weekly_avg_adds, weekly_avg_completes, weekly_avg_updates, weekly_avg_deletes)))


### Daily activity chart

The following is a daily activity chart including the daily average and the goal. 

Change the year or month passed to `.loc` (e.g. `'2021-01'`) to see a different time period, or remove the `.loc[...]` call completely to see the entire dataset.

In [None]:

# Daily totals per event type: count events per timestamp and type, pivot the
# event types into columns, then sum within each calendar day.
event_counts = df_activity.groupby([df_activity.index, 'event_type']).size()
df_event_by_day = event_counts.unstack().resample('D').sum()

# Plot completed tasks - change year or month under 'loc' e.g. '2019' or '2019-01'
completed_in_period = df_event_by_day[['completed']].loc['2021-01']
daily_activities = completed_in_period.plot(figsize=(15, 8), lw=3)

daily_activities.set_title('Completed tasks over days', fontsize=20)

# Reference lines: the overall daily average of completed tasks and the goal
daily_activities.axhline(daily_avg_completes, linestyle='--', color='g', label='daily average')
daily_activities.axhline(daily_goal, linestyle=':', color='y', label='goal')
daily_activities.legend(fontsize=12)

### Annual activity by day

This section creates some 'github'-like charts based on the amount of activity

In [None]:
# One entry per heatmap, top to bottom:
# (event type column, year, word used in the title, styling passed to calmap).
yearplot_panels = [
    ('added', 2020, 'Added', {'cmap': 'Reds', 'fillcolor': 'aqua'}),
    ('added', 2021, 'Added', {'cmap': 'YlGn', 'fillcolor': 'grey'}),
    ('completed', 2020, 'Completed', {'fillcolor': 'grey'}),
    ('completed', 2021, 'Completed', {'cmap': 'YlGn', 'fillcolor': 'grey'}),
    ('updated', 2020, 'Updated', {'cmap': 'Reds'}),
    ('updated', 2021, 'Updated', {'cmap': 'YlGn'}),
    ('deleted', 2020, 'Deleted', {'cmap': 'Reds'}),
    ('deleted', 2021, 'Deleted', {'cmap': 'YlGn'}),
]

fig, ax = plt.subplots(len(yearplot_panels), 1, sharex=True)

for panel_ax, (event_type, year, label, style) in zip(ax, yearplot_panels):
    calmap.yearplot(df_event_by_day[event_type], year=year, ax=panel_ax, **style)
    # Titles follow the pattern '<year> Day <event>', e.g. '2021 Day Completed'.
    panel_ax.title.set_text('{} Day {}'.format(year, label))

fig.set_size_inches(70, 25, forward=True)

### Monthly activity by project name

In [None]:
# Keep only the 'completed' events from January 2021.
january_events = df_activity.loc['2021-01']
df = january_events[january_events['event_type'] == 'completed']

# Completions per project, summed per calendar day (one column per project).
completed_counts = df.groupby([df.index, 'project_name']).size()
df_event_by_project_by_day = completed_counts.unstack().resample('D').sum()

ax = df_event_by_project_by_day.plot.bar(stacked=True, figsize=(20, 8))

## Total activity by project

In [None]:
# Number of events per project and event type during 2020
# (rows: project name, columns: event type).
events_2020 = df_activity.loc['2020']
df_event_by_project = (events_2020
                       .groupby(['project_name', 'event_type'])
                       .size()
                       .unstack())

# Horizontal bars for the three event types of interest.
plot_options = dict(title='Task activities per project',
                    figsize=(12, 8),
                    kind='barh',
                    fontsize=12,
                    width=.7)
project_counts = df_event_by_project[['added', 'completed', 'updated']].plot(**plot_options)

project_counts.set_ylabel('Project Name')
project_counts.set_xlabel('Tasks')

### Activity type by day of the week

In [None]:
# pandas numbers weekdays Monday=0 ... Sunday=6, so the labels start on Monday.
WEEKDAY_LABELS = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']


def _plot_weekday_activity(events_by_weekday, title):
    """Plot added/completed/updated counts per weekday and return the Axes.

    events_by_weekday -- dataframe indexed by weekday number (Monday=0) with
                         one column per event type.
    title             -- title shown above the chart.
    """
    axes = (events_by_weekday[['added', 'completed', 'updated']]
            .plot(figsize=(13, 8),
                  lw=3,
                  marker='.',
                  markersize=12,
                  grid=True))
    axes.set_xlabel('Weekday')
    axes.set_ylabel('Tasks')
    # Pin one tick to each weekday number before labelling it, so every label
    # is attached to the weekday it names.
    axes.set_xticks(list(range(len(WEEKDAY_LABELS))))
    axes.set_xticklabels(WEEKDAY_LABELS)
    axes.legend(fontsize=12)
    axes.set_title(title, fontsize=20)
    return axes


# Events per weekday and event type, separately for each year.
events_2020 = df_activity.loc['2020']
df_event_by_weekday_2020 = (events_2020
                            .groupby([events_2020.index.dayofweek, 'event_type'])
                            .size()
                            .unstack())

events_2021 = df_activity.loc['2021']
df_event_by_weekday_2021 = (events_2021
                            .groupby([events_2021.index.dayofweek, 'event_type'])
                            .size()
                            .unstack())

weekday_activities_2020 = _plot_weekday_activity(
    df_event_by_weekday_2020, 'Activities vs Day of Week 2020')

weekday_activities = _plot_weekday_activity(
    df_event_by_weekday_2021, 'Activities vs Day of Week 2021')

### Project activity by day of the week 

Add additional project names to the list of columns selected from `df_event_by_weekday`.

In [None]:
# Number of events (of every event type) per weekday and project.
# pandas numbers weekdays Monday=0 ... Sunday=6.
df_event_by_weekday = df_activity.groupby([df_activity.index.dayofweek, 'project_name']).size().unstack()

# Projects to plot - each name must exist as a project in the data.
weekday_activities = (df_event_by_weekday[['Personal', 'Work', 'Inbox']]
                      .plot(figsize=(13, 8),
                            lw=3,
                            marker='.',
                            markersize=12,
                            grid=True))

weekday_activities.set_xlabel('Weekday')
weekday_activities.set_ylabel('Tasks')
# Pin one tick to each weekday number before labelling it, so every label is
# attached to the weekday it names (0 is Monday).
weekday_activities.set_xticks(list(range(7)))
weekday_activities.set_xticklabels(
    ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])

weekday_activities.legend(fontsize=12)
weekday_activities.set_title('Activities vs Day of Week', fontsize=20)

### Daily Tasks

Adjust the time period by changing the argument of `.loc`, e.g. to `'2020'` or `'2020-01'`.

In [None]:
# Completed events from 2021 whose content is one of the daily tasks.
events_2021 = df_activity.loc['2021']
daily_task_names = ['task1', 'task2', 'task3']  ## add the name of your daily tasks here.
is_daily_completion = ((events_2021['event_type'] == 'completed')
                       & events_2021['content'].isin(daily_task_names))
df_dailies = events_2021[is_daily_completion]

# Completions per task, summed per calendar day (one column per task).
daily_counts = df_dailies.groupby([df_dailies.index, 'content']).size()
df_event_by_project_by_day = daily_counts.unstack().resample('D').sum()

ax = df_event_by_project_by_day.plot.bar(stacked=True, figsize=(20, 8))
