<a href="https://colab.research.google.com/github/invisilico/Tutorial-Notebooks/blob/main/DataVizTool.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Welcome to ***your*** Android-Data Visualizer!


---

Made by Nishant Jana 

(Adapted from Dr. Tanya Leise's SRBR Chronoschool 2020 class)



---


Find me at:

Twitter: [@In_Visilico](https://twitter.com/In_Visilico) , Github: [@invisilico](https://github.com/invisilico) , E-mail: nishantjana5@gmail.com

Feel free to DM/raise an issue/E-mail for any help, feedback and suggestions!


---






---

## Helpful tips

Click the ![run button](https://user-images.githubusercontent.com/68754864/97426576-a8c14200-1939-11eb-9197-15c7be91ab96.png) button to the left of each cell to run it.
```
Make sure you have the My Activity.html file from takeout.google.com ready to upload before running the first cell.
```
---



In [None]:
#@title Load your data!
from google.colab import files
import os
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
import seaborn as sns
from calendar import monthrange
from calendar import isleap
from datetime import datetime, timedelta
from dateutil.parser import parse
import matplotlib as mpl
import matplotlib.ticker as ticker


#@markdown This cell will keep running till you select the file. Click the "Choose Files" tile as it appears to select a file to upload from your system. 
#@markdown 
#@markdown Ensure the file is called *exactly* 'My Activity.html'

#@markdown This cell may take a while to run!


# Prompt for the Takeout export; the call returns once a file has been chosen.
uploaded = files.upload()
# Rename the export so it can be read line by line as plain text (avoids BeautifulSoup).
os.rename(r'My Activity.html',r'actdata.txt')
with open('actdata.txt','r') as handle:
  txt = handle.readlines()
print("Data loaded from file.")
# NOTE(review): the activity records are taken from line index 32 of the export —
# confirm this still matches the current Takeout layout.
actdat = txt[32]

# Find the indices between which the app name and the timestamp sit.
preapp = [app.end(0) for app in re.finditer('<p class="mdl-typography--title">', actdat)]
postapp = [app.start(0) for app in re.finditer('<br></p></div><div class="content-cell mdl-cell mdl-cell--6-col mdl-typography--body-1">', actdat)]
posttime = [time.start(0) for time in re.finditer('</div><div class="content-cell mdl-cell mdl-cell--6-col mdl-typography--body-1 mdl-typography--text-right">', actdat)]

appname = []
# Named `timestamps` (not `datetime`) so the imported datetime class is not shadowed.
timestamps = []
timezone = []

dd = []
mm = []
yy = []
of24h = []
weekday = []

for i in range(len(posttime)):

  appname.append(actdat[preapp[i]:postapp[i]])

  # Take the 30 characters before the closing marker, then keep only what follows
  # the last '>' in that window, so leftover markup is not handed to the date parser.
  stamp = actdat[posttime[i]-30:posttime[i]]
  idx = [match.end(0) for match in re.finditer('>', stamp)]
  if len(idx) > 0:
    stamp = stamp[idx[-1]:]

  # The final three characters are kept as the time-zone label; the character
  # before them is dropped and the rest is parsed as the date and time.
  moment = parse(stamp[:-4])
  timestamps.append(moment)
  dd.append(int(moment.day))
  mm.append(int(moment.month))
  yy.append(int(moment.year))
  # Time of day as fractional hours, e.g. 13:30 -> 13.5.
  of24h.append(np.round(int(moment.hour)+(int(moment.minute)/60), decimals = 3))
  timezone.append(stamp[-3:])
  weekday.append(int(moment.weekday()))

# Flip every column so the last row is the final record of the flipped order
# (later cells read the last row as the latest record). All lists must be
# reversed together, weekday included, or the rows stop lining up.
for column in (appname, of24h, dd, mm, yy, timezone, weekday, timestamps):
  column.reverse()

data = pd.DataFrame(list(zip(appname,of24h,dd,mm,yy,timezone,weekday,timestamps)),columns = ['App','of24h','Day','Month','Year','TimeZone','Weekday','TimeStamp'])

data

In [None]:
#@markdown Set the value below to control the DPI of the plots. 

#@markdown larger values offer higher resolution plots but also take longer to generate. 300 is fairly good for HD screens.
plot_dpi = 300 #@param {type:"integer"}
# Apply the chosen resolution to the figures created by the cells below.
mpl.rc('figure', dpi=plot_dpi)



---

```
Hint: if you would like more professional-looking plots, untick the xkcd checkbox in each cell.
```



In [None]:
#@title Actograms by Month (last 12 months)
#@markdown The month previous to the latest is considered to make sure we only look at complete months

# Form toggle: when True the figure in this cell is drawn inside plt.xkcd().
xkcd = True #@param {type:"boolean"}

#functions

def last_12_months(dataframe):
  """Return the twelve calendar months that precede the month of the last row.

  The month of the last row itself is skipped, so only complete months are listed.

  Args:
    dataframe: frame with 'Month' (1-12) and 'Year' columns; its last row is
      read as the most recent record.

  Returns:
    A tuple (mlist, mnlist, ylist) of three 12-item lists, oldest month first:
    month numbers, month names and the year each month falls in.
  """
  # Read from the argument, not from a module-level frame, so the function
  # works on whichever frame the caller passes in.
  last_month = dataframe['Month'].iloc[-1] - 1
  last_year = dataframe['Year'].iloc[-1]
  mnamelst = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

  mlist = []
  mnlist = []
  ylist = []

  # Walk from eleven months before the last complete month up to that month.
  for back in range(11, -1, -1):
    offset = last_month - back
    if offset > 0:
      month_number = offset
      year = last_year
    else:
      # Zero or negative offsets fall in the previous calendar year.
      month_number = offset + 12
      year = last_year - 1
    mlist.append(month_number)
    ylist.append(year)
    mnlist.append(mnamelst[month_number - 1])

  return mlist,mnlist,ylist

def mmprepdata(dataframe, ynum, mnum):
  """Collect one month's event times, one array per calendar day.

  Args:
    dataframe: frame with 'Year', 'Month', 'Day' and 'of24h' columns.
    ynum: year to select.
    mnum: month number to select.

  Returns:
    Object array with one entry per day of the month. Entries are stored in
    reverse day order: the last element holds day 1, the first holds the
    final day. Each entry is the array of 'of24h' values for that day.
  """
  month_rows = dataframe.query(f'Year == {ynum} and Month == {mnum}')

  # monthrange gives (weekday of the 1st, number of days); only the count is needed.
  _, n_days = monthrange(ynum, mnum)
  plotdata = np.empty(n_days, dtype = object)
  for day in range(1, n_days + 1):
    day_rows = month_rows.query(f'Day == {day}')
    plotdata[n_days - day] = day_rows.of24h.to_numpy()

  return plotdata

def twelvemonthrast(dataframe, mlist, ylist, mnlist, xkcd):
  """Prepare per-day raster data for every listed month.

  mlist and ylist are read position by position as (month, year) pairs.
  mnlist and xkcd are accepted but not used here.

  Returns:
    A list with one mmprepdata(...) result per entry of mlist, in the same order.
  """
  return [mmprepdata(dataframe, ylist[pos], month) for pos, month in enumerate(mlist)]

# Work out which months to show, then prepare one raster per month.
mlist,mnlist,ylist = last_12_months(data)
dataforraster = twelvemonthrast(data,mlist,ylist,mnlist,xkcd)


def _draw_monthly_rasters():
  """Draw the twelve monthly rasters on a 2x6 grid and return (figure, axes)."""
  figure, grid = plt.subplots(2, 6)
  figure.suptitle(('Your last 12 months in Phone App Activity Rasters'))
  plt.gcf().set_size_inches(20,5)
  # grid.flat walks the axes row by row, which matches the order of the month lists.
  for position, axis in enumerate(grid.flat):
    axis.eventplot(dataforraster[position], color = '0.2')
    axis.set_title(str(mnlist[position]) + " " + str(ylist[position]))
    axis.set_xlim(-1,25)
    axis.set_yticks([])
    axis.xaxis.set_major_locator(ticker.MultipleLocator(6))
    axis.set_aspect(0.5)
  return figure, grid


# The same drawing code runs in both cases; only the surrounding style differs.
if xkcd:
  with plt.xkcd():
    yrrast, plots = _draw_monthly_rasters()
else:
  yrrast, plots = _draw_monthly_rasters()

In [None]:
#@title Heatmaps by month (last 12 months)

# Form toggle: when True the figure in this cell is drawn inside plt.xkcd().
xkcd = True #@param {type:"boolean"}

#functions (same as previous)

def last_12_months(dataframe):
  """Return the twelve calendar months that precede the month of the last row.

  The month of the last row itself is skipped, so only complete months are listed.

  Args:
    dataframe: frame with 'Month' (1-12) and 'Year' columns; its last row is
      read as the most recent record.

  Returns:
    A tuple (mlist, mnlist, ylist) of three 12-item lists, oldest month first:
    month numbers, month names and the year each month falls in.
  """
  # Read from the argument, not from a module-level frame, so the function
  # works on whichever frame the caller passes in.
  last_month = dataframe['Month'].iloc[-1] - 1
  last_year = dataframe['Year'].iloc[-1]
  mnamelst = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

  mlist = []
  mnlist = []
  ylist = []

  # Walk from eleven months before the last complete month up to that month.
  for back in range(11, -1, -1):
    offset = last_month - back
    if offset > 0:
      month_number = offset
      year = last_year
    else:
      # Zero or negative offsets fall in the previous calendar year.
      month_number = offset + 12
      year = last_year - 1
    mlist.append(month_number)
    ylist.append(year)
    mnlist.append(mnamelst[month_number - 1])

  return mlist,mnlist,ylist

def mmprepdata(dataframe, ynum, mnum):
  """Collect one month's event times, one array per calendar day.

  Args:
    dataframe: frame with 'Year', 'Month', 'Day' and 'of24h' columns.
    ynum: year to select.
    mnum: month number to select.

  Returns:
    Object array with one entry per day of the month. Entries are stored in
    reverse day order: the last element holds day 1, the first holds the
    final day. Each entry is the array of 'of24h' values for that day.
  """
  month_rows = dataframe.query(f'Year == {ynum} and Month == {mnum}')

  # monthrange gives (weekday of the 1st, number of days); only the count is needed.
  _, n_days = monthrange(ynum, mnum)
  plotdata = np.empty(n_days, dtype = object)
  for day in range(1, n_days + 1):
    day_rows = month_rows.query(f'Day == {day}')
    plotdata[n_days - day] = day_rows.of24h.to_numpy()

  return plotdata

def twelvemonthrasterprep(dataframe, mlist, ylist, mnlist, xkcd):
  """Prepare per-day raster data for every listed month.

  mlist and ylist are read position by position as (month, year) pairs.
  mnlist and xkcd are accepted but not used here.

  Returns:
    A list with one mmprepdata(...) result per entry of mlist, in the same order.
  """
  return [mmprepdata(dataframe, ylist[pos], month) for pos, month in enumerate(mlist)]

#functions (new)

def heatmapconversion(dataforraster):
  """Turn per-day event times into hourly counts for each month.

  Args:
    dataforraster: iterable of arrays, one per month, whose elements are
      arrays of event times in hours.

  Returns:
    A list with one (days, 24) float array per month. Column j counts the
    events t with j <= t < j + 1. Rows are written in the reverse of the
    input order: input element 0 lands in the last row.
  """
  hour_edges = range(25)
  dataforheatmap = []

  for month in dataforraster:
    n_days = month.shape[0]
    heatformonth = np.empty((n_days, 24))
    for row, events in enumerate(month):
      # Count events below each hour edge; neighbouring differences are the bin counts.
      below_edge = np.array([np.sum(events < edge) for edge in hour_edges])
      heatformonth[n_days - 1 - row, :] = np.diff(below_edge)
    dataforheatmap.append(heatformonth)

  return dataforheatmap

# Work out which months to show, then convert each month's raster into hourly counts.
mlist,mnlist,ylist = last_12_months(data)
dataforheatmap = heatmapconversion(twelvemonthrasterprep(data,mlist,ylist,mnlist,xkcd))


def _draw_monthly_heatmaps():
  """Draw the twelve monthly heatmaps on a 2x6 grid and return (figure, axes)."""
  figure, grid = plt.subplots(2, 6)
  figure.suptitle(('Your last 12 months in Phone App Activity Heatmaps'))
  plt.gcf().set_size_inches(20,5)
  # grid.flat walks the axes row by row, which matches the order of the month lists.
  for position, axis in enumerate(grid.flat):
    sns.heatmap(dataforheatmap[position],cmap="Spectral",xticklabels=False,yticklabels=False,ax=axis)
    axis.set_title(str(mnlist[position]) + " " + str(ylist[position]))
  return figure, grid


# The same drawing code runs in both cases; only the surrounding style differs.
if xkcd:
  with plt.xkcd():
    yrrast, plots = _draw_monthly_heatmaps()
else:
  yrrast, plots = _draw_monthly_heatmaps()

In [None]:
#@title Single-Plotted Actogram (last 12 months)

# Form toggle: when True the figure in this cell is drawn inside plt.xkcd().
xkcd = True #@param {type:"boolean"}

#functions

def last_12_months(dataframe):
  """Return the twelve calendar months that precede the month of the last row.

  The month of the last row itself is skipped, so only complete months are listed.

  Args:
    dataframe: frame with 'Month' (1-12) and 'Year' columns; its last row is
      read as the most recent record.

  Returns:
    A tuple (mlist, mnlist, ylist) of three 12-item lists, oldest month first:
    month numbers, month names and the year each month falls in.
  """
  # Read from the argument, not from a module-level frame, so the function
  # works on whichever frame the caller passes in.
  last_month = dataframe['Month'].iloc[-1] - 1
  last_year = dataframe['Year'].iloc[-1]
  mnamelst = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

  mlist = []
  mnlist = []
  ylist = []

  # Walk from eleven months before the last complete month up to that month.
  for back in range(11, -1, -1):
    offset = last_month - back
    if offset > 0:
      month_number = offset
      year = last_year
    else:
      # Zero or negative offsets fall in the previous calendar year.
      month_number = offset + 12
      year = last_year - 1
    mlist.append(month_number)
    ylist.append(year)
    mnlist.append(mnamelst[month_number - 1])

  return mlist,mnlist,ylist

def mmprepdata(dataframe, ynum, mnum):
  """Collect one month's event times, one array per calendar day.

  Args:
    dataframe: frame with 'Year', 'Month', 'Day' and 'of24h' columns.
    ynum: year to select.
    mnum: month number to select.

  Returns:
    Object array with one entry per day of the month. Entries are stored in
    reverse day order: the last element holds day 1, the first holds the
    final day. Each entry is the array of 'of24h' values for that day.
  """
  month_rows = dataframe.query(f'Year == {ynum} and Month == {mnum}')

  # monthrange gives (weekday of the 1st, number of days); only the count is needed.
  _, n_days = monthrange(ynum, mnum)
  plotdata = np.empty(n_days, dtype = object)
  for day in range(1, n_days + 1):
    day_rows = month_rows.query(f'Day == {day}')
    plotdata[n_days - day] = day_rows.of24h.to_numpy()

  return plotdata

def twelvemonthrast(dataframe, mlist, ylist, mnlist, xkcd):
  """Prepare per-day raster data for every listed month.

  mlist and ylist are read position by position as (month, year) pairs.
  mnlist and xkcd are accepted but not used here.

  Returns:
    A list with one mmprepdata(...) result per entry of mlist, in the same order.
  """
  return [mmprepdata(dataframe, ylist[pos], month) for pos, month in enumerate(mlist)]

# Work out which months to show, then prepare one raster per month.
mlist,mnlist,ylist = last_12_months(data)
dataforraster = twelvemonthrast(data,mlist,ylist,mnlist,xkcd)

# Chain the months into one long run of day rows, with the last month in the list first.
longplot = np.empty((0,),dtype = object)
for month in reversed(dataforraster):
  longplot = np.append(longplot,month)


def _draw_single_actogram(aspect):
  """Draw every day row of longplot as one tall event plot with the given axes aspect."""
  plt.title(('Your last Twelve Months \n in an Actogram'))
  plt.gcf().set_size_inches(3,20)
  plt.eventplot(longplot, color = '0.2')
  plt.xlim(-1,25)
  plt.ylim(-6,372)
  plt.yticks([])
  plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(6))
  plt.gca().set_aspect(aspect)


if xkcd:
  with plt.xkcd():
    _draw_single_actogram(0.4)
else:
  # NOTE(review): the plain style uses aspect 0.2 while the xkcd style uses 0.4 —
  # confirm the difference is intended.
  _draw_single_actogram(0.2)

In [None]:
#@title Double-Plotted Actogram (last 12 months)

# Form toggle: when True the figure in this cell is drawn inside plt.xkcd().
xkcd = True #@param {type:"boolean"}

#functions

def last_12_months(dataframe):
  """Return the twelve calendar months that precede the month of the last row.

  The month of the last row itself is skipped, so only complete months are listed.

  Args:
    dataframe: frame with 'Month' (1-12) and 'Year' columns; its last row is
      read as the most recent record.

  Returns:
    A tuple (mlist, mnlist, ylist) of three 12-item lists, oldest month first:
    month numbers, month names and the year each month falls in.
  """
  # Read from the argument, not from a module-level frame, so the function
  # works on whichever frame the caller passes in.
  last_month = dataframe['Month'].iloc[-1] - 1
  last_year = dataframe['Year'].iloc[-1]
  mnamelst = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

  mlist = []
  mnlist = []
  ylist = []

  # Walk from eleven months before the last complete month up to that month.
  for back in range(11, -1, -1):
    offset = last_month - back
    if offset > 0:
      month_number = offset
      year = last_year
    else:
      # Zero or negative offsets fall in the previous calendar year.
      month_number = offset + 12
      year = last_year - 1
    mlist.append(month_number)
    ylist.append(year)
    mnlist.append(mnamelst[month_number - 1])

  return mlist,mnlist,ylist

def mmprepdata(dataframe, ynum, mnum):
  """Collect one month's event times, one array per calendar day.

  Args:
    dataframe: frame with 'Year', 'Month', 'Day' and 'of24h' columns.
    ynum: year to select.
    mnum: month number to select.

  Returns:
    Object array with one entry per day of the month. Entries are stored in
    reverse day order: the last element holds day 1, the first holds the
    final day. Each entry is the array of 'of24h' values for that day.
  """
  month_rows = dataframe.query(f'Year == {ynum} and Month == {mnum}')

  # monthrange gives (weekday of the 1st, number of days); only the count is needed.
  _, n_days = monthrange(ynum, mnum)
  plotdata = np.empty(n_days, dtype = object)
  for day in range(1, n_days + 1):
    day_rows = month_rows.query(f'Day == {day}')
    plotdata[n_days - day] = day_rows.of24h.to_numpy()

  return plotdata

def twelvemonthrast(dataframe, mlist, ylist, mnlist, xkcd):
  """Prepare per-day raster data for every listed month.

  mlist and ylist are read position by position as (month, year) pairs.
  mnlist and xkcd are accepted but not used here.

  Returns:
    A list with one mmprepdata(...) result per entry of mlist, in the same order.
  """
  return [mmprepdata(dataframe, ylist[pos], month) for pos, month in enumerate(mlist)]

# Work out which months to show, then prepare one raster per month.
mlist,mnlist,ylist = last_12_months(data)
dataforraster = twelvemonthrast(data,mlist,ylist,mnlist,xkcd)

# Chain the months into one long run of day rows, with the last month in the list first.
longplot = np.empty((0,),dtype = object)
for month in reversed(dataforraster):
  longplot = np.append(longplot,month)

# Second copy of the rows: every time moved 24 hours to the right, and an empty
# row placed in front so each shifted row sits one position further along.
doubleplot1 = longplot
ext = np.array([],dtype="float64")
doubleplot2 = np.array([ext] + (longplot + 24).tolist(),dtype='object')


def _draw_double_actogram():
  """Draw both row sets on a shared 48-hour axis."""
  plt.title(('Your last Twelve Months \n in a Double-Plotted Actogram'))
  plt.gcf().set_size_inches(5,20)
  plt.xlim(-1,49)
  plt.ylim(-6,372)
  plt.yticks([])
  plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(6))
  plt.gca().set_aspect(0.4)
  plt.eventplot(doubleplot1, color = '0.2')
  plt.eventplot(doubleplot2, color = '0.2')


# The same drawing code runs in both cases; only the surrounding style differs.
if xkcd:
  with plt.xkcd():
    _draw_double_actogram()
else:
  _draw_double_actogram()



---

# Clean Slate!





In [None]:
#@markdown Deletes all files from colab for privacy.
#@markdown
#@markdown (Make sure to only run this **after** you've saved everything you need!)
# Shell command: removes the entries in the working directory whose names match the glob *.*
!rm *.*
print("Done and Dusted!")



---

