In [1]:
import pandas as pd # You need pandas, plotly, numpy
pd.options.plotting.backend = "plotly"


### Loading Kindle reading-session data from the export

In [11]:
# Load the raw reading-session export and reduce it to one row per qualifying
# session with just the columns the rest of the notebook uses.
# NOTE(review): the path below is absolute and machine-specific; point it at
# wherever your own export lives.
# Built as a single chain so no column is ever assigned onto a filtered slice
# (the pattern that triggers pandas' SettingWithCopyWarning).
kindle_data = (
    pd.read_csv(r"C:\Users\thisi\Downloads\Kindle sorted\Kindle.Devices.ReadingSession.csv")
    .loc[lambda df: df["device_family"] == "Kindle E-reader"]  # Only my Kindle device(s)
    .loc[lambda df: df["content_type"] == "E-Book"]  # Only e-books
    .loc[lambda df: df["total_reading_millis"] >= 60000]  # Only sessions of at least one minute
    .assign(
        # Session length as a timedelta instead of raw milliseconds.
        duration=lambda df: pd.to_timedelta(df["total_reading_millis"], unit="ms"),
        # Calendar day of the session start in Asia/Kolkata local time.
        # utc=True parses everything to UTC first, so tz_convert cannot fail on
        # naive timestamps (those are then assumed to be UTC - TODO confirm
        # against the export) or on mixed UTC offsets.
        Date=lambda df: (
            pd.to_datetime(df["start_timestamp"], utc=True)
            .dt.tz_convert("Asia/Kolkata")
            .dt.date
        ),
    )
    .loc[:, ["Date", "ASIN", "duration", "number_of_page_flips"]]
    .convert_dtypes()
)

### Generating data for books list from Calibre catalogue

In [12]:
# Book list exported from the Calibre catalogue (one row per book).
# NOTE(review): absolute, machine-specific path.
Books_data = pd.read_csv(
    filepath_or_buffer=r"C:\Users\thisi\Downloads\My Books List.csv",
)

def extract_amazon_id(identifiers):
    """Return the Amazon ID from a comma-separated identifier string.

    Parameters
    ----------
    identifiers : object
        Value holding entries such as ``"isbn:...,amazon:B07HJYTRMD"``.
        It is converted with ``str()`` first, so missing values (NaN/None)
        are accepted and simply yield no match.

    Returns
    -------
    str or None
        The text following the ``amazon:`` prefix of the first matching
        entry, or ``None`` when no entry starts with that prefix.
    """
    prefix = "amazon:"
    for identity in str(identifiers).split(","):
        # Entries may be separated by ", " - drop the surrounding whitespace.
        identity = identity.strip()
        if identity.startswith(prefix):
            # Slice the prefix off. str.lstrip("amazon:") would strip any
            # leading run of the characters a/m/z/o/n/: and so could eat the
            # beginning of the ID itself.
            return identity[len(prefix):]
    return None
    

# Derive each book's ASIN from its identifier string.
Books_data['ASIN'] = Books_data['identifiers'].apply(extract_amazon_id)
# Keep only books that actually have an ASIN. A comparison like `!= None` is
# evaluated element-wise by pandas and is True for every row (missing ones
# included), so it filters nothing; notna() is the correct missing-value test.
Books_data = Books_data[Books_data['ASIN'].notna()]

In [13]:
# Trim the catalogue to the columns used for labelling sessions.
Books_data = Books_data.loc[:, ['title', 'authors', 'ASIN']]
# Left join keeps every reading session, matched or not. No `on=` is given, so
# pandas joins on the columns both frames share.
kindle_data = pd.merge(kindle_data, Books_data, how='left')

### Number of sessions

In [14]:
# One row per calendar day; each column holds that day's count of non-null values.
x = kindle_data.groupby('Date').count()

# The per-day ASIN count is plotted as the number of sessions on that day.
fig = x.plot.bar(x=x.index, y=x.ASIN)

# Last expression of the cell, so the updated figure is what gets displayed.
fig.update_layout(
    title='Total Number of Reading sessions per day',
    yaxis_title="Total number of Reading Sessions",
    xaxis=dict(tickformat='%d %B <br>%Y'),
    barcornerradius=15,
)

### Daily Duration

In [15]:
# Per-day totals. Only the numeric columns are aggregated: a bare .sum() would
# also try to "sum" the ASIN/title/authors text columns, which is meaningless
# here and whose outcome depends on the pandas version.
x = kindle_data.groupby('Date')[['duration', 'number_of_page_flips']].sum()
# Shift the summed timedelta onto the epoch so the y-axis can be formatted as a
# clock time (%H:%M:%S) via the tickformat below.
x['duration'] = x['duration'] + pd.to_datetime('1970/01/01')

fig = x.plot(y='duration', x=x.index, kind='bar')
# Last expression of the cell, so the updated figure is what gets displayed.
fig.update_layout(
    title = 'Total reading duration per day',
    yaxis_title = "Total Reading Duration",
    xaxis_tickformat = '%d %B <br>%Y',
    yaxis_tickformat = '%H:%M:%S',
    barcornerradius=15
)


In [16]:
# Per-day page-flip totals. Only this column is plotted, so only it is
# aggregated; the duration-to-datetime shift carried over from the duration
# chart was unused here and has been removed.
x = kindle_data.groupby('Date')[['number_of_page_flips']].sum()

fig = x.plot(y='number_of_page_flips', x=x.index, kind='bar')
# Last expression of the cell, so the updated figure is what gets displayed.
fig.update_layout(
    title = 'Total page flips per day',
    yaxis_title = "Total Page Flips",
    xaxis_tickformat = '%d %B <br>%Y',
    barcornerradius=15
)


### Book-wise statistics

In [17]:
def format_timedelta(s):
    """Render a timedelta Series as ``H:MM:SS`` strings.

    Parameters
    ----------
    s : pandas.Series
        Series of timedelta values (accessed through ``.dt``).

    Returns
    -------
    pandas.Series
        Strings on the same index. Minutes and seconds are zero-padded to two
        digits; hours are not padded and are not wrapped at 24. Each component
        is cut to an integer with ``astype(int)``.
    """
    total = s.dt.total_seconds()

    def two_digits(values):
        # Integer part as text, left-padded with zeros to width 2.
        return values.astype(int).astype(str).str.zfill(2)

    hh = (total // 3600).astype(int).astype(str)
    mm = two_digits(total // 60 % 60)
    ss = two_digits(total % 60)

    return hh + ':' + mm + ':' + ss

# Total reading time and page flips per book (keyed by ASIN), then labelled
# with catalogue details through a left join on the columns both frames share.
Books_stat = (
    kindle_data.loc[:, ["ASIN", "duration", "number_of_page_flips"]]
    .groupby("ASIN")
    .sum()
    .reset_index()
    .merge(Books_data, how='left')
)
# Human-readable H:MM:SS version of the summed duration.
Books_stat = Books_stat.assign(**{'Reading Duration': format_timedelta(Books_stat['duration'])})
# Drop books with no title after the join and list the longest-read first.
Books_stat = Books_stat.loc[Books_stat['title'].notna()].sort_values('duration', ascending=False)
# Displayed with a fresh 0..n-1 row numbering; Books_stat itself keeps its index.
Books_stat.reset_index().loc[:, ['title', 'authors', 'Reading Duration', 'number_of_page_flips']]

Unnamed: 0,title,authors,Reading Duration,number_of_page_flips
0,Verity,Colleen Hoover,6:55:32,644
1,Dopamine Detox : A Short Guide to Remove Distr...,Thibaut Meurisse,0:44:05,115
2,Speed Reading: How to Double (Or Triple) Your ...,Justin Hammond,0:40:45,178
3,How to Win Every Argument: The Use and Abuse o...,Madsen Pirie,0:15:06,38
4,Too Late,Colleen Hoover,0:03:10,3
5,World's Best Girlfriend,Durjoy Datta,0:02:12,1
6,How to Win Friends and Influence People,Dale Carnegie,0:01:16,9


### Which book was read on which days

In [18]:
# Total reading time and page flips for every (day, book) pair, labelled with
# catalogue details.
x = (
    kindle_data[['Date', 'ASIN', 'duration', 'number_of_page_flips']]
    .groupby(['Date', 'ASIN'], as_index=False)
    .sum()
    # Date and ASIN stay ordinary columns through the merge, so the row index is
    # rebuilt from the merged rows themselves rather than re-attached from the
    # pre-merge frame by position (which fails if the merge changes the row count).
    .merge(Books_data, how='left')
    # Index level is called "ID" so it does not clash with the ASIN column,
    # which is kept as a regular column.
    .assign(ID=lambda df: df['ASIN'])
    .set_index(['Date', 'ID'])
    # Human-readable H:MM:SS version of the summed duration.
    .assign(**{'Reading Duration': lambda df: format_timedelta(df['duration'])})
    # Drop pairs whose book has no title after the join.
    .dropna(subset=['title'])
)
x[['title', 'authors', 'Reading Duration', 'number_of_page_flips']]

Unnamed: 0_level_0,Unnamed: 1_level_0,title,authors,Reading Duration,number_of_page_flips
Date,ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-05-25,B0C1YY5SQR,Too Late,Colleen Hoover,0:03:10,3
2024-05-25,B0CD818TD3,World's Best Girlfriend,Durjoy Datta,0:02:12,1
2024-05-27,B008J2MMWU,Speed Reading: How to Double (Or Triple) Your ...,Justin Hammond,0:40:45,178
2024-05-28,B005CHB726,How to Win Every Argument: The Use and Abuse o...,Madsen Pirie,0:15:06,38
2024-05-28,B07HJYTRMD,Verity,Colleen Hoover,2:00:04,123
2024-05-28,B098MHBF23,Dopamine Detox : A Short Guide to Remove Distr...,Thibaut Meurisse,0:44:05,115
2024-05-29,B07HJYTRMD,Verity,Colleen Hoover,0:49:36,44
2024-05-30,B07HJYTRMD,Verity,Colleen Hoover,0:55:51,82
2024-05-31,B07HJYTRMD,Verity,Colleen Hoover,0:45:26,54
2024-06-01,B07HJYTRMD,Verity,Colleen Hoover,0:10:44,44
