Similarly, if you want to see view counts on your Kaggle notebooks - you have to increase the view count by one... *until now!* We can spy on the latest view counts (and other stats) by requesting a listing from the Kaggle API...

To use this notebook:

 1.   Fork this notebook by clicking on Copy and Edit.
 2.   Set the username variable to your own username.

In [1]:
# Kaggle username to inspect: it is appended to the profile URL and used as
# the prefix of the CSV snapshot file name written further down.
username = 'azminetoushikwasi'

In [2]:
from IPython.display import Markdown, display
import datetime, requests, json, os, re, sys, time
from matplotlib.ticker import FixedLocator, MaxNLocator
import pandas as pd
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from plotly.subplots import make_subplots

In [3]:
# Allow up to 500 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 500)

# Fetch the public profile page once. The response sets the XSRF cookie that
# is copied into the session headers, and its HTML embeds the numeric user id
# and the notebook count parsed below.
session = requests.session()
profile = session.get('https://www.kaggle.com/' + username)
# Surface a bad username / blocked request as an HTTP error here rather than
# as an opaque KeyError/IndexError on the lines that follow.
profile.raise_for_status()
session.headers['x-xsrf-token'] = session.cookies['XSRF-TOKEN']

# Raw strings: the patterns contain \. \( and \d, which are invalid escape
# sequences in a normal string literal (the pattern text itself is unchanged).
uid = int(re.findall(r'Kaggle\.State\.push\({.*?"userId":(\d+),', profile.text)[0])
count = int(re.findall(r'"scriptsSummary":{.*?"totalResults":(\d+),', profile.text)[0])
# 20 is the 'pageSize' sent with every listing request.
pages = int(np.ceil(count / 20))

In [4]:
# Endpoint that the notebook-listing requests are POSTed to.
url = 'https://www.kaggle.com/api/i/kernels.KernelsService/ListKernels'

# Request payload, assembled section by section (key order is preserved).
request = {}

# Listing filter: 'userId' and 'page' are placeholders that get overwritten
# before each request is sent.
request['kernelFilterCriteria'] = dict(
    search='',
    listRequest=dict(
        userId=0,
        sortBy='DATE_RUN',
        pageSize=20,
        group='PROFILE',
        page=1,
        tagIds='',
        excludeResultsFilesOutputs=False,
        wantOutputFiles=False,
    ),
)

# Per-notebook detail options sent alongside the listing filter.
request['detailFilterCriteria'] = dict(
    deletedAccessBehavior='RETURN_NOTHING',
    unauthorizedAccessBehavior='RETURN_NOTHING',
    excludeResultsFilesOutputs=False,
    wantOutputFiles=False,
    kernelIds=[],
    outputFileTypes=[],
)

In [5]:
# Request every listing page for this user and keep one DataFrame per page.
request['kernelFilterCriteria']['listRequest']['userId'] = uid

dfs = []
for page in range(1, pages + 1):
    request['kernelFilterCriteria']['listRequest']['page'] = page
    data = json.dumps(request, separators=(',', ':'))
    res = session.post(url, data=data)
    if res.status_code != 200:
        # Best effort: keep the pages fetched so far, but report the early
        # stop (warnings are globally silenced in this notebook, so write to
        # stderr directly).
        print(f'Stopped at page {page} of {pages}: HTTP {res.status_code}', file=sys.stderr)
        break
    # A page without a 'kernels' entry contributes an empty frame instead of
    # raising KeyError.
    kernels = res.json().get('kernels', [])
    dfs.append(pd.DataFrame(kernels))

def split_columns(df: pd.DataFrame, col: str) -> pd.DataFrame:
    """Expand one column of nested values into flat, prefixed columns.

    Each value in ``df[col]`` is converted with ``pd.Series`` and the
    resulting columns are named ``"<col>_<key>"``.

    Args:
        df: Frame containing the nested column. It is left unmodified.
        col: Name of the column to expand.

    Returns:
        A new frame with ``col`` removed and the expanded columns appended
        after the remaining original columns.
    """
    expanded = df[col].apply(pd.Series).add_prefix(col + "_")
    # drop() returns a copy, so the caller's frame keeps its original column
    # (the previous df.pop(col) removed it from the argument as a side effect).
    return df.drop(columns=col).join(expanded)

# Combine all fetched pages and flatten the nested columns into
# '<column>_<key>' columns.
df = pd.concat(dfs, ignore_index=True)
df = split_columns(df, 'author')
df = split_columns(df, 'dataSources')
df = split_columns(df, 'categories')

# Save today's snapshot as ./data/<username>-YYYY-MM-DD.csv. The directory is
# created first so that a fresh checkout does not fail inside to_csv.
filename = f'{username}-{datetime.datetime.now().strftime("%Y-%m-%d")}.csv'
os.makedirs('./data', exist_ok=True)
df.to_csv("./data/"+filename, index=False)

# Headline totals over all fetched notebooks, rendered as a Markdown list.
views_total = df.totalViews.sum()
votes_total = df.totalVotes.sum()
summary_items = [
    f" - Notebook count: {df.shape[0]:,.0f}",
    f" - Total views: {views_total:,.0f}",
    f" - Total comments: {df.totalComments.sum():,.0f}",
    # count() only counts notebooks whose medal field is not missing.
    f" - Total medals: {df.medal.count():,.0f}",
    f" - Total votes: {votes_total:,.0f}",
    f" - Mean views per vote: {views_total / votes_total:,.0f}",
]
display(Markdown("\n" + "\n".join(summary_items) + "\n"))


 - Notebook count: 15
 - Total views: 36,588
 - Total comments: 338
 - Total medals: 15
 - Total votes: 1,267
 - Mean views per vote: 29


In [6]:
# Styled table of all notebooks; each title links to the notebook and carries
# a multi-line tooltip with extra details.
display(Markdown(f'# Notebooks by {df.author_displayName.get(0)}'))
# The +1 on both sides keeps the ratio finite for notebooks with zero votes.
df['viewsPerVote'] = df.eval('(totalViews+1)/(totalVotes+1)')
df['lastRun'] = pd.to_datetime(df.lastRunTime).dt.strftime('%c')
stats = ['totalViews', 'totalVotes', 'totalLines', 'totalComments']
links = ('<a href="https://www.kaggle.com' + df.scriptUrl + '" '
         + ' title="'
         + 'Language: ' + df.languageName.fillna('N/A') + '\n'
         + 'Medal: ' + df.medal.fillna('N/A') + '\n'
         + 'Last run: ' + df.lastRun + '\n'
         + 'Runtime: ' + df.lastRunExecutionTimeSeconds.map(str) + 's' + '\n'
         # Reuse the column computed above instead of evaluating it again.
         + 'Views per vote: ' + df['viewsPerVote'].apply(lambda x: f'{x:,.0f}')
         + '">' # end title="..."
         + df.title
         + '</a>')
show = df[['title'] + stats].copy()
show['title'] = links
styled = show.style.format({k: '{:,.0f}' for k in stats}, na_rep="")
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0;
# Styler.hide(axis='index') is its replacement. Fall back for older pandas.
styled = styled.hide(axis='index') if hasattr(styled, 'hide') else styled.hide_index()
styled.bar(width=50, color='#4cf')

# Notebooks by Thomas Shelby

title,totalViews,totalVotes,totalLines,totalComments
Daily AQI - Air Quality Index [Scheduled],876,52,435,22
House Price Prediction with Stacking & Ensembling,542,26,2878,3
"Coursera 📉 EDA, Reviews Sentiment Analysis",900,52,416,22
XGBoost | Wrangling with Hyperparameters | Guide,3706,87,586,16
Investment and Financial Analysis 🏭 EPZ,920,61,463,23
"▶️ Cristiano Ronaldo ⚽ Goals 📊 EDA, Analysis",2736,83,357,38
Classification ➡️ Comparing Different Algorithms,1701,73,407,11
Advanced Feature Engineering for Classification,2873,73,382,20
UCL ⚽ EDA & Viz ⭐ 2021-22 ⭐ Players & Teams,2318,75,536,27
Mastering Bias-Variance Tradeoff,4417,109,592,33


In [7]:
# Build {title: {'medal': <medal or 'N/A'>}} for looking up the current medal
# of each notebook by title.
# Assign the filled column back: an inplace fillna on df['medal'] operates on
# a temporary Series and does not update df under pandas Copy-on-Write.
df['medal'] = df['medal'].fillna('N/A')
medal_data = df[['medal', 'title']].set_index('title')
medal_data = medal_data.to_dict(orient='index')

# Plotly Scatter Plot

You may want to tweak these settings depending on how many notebooks you have...


In [8]:
import plotly.express as px

# NOTE: `datetime` is deliberately not re-imported as the class here. The
# snapshot cell calls datetime.datetime.now() on the module, so shadowing the
# module name would break that cell when it is re-run.

# Assign filled columns back instead of using a chained inplace fillna, which
# does not update df under pandas Copy-on-Write.
df['totalComments'] = df['totalComments'].fillna(0)
df['medal'] = df['medal'].fillna('N/A')

# Marker size: votes as a percentage of views. Zero views are masked before
# dividing so the size falls back to 0 instead of becoming inf/NaN.
views_nonzero = df['totalViews'].replace(0, np.nan)
df['size'] = (df['totalVotes'] / views_nonzero * 100).fillna(0)

color_map = {
    'GOLD': 'gold',
    'SILVER': 'silver',
    'BRONZE': 'chocolate',
    'N/A': '#333'
}
fig = px.scatter(
    df,
    'totalViews',
    'totalVotes',
    title=f'Notebooks by {df.author_displayName.get(0)} Today '
          + datetime.datetime.today().strftime('%Y-%m-%d'),
    log_x=False,
    log_y=False,
    # Tweakable: add width=800 for a fixed-width figure.
    height=800,
    color='medal',
    color_discrete_map=color_map,
    size='size',
    # Tweakable: trendline='lowess' gives a locally weighted smoothing line,
    # and trendline_scope='overall' draws a single trendline for all points.
    trendline='ols',
    trendline_options=dict(log_x=False, log_y=False, add_constant=True),
    template='plotly_dark',
    hover_name='title',
    hover_data={
        'size': False,
        'totalViews': ':,.0f',
        'totalVotes': ':,.0f',
        'totalLines': ':,.0f',
        'totalComments': ':,.0f',
        'viewsPerVote': ':,.0f',
        'languageName': True,
        'versionNumber': True,
        'lastRun': True,
        'lastRunExecutionTimeSeconds': ':,.0f',
        'medal': True,
    })
fig.update_layout(legend={'itemsizing': 'constant'})
fig.show()

In [9]:
# Collect the path of every CSV snapshot under ./data/.
# os.walk yields files in arbitrary order, so the paths are sorted: snapshot
# names end in an ISO date, which makes the sorted order chronological for
# files sharing the same folder and username prefix. Later cells index into
# this list and plot in list order, so a deterministic order matters.
links = sorted(
    os.path.join(dirname, csv_name)
    for dirname, _, filenames in os.walk('./data/')
    for csv_name in filenames
    if csv_name.endswith(".csv")
)

# Analysis

In [10]:
# Read every saved snapshot and build
#   * per-snapshot grand totals (date_list, gTotalUpvotes/Views/Comments), and
#   * a long table df_d with one row per (snapshot date, notebook).
dfx = pd.DataFrame()  # not used in the cells shown here; kept in case other cells reference it
date_list = []
titles_dft = []
date_dft = []
count_dft = []
view_dft = []

gTotalUpvotes = []
gTotalViews = []
gTotalComments = []

for link in links:
    # Snapshots are written as '<username>-YYYY-MM-DD.csv', so the date is the
    # last 10 characters of the file stem. A fixed slice such as link[25:35]
    # is only correct for one particular username length.
    date = os.path.splitext(os.path.basename(link))[0][-10:]
    date_list.append(date)
    temp_df = pd.read_csv(link)

    gTotalUpvotes.append(temp_df.totalVotes.sum())
    gTotalViews.append(temp_df.totalViews.sum())
    gTotalComments.append(temp_df.totalComments.sum())

    # Take the columns row by row. Looking values up with list.index(title)
    # is quadratic and returns the first match for duplicated titles.
    titles_dft.extend(temp_df.title)
    date_dft.extend([date] * len(temp_df))
    count_dft.extend(temp_df.totalVotes)
    view_dft.extend(temp_df.totalViews)

df_d = pd.DataFrame({
    "title": titles_dft,
    "date": date_dft,
    "count": count_dft,
    "view_count": view_dft,
})


In [11]:
# Attach the current medal to every historical row. Titles that are absent
# from medal_data are labelled 'Not Found'.
# dict.get replaces the bare `except:`, which would also have hidden unrelated
# errors (including KeyboardInterrupt) behind the 'Not Found' label.
medal = [
    medal_data.get(title, {}).get('medal', 'Not Found')
    for title in df_d['title']
]
df_d['medal'] = medal

In [12]:
# Vote count of every notebook over time, one line per notebook, coloured by
# its current medal.
color_map = {
    'GOLD': 'gold',
    'SILVER': 'silver',
    'BRONZE': 'chocolate',
    'N/A': '#333',
    'Not Found' : 'lime'
}

# Date shown in the title: taken from the second-to-last snapshot path as
# before, falling back to the only snapshot (or an empty string) so the cell
# no longer raises IndexError when fewer than two snapshots exist. The date is
# read from the end of the file stem ('...-YYYY-MM-DD.csv') rather than from a
# fixed offset that only fits one username length.
if len(links) > 1:
    title_link = links[-2]
elif links:
    title_link = links[-1]
else:
    title_link = ''
title_date = os.path.splitext(os.path.basename(title_link))[0][-10:]

fig = px.line(
    # Lines are drawn in row order, so sort by date first (stable, to keep the
    # row order within a date).
    df_d.sort_values('date', kind='stable'),
    x='date',
    y='count',
    title=f'Notebooks by {df.author_displayName.get(0)} ' + title_date,
    log_x=False,
    log_y=False,
    line_group='title',
    markers=False,
    height=800,
    line_shape='spline',
    color='medal',
    color_discrete_map=color_map,
    template='plotly_dark',
    hover_name='title')
fig.show()

In [13]:
# View count of every notebook over time, one line per notebook, with the
# latest value printed at the end of each line.
color_map = {
    'GOLD': 'gold',
    'SILVER': 'silver',
    'BRONZE': 'chocolate',
    'N/A': '#333',
    'Not Found' : 'lime'
}

# Date shown in the title: taken from the second-to-last snapshot path as
# before, falling back to the only snapshot (or an empty string) so the cell
# no longer raises IndexError when fewer than two snapshots exist. The date is
# read from the end of the file stem ('...-YYYY-MM-DD.csv') rather than from a
# fixed offset that only fits one username length.
if len(links) > 1:
    title_link = links[-2]
elif links:
    title_link = links[-1]
else:
    title_link = ''
title_date = os.path.splitext(os.path.basename(title_link))[0][-10:]

fig = px.line(
    # Lines are drawn in row order, and the end-of-line labels below use the
    # last point of each trace, so the rows must be in date order.
    df_d.sort_values('date', kind='stable'),
    x='date',
    y='view_count',
    title=f'Notebooks by {df.author_displayName.get(0)} ' + title_date,
    log_x=False,
    log_y=False,
    line_group='title',
    line_shape='spline',
    markers=False,
    height=800,
    color='medal',
    template='plotly_dark',
    color_discrete_map=color_map,
    hover_name='title')

# Add one marker + text label at the last point of every line. The trace
# tuple is snapshotted first because add_scatter appends to fig.data.
for trace in tuple(fig.data):
    last_x = trace.x[-1]
    last_y = trace.y[-1]
    fig.add_scatter(x=[last_x], y=[last_y],
                    marker={'color': color_map[trace.name], 'size': 5},
                    # Label with the plain value. str([value]) rendered the
                    # repr of a list, e.g. '[np.int64(4417)]' on NumPy 2.
                    text=[str(last_y)],
                    mode='markers + text',
                    textposition='middle right')
fig.update_layout(showlegend=False)
fig.show()

In [14]:
# Inspection only: show the first hover-text entry of the first trace in the
# figure built above (the figure uses the notebook title as hover name).
fig.data[0].hovertext[0]

'UCL ⚽ EDA & Viz ⭐ 2021-22 ⭐ Players & Teams'

In [15]:
# Grand totals per snapshot date, drawn as three side-by-side line charts.
totals = pd.DataFrame({
    'date': date_list,
    "Upvotes": gTotalUpvotes,
    "Views": gTotalViews,
    "Comments": gTotalComments,
})
# The snapshot files are not guaranteed to have been read in date order; sort
# so that every line runs left to right instead of zig-zagging.
totals = totals.sort_values('date', kind='stable').reset_index(drop=True)

from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=1, cols=3, subplot_titles=("Views\n","Upvotes\n", "Comments\n"))

# Only affects matplotlib/seaborn figures, not the Plotly figure built here.
sns.set(rc={'figure.figsize':(28,6)})

# One named trace per metric, placed in the column matching its subplot title.
for col_idx, metric in enumerate(("Views", "Upvotes", "Comments"), start=1):
    fig.add_trace(
        go.Scatter(x=totals['date'], y=totals[metric], name=metric),
        row=1, col=col_idx
    )

# Last expression of the cell: displays the figure.
fig


In [16]:
# List the distinct notebook titles that appear across all loaded snapshots.
df_d.title.unique()

array(['UCL ⚽ EDA & Viz ⭐ 2021-22 ⭐ Players & Teams',
       'Divorce Prediction & Reasons : XGB,SHAP,RFE,TSNE',
       'Advanced Feature Engineering for Classification',
       'All Imputation Techniques with Pros and Cons',
       'Mastering Bias-Variance Tradeoff ',
       '▶️ Lionel Messi 📊 Extended EDA ⚽ Goals ',
       '▶️ Cristiano Ronaldo ⚽ Goals  📊 EDA, Analysis',
       'Coursera 📉 EDA, Reviews Sentiment Analysis',
       'Investment and Financial Analysis  🏭 EPZ',
       'ML Foundation ➡️ Cross Validation ✅ All Methods',
       'Different Clustering Techniques and Algorithms',
       'Daily AQI - Air Quality Index [Scheduled]',
       'Classification ➡️ Comparing Different Algorithms',
       'House Price Prediction with Stacking & Ensembling',
       'XGBoost | Wrangling with Hyperparameters | Guide'], dtype=object)