# Imports

In [1]:
import pandas as pd
# import numpy as np
from datetime import datetime, timedelta
import altair as alt
# import matplotlib.pyplot as plt
# import seaborn as sns

# Functions  

I will define some functions to avoid repetition in the code.

In [2]:
def change_datetime_zone(df, hours=3):
    """Parse a timestamp string and shift it by a fixed number of hours.

    Despite its name, ``df`` is a single timestamp string, not a DataFrame.
    It must look like "YYYY-MM-DD HH:MM:SS"; callers strip any " UTC" suffix
    before calling.

    Args:
        df: Timestamp string to parse.
        hours: Offset added to the parsed time. Defaults to 3, which turns a
            UTC timestamp into GMT+3.

    Returns:
        A naive ``datetime`` moved forward by ``hours``.

    Raises:
        ValueError: If ``df`` does not match the expected format.
    """
    parsed = datetime.strptime(df, "%Y-%m-%d %H:%M:%S")
    return parsed + timedelta(hours=hours)

def raw_date_to_datetime(raw_date):
    """Bundle a datetime with the names of its month and weekday.

    Args:
        raw_date: A datetime-like object that provides ``strftime``.

    Returns:
        A dict with three entries, in this order:
        "Date" (``raw_date`` itself, unchanged), "MonthName" (full month
        name, ``%B``) and "DayName" (full weekday name, ``%A``).
    """
    return {
        "Date": raw_date,
        "MonthName": raw_date.strftime("%B"),
        "DayName": raw_date.strftime("%A"),
    }

def ceil_dt(dt, delta):
    """Round ``dt`` up to the next multiple of ``delta``.

    Multiples are counted from ``datetime.min`` (given the same tzinfo as
    ``dt``). A value that already sits on a multiple is returned unchanged.

    Args:
        dt: The datetime to round up.
        delta: The step size as a ``timedelta``.

    Returns:
        ``dt`` moved forward to the nearest multiple of ``delta``.
    """
    origin = datetime.min.replace(tzinfo=dt.tzinfo)
    # The modulo of the (negative) distance to the origin is exactly the
    # amount still missing to reach the next multiple.
    missing = (origin - dt) % delta
    return dt + missing

def floor_dt(dt, delta):
    """Round ``dt`` down to a multiple of ``delta`` minutes within its hour.

    Unlike ``ceil_dt``, ``delta`` is a plain number of minutes, not a
    ``timedelta``. Only the minute, second and microsecond fields change;
    the hour and date are kept.

    Args:
        dt: A datetime-like object providing ``minute`` and ``replace``.
        delta: Interval length in minutes (e.g. 30 for half-hour intervals).

    Returns:
        A copy of ``dt`` positioned at the start of its interval.

    Raises:
        ZeroDivisionError: If ``delta`` is 0.
    """
    floored_minute = (dt.minute // delta) * delta
    # Clear the sub-minute fields too; otherwise 13:16:07 would turn into
    # 13:00:07 instead of the interval start 13:00:00.
    return dt.replace(minute=floored_minute, second=0, microsecond=0)

def is_subscribed_to(subreddit_name, subscribed_subs_df):
    """Tell whether ``subreddit_name`` appears anywhere in the subscriptions.

    Every cell of ``subscribed_subs_df`` is compared for exact equality, so
    a match in any column counts, not only in the subreddit name column.

    Args:
        subreddit_name: The value to look for.
        subscribed_subs_df: DataFrame holding the subscribed subreddits.

    Returns:
        A truthy value if at least one cell equals ``subreddit_name``.
    """
    cell_matches = subscribed_subs_df == subreddit_name
    column_has_match = cell_matches.any()
    return column_has_match.any()

# Data Collecting

Get the data from the .csv files, then clean and format it a bit to get useful pandas DataFrames.  

## Different Data
* Subscribed Subreddits
* IP Logs
* Post Votes
* Comment Votes
* Posts
* Comments

In [3]:
# Data paths (each file name is appended directly, so keep the trailing slash)
# Directory that most of the .csv files in the cells below are read from
data_path = "./data/scrapped_data/"
# Directory that ip_logs.csv is read from
backup_data_path = "./data/raw_data/"

## Subscribed Subreddits

This data holds the subreddits that I am _currently_ subscribed to. It does not hold a history or any temporal data.  

Reddit doesn't include a direct way to _categorize_ subreddits by some kind of tag or topic system, except for a list of _"flairs"_ that can be used to label posts in a subreddit. However, a tag system might be useful to analyse them better. I will use a Google form to ask people to annotate the subreddits and use that data to categorize them.
This data file also includes followed users (not subs). They are marked with a 'u_' prefix in the data. They will be filtered out, but saved anyway since they might be useful in the future.

In [4]:
# Read the file, sort by subreddit name and renumber the rows after sorting
sub_fname = "subscribed_subreddits.csv"
subreddits_df = (
    pd.read_csv(data_path + sub_fname)
    .sort_values(by="subreddit")
    .reset_index(drop=True)
)

# Followed users are marked with a "u_" prefix. Test the prefix only: a
# substring search would also treat real subreddits that merely contain
# "u_" somewhere in their name as users.
is_followed_user = subreddits_df["subreddit"].str.startswith("u_", na=False)
followed_users_df = subreddits_df[is_followed_user]
subreddits_df = subreddits_df[~is_followed_user]
# subreddits_df.to_csv(data_path + sub_fname, index=False)
subreddits_df

Unnamed: 0,subreddit,Flairs
0,AskScienceFiction,
1,CodeBullet,"meme, question for codebullet, video idea, oth..."
2,DMAcademy,"offering advice, need advice: encounters & adv..."
3,DaystromInstitute,
4,Deepspaceninememes,"original content [oc], shitpost"
5,ElectroBOOM,"faf - rectify, electroboom question, non-elect..."
6,ExposurePorn,
7,FATErpg,
8,GeekyaparLamers,
9,GreekMythology,"discussion, question, art, culture, history, i..."


## IP Logs

IP logs data holds information about my logins to Reddit. It holds the datetime and the IP that I used. This data might be used to show my active times, even though it doesn't hold information on how long I stayed active.  

The date data is in the form of "yyyy-mm-dd hh:mm:ss UTC". I will convert time into GMT+3, and name the months and days.

In [5]:
# Read the file, drop the first row (it only holds the registration IP), drop the IP column and renumber the rows
logs_fname = "ip_logs.csv"
login_datetime_df = (
    pd.read_csv(backup_data_path + logs_fname)
    .rename(columns={"date": "RawDate"})
    .drop(index=0, columns="ip")
    .reset_index(drop=True)
)

# The conversion only applies while the RawDate column is still present.
# Checking for the column explicitly (instead of catching KeyError around the
# whole conversion) keeps unrelated lookup errors from being silently ignored.
if "RawDate" in login_datetime_df.columns:
    # Strip the " UTC" suffix and convert every timestamp to the local time zone
    local_datetimes = [
        change_datetime_zone(raw_date.replace(" UTC", ""))
        for raw_date in login_datetime_df["RawDate"]
    ]

    # Build the Date / MonthName / DayName columns in one pass instead of
    # writing them cell by cell
    date_parts_df = pd.DataFrame(
        [raw_date_to_datetime(local_datetime) for local_datetime in local_datetimes],
        index=login_datetime_df.index,
        columns=["Date", "MonthName", "DayName"],
    )
    login_datetime_df = login_datetime_df.drop(columns="RawDate").join(date_parts_df)

login_datetime_df["Date"] = pd.to_datetime(login_datetime_df["Date"])
# login_datetime_df.to_csv(data_path + logs_fname, index=False)
login_datetime_df

Unnamed: 0,Date,MonthName,DayName
0,2023-06-29 13:16:07,June,Thursday
1,2023-06-29 17:25:25,June,Thursday
2,2023-06-30 04:16:29,June,Friday
3,2023-06-30 07:15:35,June,Friday
4,2023-06-30 08:46:19,June,Friday
...,...,...,...
357,2023-10-06 15:19:55,October,Friday
358,2023-10-06 16:40:52,October,Friday
359,2023-10-06 18:16:58,October,Friday
360,2023-10-07 11:04:02,October,Saturday


## Post Votes

This data includes the posts that I have voted on. It includes an ID, the post link, and the type of the vote; through the Reddit API it also includes the total number of upvotes and downvotes, and the flair. However, it has no temporal data.  

I will get the subreddit name from the URL and my vote, and I will compare the sub to the subscribed subs data to find out whether or not I am currently subscribed to that subreddit.  

Also, note that some of the posts are inaccessible for different reasons, which prevents data collection through the Reddit API; therefore, there are some missing values in the data.

In [6]:
# Read the file, rename the vote direction and vote count columns and drop the id column.
post_votes_fname = "post_votes.csv"
post_votes_df = (
    pd.read_csv(data_path + post_votes_fname)
    .rename(columns={"direction": "MyVote", "Upvotes": "UpvoteCount", "Downvotes": "DownvoteCount"})
    .drop(columns="id")
)

# Get the sub name from the link: the path segment right after the first "r/".
# The pattern stops at the next "/" or at the end of the string, so a link
# without a trailing slash keeps its last character. Links without any "r/"
# yield a missing value.
post_votes_df["SubredditName"] = post_votes_df["permalink"].str.extract(r"r/([^/]*)", expand=False)

# Check if each sub is subscribed
post_votes_df["IsSubscribed"] = post_votes_df["SubredditName"].map(
    lambda sub_name: is_subscribed_to(sub_name, subreddits_df)
)

# Drop the permalink column
post_votes_df = post_votes_df.drop(columns="permalink")
# Specify the Dtypes for later use
post_votes_df["IsSubscribed"] = post_votes_df["IsSubscribed"].astype(dtype="bool")
# post_votes_df.to_csv(data_path + post_votes_fname, index=False)
post_votes_df

Unnamed: 0,MyVote,UpvoteCount,DownvoteCount,Flair,SubredditName,IsSubscribed
0,up,,,,unexpectedMontyPython,True
1,up,196.0,2.0,meme,ProgrammerHumor,True
2,up,11978.0,902.0,meme,TheLastAirbender,True
3,up,152.0,9.0,,risa,True
4,up,,,,unexpectedMontyPython,True
...,...,...,...,...,...,...
1217,up,9443.0,711.0,,gaming,True
1218,none,9.0,0.0,,ProgrammerHumor,True
1219,up,8.0,1.0,,seinfeld,True
1220,up,188.0,6.0,meme,ProgrammerHumor,True


## Comment Votes  

Comment votes data is almost identical to the post votes data, except that it includes information about the comments that I have voted on instead of posts. Also, through the Reddit API it holds the score (the net number of upvotes) instead of separate counts of upvotes and downvotes.  

I will perform the same cleaning as for the post votes data: remove the ID, get the subreddit name from the URL and my vote, and compare the sub to the subscribed subs data to find out whether or not I am currently subscribed to that subreddit.  

Also, note that some of the comments or their posts are inaccessible for different reasons, which prevents data collection through the Reddit API; therefore, there are some missing values in the data.

In [7]:
# Read the file, rename the vote direction column and drop the id column.
comment_votes_fname = "comment_votes.csv"
comment_votes_df = (
    pd.read_csv(data_path + comment_votes_fname)
    .rename(columns={"direction": "MyVote"})
    .drop(columns="id")
)

# Get the sub name from the link: the path segment right after the first "r/".
# The pattern stops at the next "/" or at the end of the string, so a link
# without a trailing slash keeps its last character. Links without any "r/"
# yield a missing value.
comment_votes_df["SubredditName"] = comment_votes_df["permalink"].str.extract(r"r/([^/]*)", expand=False)

# Check if each sub is subscribed
comment_votes_df["IsSubscribed"] = comment_votes_df["SubredditName"].map(
    lambda sub_name: is_subscribed_to(sub_name, subreddits_df)
)

# Drop the permalink column
comment_votes_df = comment_votes_df.drop(columns="permalink")
# Specify the Dtypes for later use
comment_votes_df["IsSubscribed"] = comment_votes_df["IsSubscribed"].astype(dtype="bool")
# comment_votes_df.to_csv(data_path + comment_votes_fname, index=False)
comment_votes_df

Unnamed: 0,MyVote,Score,SubredditName,IsSubscribed
0,up,2.0,GenP,False
1,up,1.0,flashcarts,False
2,up,1190.0,ProgrammerHumor,True
3,up,196.0,startrek,True
4,up,5.0,startrek,True
...,...,...,...,...
255,up,35.0,CodeBullet,True
256,up,1.0,montypython,True
257,none,37.0,seinfeld,True
258,up,11.0,TheLastAirbender,True


## Posts  

The posts data is about the posts that I have created. It includes an ID, a permalink to the post, the posting date, the IP that I used, the name of the subreddit that the post was posted to, and gildings and url data. It also includes the number of upvotes and downvotes, and the flair, through the Reddit API.  

I will drop the permalink, IP, gildings, url and check if I am subscribed to the sub I have posted. I will keep the IDs to compare with the comments data later on.

Also, note that some of the posts are inaccessible for different reasons, which prevents data collection through the Reddit API; therefore, there are some missing values in the data.

In [8]:
# Read the file, rename the id, date, and subreddit columns and drop the permalink, ip, gildings, and url columns.
posts_fname = "post_headers.csv"
posts_df = (
    pd.read_csv(data_path + posts_fname)
    .rename(columns={"id": "ID", "date": "RawDate", "subreddit": "Subreddit"})
    .drop(columns=["permalink", "ip", "gildings", "url"])
)

# The conversion only applies while the RawDate column is still present.
# Checking for the column explicitly (instead of catching KeyError around the
# whole conversion) keeps unrelated lookup errors from being silently ignored.
if "RawDate" in posts_df.columns:
    # Check if subscribed
    posts_df["IsSubscribed"] = posts_df["Subreddit"].map(
        lambda sub_name: is_subscribed_to(sub_name, subreddits_df)
    )

    # Strip the " UTC" suffix and convert every timestamp to the local time zone
    local_datetimes = [
        change_datetime_zone(raw_date.replace(" UTC", ""))
        for raw_date in posts_df["RawDate"]
    ]

    # Build the Date / MonthName / DayName columns in one pass instead of
    # writing them cell by cell
    date_parts_df = pd.DataFrame(
        [raw_date_to_datetime(local_datetime) for local_datetime in local_datetimes],
        index=posts_df.index,
        columns=["Date", "MonthName", "DayName"],
    )
    posts_df = posts_df.drop(columns="RawDate").join(date_parts_df)

# Specify the Dtypes for later use
posts_df["IsSubscribed"] = posts_df["IsSubscribed"].astype(dtype="bool")
posts_df["Date"] = pd.to_datetime(posts_df["Date"])
# posts_df.to_csv(data_path + posts_fname, index=False)
posts_df

Unnamed: 0,ID,Subreddit,Upvotes,Downvotes,Flair,IsSubscribed,Date,MonthName,DayName
0,v7jv2a,consolerepair,3.0,0.0,,True,2022-06-08 10:38:39,June,Wednesday
1,m3jkjt,NintendoDSi,2.0,0.0,,False,2021-03-12 18:13:23,March,Friday
2,10x881w,startrek,3.0,0.0,,True,2023-02-08 22:26:30,February,Wednesday
3,126w0at,webdev,1.0,0.0,,False,2023-03-30 21:58:38,March,Thursday
4,r84lhi,flashcarts,2.0,0.0,,False,2021-12-03 20:40:55,December,Friday
5,126w1tz,webdev,1.0,0.0,,False,2023-03-30 22:00:18,March,Thursday
6,15rhuzw,montypython,58.0,1.0,,True,2023-08-15 07:15:10,August,Tuesday
7,16z0xzh,TheLastAirbender,13.0,3.0,meme violation,True,2023-10-03 22:25:15,October,Tuesday
8,10vxs5t,startrek,11.0,3.0,,True,2023-02-07 12:33:50,February,Tuesday
9,zx3wgi,consolerepair,2.0,0.0,,True,2022-12-28 10:54:33,December,Wednesday


## Comments  

Similar to the data about the posts, the comments data also includes an ID, a permalink to the comment, the comment date, the IP that I used, the name of the subreddit where the commented post was posted, gildings, and the net score through the Reddit API. It does not include url data like the posts do, and it holds two extra pieces of information: a link to the parent object and, _if the parent is posted by me_, an ID of the parent.  

I will drop the permalink, IP, and gildings. I will check if I am subscribed to the sub I have commented in, and I will check if I own the parent and the post.  

Also, note that some of the comments or their posts are inaccessible for different reasons, which prevents data collection through the Reddit API; therefore, there are some missing values in the data.

In [9]:
# Read the file, rename the id, date, and subreddit columns and drop the permalink, ip, and gildings columns.
comments_fname = "comment_headers.csv"
comments_df = (
    pd.read_csv(data_path + comments_fname)
    .rename(columns={"id": "ID", "date": "RawDate", "subreddit": "Subreddit"})
    .drop(columns=["permalink", "ip", "gildings"])
)

# The conversion only applies while the RawDate column is still present.
# Checking for the column explicitly (instead of catching KeyError around the
# whole conversion) keeps unrelated lookup errors from being silently ignored.
if "RawDate" in comments_df.columns:
    # IDs of my own comments, collected once for the parent ownership check
    own_comment_ids = set(comments_df["ID"])

    new_rows = []
    for raw_date, subreddit, post_link, parent_id in zip(
        comments_df["RawDate"], comments_df["Subreddit"], comments_df["link"], comments_df["parent"]
    ):
        # Strip the " UTC" suffix and convert the timestamp to the local time zone
        local_datetime = change_datetime_zone(raw_date.replace(" UTC", ""))

        # Get the post id from the link, note that it does not have to be the parent id if it is reply to another comment.
        # It is the path segment after "comments/"; partition keeps the whole
        # id even when no "/" follows it.
        post_id = post_link.partition("comments/")[2].partition("/")[0]

        # Check if parent ID exists (a missing value is not a string), and if it does check if it is owned by me
        if isinstance(parent_id, str):
            is_parent_owned = bool((posts_df == parent_id).any().any() or parent_id in own_comment_ids)
        else:
            is_parent_owned = False

        new_rows.append({
            "IsSubscribed": is_subscribed_to(subreddit, subreddits_df),
            **raw_date_to_datetime(local_datetime),
            "IsParentOwned": is_parent_owned,
            # Check if the post is owned by me
            "IsPostOwned": bool((posts_df == post_id).any().any()),
        })

    # Attach all new columns at once instead of writing them cell by cell
    new_columns_df = pd.DataFrame(
        new_rows,
        index=comments_df.index,
        columns=["IsSubscribed", "Date", "MonthName", "DayName", "IsParentOwned", "IsPostOwned"],
    )
    comments_df = comments_df.drop(columns=["RawDate", "parent", "link"]).join(new_columns_df)

# Specify the Dtypes for later use
comments_df["IsSubscribed"] = comments_df["IsSubscribed"].astype(dtype="bool")
comments_df["IsParentOwned"] = comments_df["IsParentOwned"].astype(dtype="bool")
# IsPostOwned gets the same treatment as the other flag columns
comments_df["IsPostOwned"] = comments_df["IsPostOwned"].astype(dtype="bool")
comments_df["Date"] = pd.to_datetime(comments_df["Date"])
# comments_df.to_csv(data_path + comments_fname, index=False)
comments_df

Unnamed: 0,ID,Subreddit,Score,IsSubscribed,Date,MonthName,DayName,IsParentOwned,IsPostOwned
0,j7jy563,startrek,7,True,2023-02-07 12:42:58,February,Tuesday,False,True
1,j7k0fm0,startrek,4,True,2023-02-07 13:16:29,February,Tuesday,False,False
2,jo77dm4,veYakinEvren,1,True,2023-06-15 10:07:38,June,Thursday,False,False
3,jr1sd8q,veYakinEvren,2,True,2023-07-07 20:33:12,July,Friday,False,False
4,j7yb85s,gaming,1,True,2023-02-10 10:06:23,February,Friday,False,False
...,...,...,...,...,...,...,...,...,...
117,jshmn5a,TheLastAirbender,3,True,2023-07-18 21:58:33,July,Tuesday,False,False
118,jw8pa9d,tumblr,1,False,2023-08-15 07:08:57,August,Tuesday,False,False
119,jxxbsqc,camphalfblood,3,True,2023-08-27 08:47:22,August,Sunday,False,True
120,jvey8c7,veYakinEvren,3,True,2023-08-09 10:59:16,August,Wednesday,False,False


# Data Visualization

## Tags and Flairs

I will count and visualize the tags and flairs of the subreddits that I am subscribed to. It is worth noting that while tags have no missing data since they are annotated by hand, flairs have missing data since some of the subreddits do not have flairs or have a poor flair system.

In [10]:
def _count_bar_chart(count_df, label_column, chart_title):
    """Horizontal bar chart of the "Count" column, one bar per label, sorted by count."""
    return alt.Chart(count_df).mark_bar().encode(
        alt.X("Count:Q", axis=alt.Axis(tickCount=count_df["Count"].max() // 2)),
        alt.Y(f"{label_column}:N", sort="-x", title=None),
        color=alt.value("#1f77b4"),
        tooltip=["Count:Q"],  # Show the count when hovering over the bar
    ).properties(
        # Set the size of the chart
        width=500,
        height=333.33,
        title=chart_title
    )


# Tags: only counted and charted when subreddits_df has a "Tags" column
chart1 = None
if "Tags" in subreddits_df.columns:
    tag_count_dict = {}
    for tag_list in subreddits_df["Tags"]:
        for tag in tag_list.split(", "):
            tag_count_dict[tag] = tag_count_dict.get(tag, 0) + 1
    tag_count_df = pd.DataFrame({"Tag": list(tag_count_dict.keys()), "Count": list(tag_count_dict.values())})
    tag_count_df["Tag"] = tag_count_df["Tag"].astype(dtype="category")

    # Create the chart for tags
    chart1 = _count_bar_chart(tag_count_df, "Tag", "Tags")


# Flairs: entries that are not strings (subreddits without flair data) are skipped
flair_count_dict = {}
for flair_list in subreddits_df["Flairs"]:
    if not isinstance(flair_list, str):
        continue
    for flair in flair_list.split(", "):
        flair_count_dict[flair] = flair_count_dict.get(flair, 0) + 1
flair_count_df = pd.DataFrame({"Flair": list(flair_count_dict.keys()), "Count": list(flair_count_dict.values())})
flair_count_df["Flair"] = flair_count_df["Flair"].astype(dtype="category")
# Keep only flairs counted at least twice; a count of 1 means the flair is unique to one subreddit
flair_count_df = flair_count_df[flair_count_df["Count"] > 1]

# Create the chart for flairs
chart2 = _count_bar_chart(flair_count_df, "Flair", "Flairs")

# Show both charts side by side when the tags chart exists, otherwise only the flairs chart
chart = alt.hconcat(chart1, chart2, spacing=120) if chart1 is not None else chart2

# Add paddings and axis styling to the displayed chart
chart = (
    chart.configure(
        padding={"left": 15, "right": 15, "top": 15, "bottom": 15},
        title={"fontSize": 18},
    )
    .configure_view(stroke=None)
    .configure_axisX(
        labelFontSize=13,
        grid=False,  # Remove the grid
        domainWidth=2,  # Set the width of the axis line
        domainColor="#000",  # Set the color of the axis line
    )
    .configure_axisY(
        labelFontSize=14,
        titleFontSize=18,
        domain=False,  # Remove the axis line
    )
)

# Save the figures: the tags chart and the combined chart only exist when tags are available
if chart1 is not None:
    chart1.save("figures/tags_and_flairs/altair_tags.html")
    chart1.save("figures/tags_and_flairs/altair_tags.png")
chart2.save("figures/tags_and_flairs/altair_flairs.html")
chart2.save("figures/tags_and_flairs/altair_flairs.png")
if chart1 is not None:
    chart.save("figures/tags_and_flairs/altair_tags_flairs.html")
    chart.save("figures/tags_and_flairs/altair_tags_flairs.png")
chart

## Logins

I will count the logins in 30-minute intervals and visualize them in a bar chart to see at what times I am active.  

In the code, I round (floor) the minutes to 30-minute intervals and count the logins. But for the _histograms_ to function properly, I need data that includes all the intervals, even those with no logins. Therefore, I will create a new dataframe with all the intervals, combine it with my data, count the logins, and subtract 1 from every count to get the correct counts.

In [11]:
# Floor every login time to the start of its 30-minute interval
login_datetime_rounded_df = login_datetime_df[["Date", "DayName"]].copy()
login_datetime_rounded_df["Date"] = login_datetime_rounded_df["Date"].apply(floor_dt, args=(30,))

# One placeholder row per half-hour interval of the day, so that intervals
# without any login still appear in the counts
time_range = pd.date_range(start="00:00", end="23:59", freq="30min").strftime("%H:%M")
time_df = pd.DataFrame({"Time": time_range})

# Label each login with its "HH:MM" interval and stack the placeholders on top
total = login_datetime_rounded_df.assign(Time=login_datetime_rounded_df["Date"].dt.strftime("%H:%M"))
total = pd.concat([time_df, total], axis=0).reset_index(drop=True)
# Count the rows per interval, then remove the placeholder row from every count
total = total.value_counts(subset=["Time"]).reset_index()
total["count"] -= 1

# Create the chart
chart = (
    alt.Chart(total)
    .mark_bar()
    .encode(
        x=alt.X("Time:O", title="Time", axis=alt.Axis(tickCount=48)),
        y=alt.Y("count:Q", title="Count", axis=alt.Axis(tickCount=total["count"].max())),
        tooltip=["count:Q"],
    )
    .properties(title="Total Logins by Time")
)

chart

In [12]:

# Week order used to stack the per-day charts
weekday_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# One placeholder row per half-hour interval of the day; it is the same for
# every day, so it is built once outside the loop
time_range = pd.date_range(start="00:00", end="23:59", freq="30min").strftime("%H:%M")
time_df = pd.DataFrame({"Time": time_range})

# Group by day
login_datetime_grouped = login_datetime_rounded_df.groupby("DayName")

# Create the charts for each day
charts = {}
for name, group in login_datetime_grouped:
    # assign() returns a new frame, so the groupby slice itself is not modified
    group = group.assign(Time=group["Date"].dt.strftime("%H:%M"))
    # Concat the time_df and group_time
    group = pd.concat([time_df, group], axis=0).reset_index(drop=True)
    # Count the number of each time
    group = group.value_counts(subset=["Time"]).reset_index()
    # Offset counts by 1 to remove the placeholder row of every interval
    group["count"] = group["count"] - 1

    # Create the chart
    charts[name] = alt.Chart(group).mark_bar().encode(
        x=alt.X("Time:O", title="Time", axis=alt.Axis(tickCount=48)),
        y=alt.Y("count:Q", title="Count", axis=alt.Axis(tickCount=group["count"].max())),
        tooltip=["count:Q"],
    ).properties(
        title=name
    )

# Sort the charts by day in week order. A weekday without any login has no
# chart; it is skipped instead of raising a KeyError.
charts_list = [charts[day] for day in weekday_order if day in charts]
# Combine the charts
chart = alt.vconcat(*charts_list, spacing=10)

chart = chart.configure(
    padding={"left": 15, "right": 15, "top": 15, "bottom": 15},
    title={"fontSize": 18},
).configure_view(
    stroke=None,
).configure_axisX(
    labelFontSize=13,
    grid=False,  # Remove the grid
    domainWidth=2,  # Set the width of the axis line
    domainColor="#000"  # Set the color of the axis line
).configure_axisY(
    labelFontSize=14,
    titleFontSize=18,
    domain=False,  # Remove the axis line
)

# Save the charts
for name, ch in charts.items():
    ch.save(f"figures/login_times/altair_{name}.html")
    ch.save(f"figures/login_times/altair_{name}.png")
chart.save("figures/login_times/altair_login_times.html")

chart