In [1]:
import polars as pl
import datetime as dt

# 8.1 Parsing Unix timestamps

The columns in polars dataframes are statically typed, meaning there is no ambiguity regarding parsing data as integers or as dates. The file we're using here is a popularity-contest file I found on my system at `/var/log/popularity-contest`.

Here's an [explanation of how this file works](http://popcon.ubuntu.com/README).

In [2]:
# Load the space-delimited log under our own column names, then discard the
# final row (presumably a trailer line rather than a package record -- confirm
# against the file).
popcon_columns = ['atime', 'ctime', 'package-name', 'mru-program', 'tag']
popcon_raw = pl.read_csv(
    '../data/popularity-contest',
    separator=' ',
    ignore_errors=True,
    new_columns=popcon_columns,
)
popcon = popcon_raw[:-1]

The columns are the access time, created time, package name, recently used program, and a tag. In this case, polars has parsed the access time and created time as integers instead of datetimes.

In [3]:
# Preview the first five rows (five is also what `head` returns by default).
popcon.head(5)

atime,ctime,package-name,mru-program,tag
i64,i64,str,str,str
1387295797,1367633260,"""perl-base""","""/usr/bin/perl""",
1387295796,1354370480,"""login""","""/bin/su""",
1387295743,1354341275,"""libtalloc2""","""/usr/lib/x86_6…",
1387295743,1387224204,"""libwbclient0""","""/usr/lib/x86_6…","""<RECENT-CTIME>…"
1387295742,1354341253,"""libselinux1""","""/lib/x86_64-li…",


We can explicitly convert the integers to datetimes using the `from_epoch` function:

In [4]:
# Replace both epoch-second integer columns with Datetime columns of the same
# name. `time_unit='s'` is also the default of `from_epoch`; it is spelled out
# here for both columns.
# Note: this rebinds `popcon`, so the cell is meant to run once per fresh load.
epoch_columns = ('atime', 'ctime')
popcon = popcon.with_columns(
    [pl.from_epoch(pl.col(name), time_unit='s') for name in epoch_columns]
)

If we look at the dtype now, it's `pl.Datetime`.

In [5]:
# Inspect the dtype of the converted access-time column.
popcon.get_column('atime').dtype

Datetime(time_unit='us', time_zone=None)

So now we can look at our `atime` and `ctime` as dates!

In [6]:
# Show the first five rows now that the timestamps are datetimes.
popcon.head(5)

atime,ctime,package-name,mru-program,tag
datetime[μs],datetime[μs],str,str,str
2013-12-17 15:56:37,2013-05-04 02:07:40,"""perl-base""","""/usr/bin/perl""",
2013-12-17 15:56:36,2012-12-01 14:01:20,"""login""","""/bin/su""",
2013-12-17 15:55:43,2012-12-01 05:54:35,"""libtalloc2""","""/usr/lib/x86_6…",
2013-12-17 15:55:43,2013-12-16 20:03:24,"""libwbclient0""","""/usr/lib/x86_6…","""<RECENT-CTIME>…"
2013-12-17 15:55:42,2012-12-01 05:54:13,"""libselinux1""","""/lib/x86_64-li…",


Now suppose we want to look at all packages that aren't libraries. First, I want to get rid of everything with timestamp 0.

In [7]:
# Drop rows whose access time is not after the Unix epoch (raw timestamp 0).
# The result is bound back to `popcon`: without the assignment the filtered
# frame is only displayed, and every later cell would still see the
# timestamp-0 rows.
popcon = popcon.filter(
    pl.col('atime') > dt.datetime(1970, 1, 1)
)
# Display a preview of the filtered frame.
popcon.head(10)

atime,ctime,package-name,mru-program,tag
datetime[μs],datetime[μs],str,str,str
2013-12-17 15:56:37,2013-05-04 02:07:40,"""perl-base""","""/usr/bin/perl""",
2013-12-17 15:56:36,2012-12-01 14:01:20,"""login""","""/bin/su""",
2013-12-17 15:55:43,2012-12-01 05:54:35,"""libtalloc2""","""/usr/lib/x86_6…",
2013-12-17 15:55:43,2013-12-16 20:03:24,"""libwbclient0""","""/usr/lib/x86_6…","""<RECENT-CTIME>…"
2013-12-17 15:55:42,2012-12-01 05:54:13,"""libselinux1""","""/lib/x86_64-li…",
2013-12-17 15:55:42,2012-12-01 05:54:35,"""libstdc++6""","""/usr/lib/x86_6…",
2013-12-17 15:55:40,2013-12-16 20:03:22,"""libpam-winbind…","""/lib/x86_64-li…","""<RECENT-CTIME>…"
2013-12-17 15:55:40,2012-12-01 05:54:13,"""libpam-modules…","""/lib/x86_64-li…",
2013-12-17 15:55:40,2012-12-01 05:54:13,"""libpam-ck-conn…","""/lib/security/…",
2013-12-17 15:55:40,2012-12-01 05:54:13,"""libpam-cap""","""/lib/x86_64-li…",


Now we can use polars' `filter` and `str` to look at rows where the package name doesn't contain 'lib'.

In [8]:
# Keep only the rows whose package name does not contain the substring 'lib'.
is_library = pl.col('package-name').str.contains('lib')
nonlibraries = popcon.filter(~is_library)

In [9]:
# The ten non-library rows with the largest (most recent) `ctime`.
nonlibraries.top_k(k=10, by='ctime')

atime,ctime,package-name,mru-program,tag
datetime[μs],datetime[μs],str,str,str
2013-12-17 04:55:39,2013-12-17 04:55:42,"""ddd""","""/usr/bin/ddd""","""<RECENT-CTIME>…"
2013-12-16 20:03:20,2013-12-16 20:05:13,"""nodejs""","""/usr/bin/npm""","""<RECENT-CTIME>…"
2013-12-16 20:03:20,2013-12-16 20:05:04,"""thunderbird-lo…","""/usr/lib/thund…","""<RECENT-CTIME>…"
2013-12-16 20:03:20,2013-12-16 20:05:04,"""switchboard-pl…","""/usr/lib/plugs…","""<RECENT-CTIME>…"
2013-12-16 20:08:27,2013-12-16 20:05:03,"""software-cente…","""/usr/sbin/upda…","""<RECENT-CTIME>…"
2013-12-16 20:03:20,2013-12-16 20:05:00,"""samba-common-b…","""/usr/bin/net.s…","""<RECENT-CTIME>…"
2013-12-16 20:08:25,2013-12-16 20:04:59,"""postgresql-cli…","""/usr/lib/postg…","""<RECENT-CTIME>…"
2013-12-16 20:08:23,2013-12-16 20:04:58,"""postgresql-9.1…","""/usr/lib/postg…","""<RECENT-CTIME>…"
2013-12-16 20:03:20,2013-12-16 20:04:55,"""php5-dev""","""/usr/include/p…","""<RECENT-CTIME>…"
2013-12-16 20:03:20,2013-12-16 20:04:54,"""php-pear""","""/usr/share/php…","""<RECENT-CTIME>…"
