In [1]:
import pandas as pd
import sqlite3

#### Create a connection to the database using the sqlite3 library

In [2]:
# Location of the SQLite database file, relative to the notebook's working directory.
DB_PATH = '../../datasets/checking-logs.sqlite'
# Open the connection that the later cells reuse through `con`.
con = sqlite3.connect(DB_PATH)

#### Get the schema of the table pageviews using pd.io.sql.read_sql and the query "PRAGMA table_info(pageviews);"

In [3]:
# PRAGMA table_info yields one row per column of the pageviews table.
schema_query = "PRAGMA table_info(pageviews)"
pageviews_schema = pd.io.sql.read_sql(schema_query, con)
print(pageviews_schema)

   cid      name       type  notnull dflt_value  pk
0    0     index    INTEGER        0       None   0
1    1       uid       TEXT        0       None   0
2    2  datetime  TIMESTAMP        0       None   0


* Get the subtable using a single query, where:
  * only uid and datetime are used
  * only user data (user_*) is used and not admin data
  * it is sorted by uid in ascending order
  * the index column is datetime
  * datetime is converted to DatetimeIndex
  * the name of the dataframe is pageviews


In [4]:
# Select the page views of regular users only (uids starting with "user_").
# In a LIKE pattern "_" is a single-character wildcard, so it is escaped
# to match a literal underscore; unescaped, the pattern would also accept any
# uid that starts with "user" followed by one arbitrary character.
# The literal is single-quoted: double quotes denote identifiers in SQL and are
# only tolerated as string literals by SQLite for backward compatibility.
# A raw string keeps the backslashes intact for SQLite.
sql = r"""
SELECT uid, datetime
FROM pageviews
WHERE uid LIKE 'user\_%' ESCAPE '\'
ORDER BY uid ASC
"""
# Parse the datetime column and use it as the index, which gives the frame
# a DatetimeIndex.
pageviews = pd.read_sql(
    sql,
    con,
    parse_dates=['datetime'],
    index_col='datetime',
)
print(pageviews)
print(type(pageviews.index))

                                uid
datetime                           
2020-04-26 21:53:59.624136   user_1
2020-04-26 22:06:19.478143   user_1
2020-04-26 22:12:09.614497   user_1
2020-04-30 19:29:01.831635   user_1
2020-05-05 20:26:32.894852   user_1
...                             ...
2020-04-29 16:51:21.877630  user_30
2020-05-09 20:30:47.034282  user_30
2020-05-22 11:30:18.368990   user_5
2020-05-21 16:28:28.217529   user_9
2020-05-21 16:36:40.915488   user_9

[987 rows x 1 columns]
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


#### Close the connection to the database

In [5]:
# Close the SQLite connection opened earlier in the notebook.
con.close()