In [83]:
import pandas as pd
import numpy as np
import datetime as dt
import pytz

In [55]:
def extract_fields(line):
    """Pick the date, request, referrer and user-agent pieces out of a log line.

    The line is split on the literal ``' || '`` separator; the pieces at
    positions 3, 4, 7 and 8 are returned in that order, each stripped of
    leading and trailing whitespace.

    Returns:
        A 4-tuple of strings.

    Raises:
        IndexError: if the line splits into fewer than nine pieces.
    """
    wanted_positions = (3, 4, 7, 8)
    pieces = line.split(' || ')
    return tuple(pieces[pos].strip() for pos in wanted_positions)

In [56]:
# Sample access-log line in the ' || '-delimited layout that extract_fields() splits.
log_lines = ["/logger/ || 70.123.102.76 || - || 31/Aug/2015:23:49:01 +0000  || GET /logger/?action-view&site_id=123 HTTP/1.1 || 200 || 236 || https://foo.com/some/url || Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36 || - || - || - || 0.000"]
data = [extract_fields(line) for line in log_lines]
# Name the columns at construction time so an empty `log_lines` still yields a
# correctly labelled (empty) frame instead of a length-mismatch error.
df = pd.DataFrame(data, columns=['date', 'request', 'referrer', 'user_agent'])
df

Unnamed: 0,date,request,referrer,user_agent
0,31/Aug/2015:23:49:01 +0000,GET /logger/?action-view&site_id=123 HTTP/1.1,https://foo.com/some/url,Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.3...


In [57]:
# The timestamps are still plain strings (object dtype) at this point.
df['date']

0    31/Aug/2015:23:49:01 +0000
Name: date, dtype: object

In [58]:
# Parse the access-log timestamps (e.g. "31/Aug/2015:23:49:01 +0000").
# `%z` reads whatever UTC offset each value carries rather than matching the
# literal text " +0000", and `utc=True` normalises the result to UTC.
df['date'] = pd.to_datetime(df['date'], format='%d/%b/%Y:%H:%M:%S %z', utc=True)
df

Unnamed: 0,date,request,referrer,user_agent
0,2015-08-31 23:49:01+00:00,GET /logger/?action-view&site_id=123 HTTP/1.1,https://foo.com/some/url,Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.3...


In [59]:
# Use the parsed timestamps as the index. Rebinding `df` (rather than
# `inplace=True`) keeps this step an ordinary expression that can be chained.
df = df.set_index('date')
df

Unnamed: 0_level_0,request,referrer,user_agent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-08-31 23:49:01+00:00,GET /logger/?action-view&site_id=123 HTTP/1.1,https://foo.com/some/url,Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.3...


In [60]:
# Inspect the index: after the steps above it is a UTC-aware DatetimeIndex named 'date'.
df.index

DatetimeIndex(['2015-08-31 23:49:01+00:00'], dtype='datetime64[ns, UTC]', name=u'date', freq=None)

In [61]:
# Re-express the UTC timestamps as US Pacific wall-clock time; the underlying
# instants are unchanged, only the displayed offset differs.
df.index = df.index.tz_convert('America/Los_Angeles')
df

Unnamed: 0_level_0,request,referrer,user_agent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-08-31 16:49:01-07:00,GET /logger/?action-view&site_id=123 HTTP/1.1,https://foo.com/some/url,Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.3...


In [71]:
# Truncate every timestamp to the start of its hour so rows can be bucketed hourly.
# Lower-case 'h' is the hourly alias; the upper-case 'H' spelling is deprecated
# in recent pandas releases.
df.index = df.index.floor('1h')
df

Unnamed: 0_level_0,request,referrer,user_agent
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-08-31 16:00:00-07:00,GET /logger/?action-view&site_id=123 HTTP/1.1,https://foo.com/some/url,Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.3...


In [82]:
# 'date' refers to the (hour-floored) index level and 'request' to a column.
# count() reports, for each (hour, request) pair, the number of non-null values
# in every remaining column.
df.groupby(['date', 'request']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,referrer,user_agent
date,request,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-08-31 16:00:00-07:00,GET /logger/?action-view&site_id=123 HTTP/1.1,1,1


In [84]:
# Current local time as a naive datetime (no tzinfo attached).
now = dt.datetime.now()
now

datetime.datetime(2017, 12, 27, 15, 17, 35, 311839)

In [124]:
# "Next month" here is a fixed 30-day span after `now`, not a calendar-month step.
next_month = now + dt.timedelta(days=30)
next_month

datetime.datetime(2018, 1, 26, 15, 17, 35, 311839)

In [102]:
# Start of the week that contains each row, stored as a timezone-naive timestamp.
# Periods cannot carry a timezone, so drop it explicitly first (keeping the local
# wall-clock time) instead of letting to_period() discard it with a warning.
# 'W' periods end on Sunday, so start_time is the Monday at 00:00.
df['week_start'] = df.index.tz_localize(None).to_period('W').start_time
df

Unnamed: 0_level_0,request,referrer,user_agent,week_start
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-08-31 16:00:00-07:00,GET /logger/?action-view&site_id=123 HTTP/1.1,https://foo.com/some/url,Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.3...,2015-08-31


In [120]:
# The following week's start is one week after `week_start`.
df['next_week_start'] = df['week_start'].add(pd.DateOffset(weeks=1))
df

Unnamed: 0_level_0,request,referrer,user_agent,week_start,next_week_start
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-08-31 16:00:00-07:00,GET /logger/?action-view&site_id=123 HTTP/1.1,https://foo.com/some/url,Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.3...,2015-08-31,2015-09-07


In [121]:
# Ten consecutive daily timestamps starting at `now` (the time of day is kept).
pd.date_range(start=now, periods=10, freq='D')

DatetimeIndex(['2017-12-27 15:17:35.311839', '2017-12-28 15:17:35.311839',
               '2017-12-29 15:17:35.311839', '2017-12-30 15:17:35.311839',
               '2017-12-31 15:17:35.311839', '2018-01-01 15:17:35.311839',
               '2018-01-02 15:17:35.311839', '2018-01-03 15:17:35.311839',
               '2018-01-04 15:17:35.311839', '2018-01-05 15:17:35.311839'],
              dtype='datetime64[ns]', freq='D')

In [125]:
# Weekly timestamps between `now` and `next_month`. 'W' anchors on Sundays, so
# the first value falls on the first Sunday on or after `now`.
pd.date_range(start=now, end=next_month, freq='W')

DatetimeIndex(['2017-12-31 15:17:35.311839', '2018-01-07 15:17:35.311839',
               '2018-01-14 15:17:35.311839', '2018-01-21 15:17:35.311839'],
              dtype='datetime64[ns]', freq='W-SUN')