### Use Huayi's time binning function to assign our geotagged tweets to 15-minute time bins:
#### Bins start at 05, 20, 35, and 50 minutes past the hour (minutes 00–04 fall in the previous hour's :50 bin).

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
# Huayi's time binning function! 

from datetime import timedelta 

def binMinute(minute):
    '''
    Map a minute-of-hour to the start minute of its 15-minute bin.

    Bins start at 05, 20, 35 and 50 past the hour. Minutes 0-4 belong to
    the :50 bin of the hour before, which is signalled by returning -10
    (ten minutes before the top of the current hour).

    Parameters
    ----------
    minute : int
        Minute of the hour, 0 <= minute <= 59.

    Returns
    -------
    int
        One of -10, 5, 20, 35, 50.

    Raises
    ------
    ValueError
        If minute is outside the range 0-59.
    '''
    if not 0 <= minute <= 59:
        raise ValueError("minute must be in the range 0-59, got %r" % (minute,))
    # Shift by 5 so the bin edges fall on multiples of 15. Floor division
    # sends minutes 0-4 to index -1, which maps back to -10.
    bin_index = int((minute - 5) // 15)
    return bin_index * 15 + 5
    
def BinTime(df_datetime_col):
    '''
    Assign every datetime in a column to the start of its 15-minute bin.

    Parameters
    ----------
    df_datetime_col : pandas.Series
        Column of datetime-like objects (anything exposing .minute,
        .replace(minute=..., second=..., microsecond=...) and addition
        with a timedelta, e.g. datetime.datetime or pandas.Timestamp).

    Returns
    -------
    pandas.Series
        A new column with the binned datetimes. The bins are as defined
        in binMinute; minutes 0-4 land on :50 of the previous hour.
    '''
    def _to_bin_start(stamp):
        # Anchor at the top of the hour. Seconds and microseconds are zeroed
        # as well so the result sits exactly on the bin boundary even when
        # the caller has not floored the input to the minute.
        hour_start = stamp.replace(minute=0, second=0, microsecond=0)
        # binMinute returns -10 for minutes 0-4, which steps back into the
        # previous hour's :50 bin.
        return hour_start + timedelta(minutes=binMinute(stamp.minute))

    return df_datetime_col.apply(_to_bin_start)

In [3]:
# Load the labeled geotagged tweets from a local pickle file.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files from a trusted source.
df = pd.read_pickle("./geotweets_labeled.pkl")

In [4]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,date,text,username,day_of_week,hour,safe
2020-03-23 13:57:44-04:00,2020-03-23 13:57:44-04:00,Omg @Instacart - y’all better not mess this up...,JordanGazay,Monday,13,
2020-03-23 14:10:14-04:00,2020-03-23 14:10:14-04:00,𝐐𝐔𝐄𝐒𝐓𝐈𝐎𝐍 | Are you moving for thirty minutes e...,lifenleggings,Monday,14,0.0
2020-03-23 15:03:32-04:00,2020-03-23 15:03:32-04:00,"#newnormal Day 8 Oh dayum, did Governor #andre...",miles_randy,Monday,15,
2020-03-23 18:14:17-04:00,2020-03-23 18:14:17-04:00,#isolation #prospectpark #prospectparkalliance...,andrewbwhite1,Monday,18,
2020-03-23 18:15:25-04:00,2020-03-23 18:15:25-04:00,#isolation #prospectpark #fishing #prospectpar...,andrewbwhite1,Monday,18,


In [5]:
# Floor each tweet's timestamp to the minute first so the resulting bin labels
# carry no leftover seconds, then map it to its 15-minute bin.
df['time_bin'] = BinTime(df.date.dt.floor('min'))

In [6]:
# Check the new time_bin column.
df.head()

Unnamed: 0,date,text,username,day_of_week,hour,safe,time_bin
2020-03-23 13:57:44-04:00,2020-03-23 13:57:44-04:00,Omg @Instacart - y’all better not mess this up...,JordanGazay,Monday,13,,2020-03-23 13:50:00-04:00
2020-03-23 14:10:14-04:00,2020-03-23 14:10:14-04:00,𝐐𝐔𝐄𝐒𝐓𝐈𝐎𝐍 | Are you moving for thirty minutes e...,lifenleggings,Monday,14,0.0,2020-03-23 14:05:00-04:00
2020-03-23 15:03:32-04:00,2020-03-23 15:03:32-04:00,"#newnormal Day 8 Oh dayum, did Governor #andre...",miles_randy,Monday,15,,2020-03-23 14:50:00-04:00
2020-03-23 18:14:17-04:00,2020-03-23 18:14:17-04:00,#isolation #prospectpark #prospectparkalliance...,andrewbwhite1,Monday,18,,2020-03-23 18:05:00-04:00
2020-03-23 18:15:25-04:00,2020-03-23 18:15:25-04:00,#isolation #prospectpark #fishing #prospectpar...,andrewbwhite1,Monday,18,,2020-03-23 18:05:00-04:00


In [7]:
# Replace the existing index with a default 0..n-1 integer index;
# drop=True discards the old index instead of keeping it as a column.
df = df.reset_index(drop=True)
df.head()

Unnamed: 0,date,text,username,day_of_week,hour,safe,time_bin
0,2020-03-23 13:57:44-04:00,Omg @Instacart - y’all better not mess this up...,JordanGazay,Monday,13,,2020-03-23 13:50:00-04:00
1,2020-03-23 14:10:14-04:00,𝐐𝐔𝐄𝐒𝐓𝐈𝐎𝐍 | Are you moving for thirty minutes e...,lifenleggings,Monday,14,0.0,2020-03-23 14:05:00-04:00
2,2020-03-23 15:03:32-04:00,"#newnormal Day 8 Oh dayum, did Governor #andre...",miles_randy,Monday,15,,2020-03-23 14:50:00-04:00
3,2020-03-23 18:14:17-04:00,#isolation #prospectpark #prospectparkalliance...,andrewbwhite1,Monday,18,,2020-03-23 18:05:00-04:00
4,2020-03-23 18:15:25-04:00,#isolation #prospectpark #fishing #prospectpar...,andrewbwhite1,Monday,18,,2020-03-23 18:05:00-04:00


In [8]:
# Make time_bin timezone-naive while keeping the local wall-clock time.
# tz_localize(None) removes the tz info without shifting the clock reading,
# so no hard-coded UTC offset is needed and the result stays correct for any
# offset (e.g. on both sides of a daylight-saving change).
df['time_bin'] = df['time_bin'].dt.tz_localize(None)

In [9]:
# Confirm that time_bin no longer shows a UTC offset.
df.head()

Unnamed: 0,date,text,username,day_of_week,hour,safe,time_bin
0,2020-03-23 13:57:44-04:00,Omg @Instacart - y’all better not mess this up...,JordanGazay,Monday,13,,2020-03-23 13:50:00
1,2020-03-23 14:10:14-04:00,𝐐𝐔𝐄𝐒𝐓𝐈𝐎𝐍 | Are you moving for thirty minutes e...,lifenleggings,Monday,14,0.0,2020-03-23 14:05:00
2,2020-03-23 15:03:32-04:00,"#newnormal Day 8 Oh dayum, did Governor #andre...",miles_randy,Monday,15,,2020-03-23 14:50:00
3,2020-03-23 18:14:17-04:00,#isolation #prospectpark #prospectparkalliance...,andrewbwhite1,Monday,18,,2020-03-23 18:05:00
4,2020-03-23 18:15:25-04:00,#isolation #prospectpark #fishing #prospectpar...,andrewbwhite1,Monday,18,,2020-03-23 18:05:00


In [10]:
# Save the frame, now including the time_bin column, to a new pickle file.
df.to_pickle("./geotweets_labeled_binned.pkl")