In [10]:
import pandas as pd 
import numpy as np
import pygeohash as gh
import datetime

# Load the raw scooter observations.
df = pd.read_csv('bird_data.csv')

# Bucket every observation into a precision-6 geohash cell (the original note
# describes this as roughly a 12 block radius; the precision was previously 8).
# The two coordinate columns are iterated directly so pandas does not have to
# build a full row Series per record, which DataFrame.apply(axis=1) does.
df['geohash'] = [
    gh.encode(lat, lon, precision=6)
    for lat, lon in zip(df['latitude'], df['longitude'])
]



In [11]:
# Find the day of the week as a strftime '%w' code ('0' = Sunday ... '6' = Saturday).
# The column is parsed in one vectorized call instead of one strptime call per row;
# the result is still a string code, which is converted to a number further down.
# pd.to_datetime also accepts a column that is already datetime, so this cell can be
# re-run after the timestamp column has been converted.
df['day_of_week'] = (
    pd.to_datetime(df['timestamp'], format='%d-%b-%Y %H:%M:%S.%f')
    .dt.strftime('%w')
)

In [12]:
# The weekday codes are strings at this point; cast them to numbers first.
df['day_of_week'] = pd.to_numeric(df['day_of_week'])

# Weekend flag: 1 when the weekday code is 0 or 6, otherwise 0.
df['weekend'] = df['day_of_week'].map(lambda code: int(code in (0, 6)))

# Low-battery flag: 1 when battery_level is at or below 90, otherwise 0.
df['low_battery'] = np.where(df['battery_level'].le(90), 1, 0)

In [13]:
# Parse the timestamp strings into datetimes once, then pull out the month, day,
# hour and minute with the vectorized .dt accessor rather than a Python lambda
# evaluated for every row.
df['timestamp'] = pd.to_datetime(df['timestamp'], format='%d-%b-%Y %H:%M:%S.%f')
df['month'] = df['timestamp'].dt.month
df['day'] = df['timestamp'].dt.day
df['hour'] = df['timestamp'].dt.hour
df['minute'] = df['timestamp'].dt.minute

In [14]:
# Keep only the scooter id, geohash, weekday/weekend flags, battery flag and
# the time parts; everything else in df is left out of bird_df.
bird_df = df.loc[:, [
    'bird_id',
    'geohash',
    'day_of_week',
    'weekend',
    'low_battery',
    'month',
    'day',
    'hour',
    'minute',
]]

In [15]:
# Preview the first five rows of the trimmed frame.
bird_df.head(5)

Unnamed: 0,bird_id,geohash,day_of_week,weekend,low_battery,month,day,hour,minute
0,3ffc1a8b-a939-4db4-82fa-bd44ee18d3a4,9q59x9,2,0,1,6,11,19,35
1,6137c185-4c43-4a4f-8c9c-d1be75954d4a,9q59x9,2,0,1,6,11,19,35
2,da84d4b4-d3a6-405b-8fef-21c04f31ca74,9q59xd,2,0,0,6,11,19,35
3,73f1e864-c9f9-41b4-8381-d06b12c54364,9q59x9,2,0,1,6,11,19,35
4,29b37932-e2db-41b6-a117-355b14d3791e,9q59xd,2,0,1,6,11,19,35


In [16]:
# Write the trimmed frame to disk with a header row and without the index column.
bird_df.to_csv("bird_data-reformated.csv", header=True, index=False)

In [17]:
from rpy2.robjects.vectors import StrVector
import rpy2.robjects.packages as rpackages
import rpy2.robjects as robjects

def create_scooter_counts(data):
    """Count low-battery scooters seen between hours 21 and 23, per geohash and time slot.

    The aggregation runs in R (tidyverse) through rpy2. The CSV at ``data`` is
    read, filtered to rows with ``hour`` in 21..23 and ``low_battery != 0``,
    counted per (geohash, day_of_week, weekend, day, hour, minute), sorted by
    descending count and written to the relative path ``bird_data_pickups.csv``.

    Args:
        data: Path of the CSV file to read; handed unchanged to R's ``read_csv``.

    Returns:
        None. The result is only available through the written CSV file.
    """
    # This has to be a real tuple. A bare ('tidyverse') is just a string, and
    # iterating it yields single characters, so the check below would try to
    # install packages named "t", "i", "d", ... instead of tidyverse.
    package_names = ('tidyverse',)
    utils = rpackages.importr('utils')

    missing_packages = [name for name in package_names if not rpackages.isinstalled(name)]
    if missing_packages:
        # A CRAN mirror is only needed when something actually has to be installed.
        utils.chooseCRANmirror(ind=1)
        utils.install_packages(StrVector(missing_packages))

    # Loading tidyverse here is what makes read_csv, %>%, filter, etc. available
    # to the R function defined below; the returned handle itself is not needed.
    rpackages.importr('tidyverse')
    robjects.r['options'](warn=-1)  # silence R warnings
    create_file = robjects.r('''
        function(results_file) {
            df <- read_csv(results_file, col_names = T)
            nightly_pick_up <- df  %>% filter(hour >= 21 & hour <= 23)  %>% filter(low_battery != 0)# filter by 9PM -11:55PM data 
            scooter_counts <- nightly_pick_up %>% group_by(geohash, day_of_week, weekend, day, hour, minute) %>% count() %>% arrange(desc(n)) %>% ungroup()
            write_csv(scooter_counts, "bird_data_pickups.csv", col_names = T)

        }
        ''')

    create_file(data)

In [18]:
# Build the pickup count file from the reformatted export.
create_scooter_counts(data="bird_data-reformated.csv")

R[write to console]: Parsed with column specification:
cols(
  bird_id = col_character(),
  geohash = col_character(),
  day_of_week = col_integer(),
  weekend = col_integer(),
  low_battery = col_integer(),
  month = col_integer(),
  day = col_integer(),
  hour = col_integer(),
  minute = col_integer()
)

