In [None]:
# Dependencies
import pandas as pd
import datetime as dt
import numpy as np
import glob
import os
  
# Downloaded csv files are saved outside the repository due to size limits
# Please view readme for file locations
# Glob pattern for the trip files; the empty directory part means the pattern
# is resolved against the current working directory
joined_files = os.path.join("", "20220*.csv")

# glob returns matches in arbitrary order, so sort them to get the same
# concatenated row order on every run
joined_list = sorted(glob.glob(joined_files))

# Fail with a clear message instead of pandas' "No objects to concatenate"
if not joined_list:
    raise FileNotFoundError(
        f"No files match {joined_files!r} in {os.getcwd()!r}"
    )

# Read every file and stack the rows into one frame with a fresh 0..n-1 index
df = pd.concat(map(pd.read_csv, joined_list), ignore_index=True)

In [None]:
# Initial set up and cleaning
# Built as one chain so every step returns a new frame: the dropna result is
# actually kept (dropna is not in-place), and no column is ever assigned onto
# a filtered slice (which triggers SettingWithCopyWarning).
df = (
    # Drop every row that has at least one missing value
    df.dropna(how='any')
    # Parse the trip timestamps so the .dt accessors below work
    .assign(
        started_at=lambda d: pd.to_datetime(d['started_at']),
        ended_at=lambda d: pd.to_datetime(d['ended_at']),
    )
    .assign(
        # Trip length in minutes
        trip_duration=lambda d: (d['ended_at'] - d['started_at']).dt.total_seconds()/60,
        # Calendar month of the trip start, as a monthly Period
        month=lambda d: d['started_at'].dt.to_period('M'),
        # Weekday name of the trip start
        day_of_week=lambda d: d['started_at'].dt.day_name(),
    )
    # Keep trips shorter than 61 minutes that were not made on docked bikes
    .loc[lambda d: (d['trip_duration'] < 61) & (d['rideable_type'] != 'docked_bike'), :]
    # Round coordinates to 3 decimal places; other columns are left untouched
    .round({'start_lat': 3, 'start_lng': 3, 'end_lat': 3, 'end_lng': 3})
)

In [None]:
# Creating time bins for later grouping
# Bin edges: 0, 10, ..., 50, then one wide final bin ending at 120
bins = list(range(0, 51, 10)) + [120]
labels = ['0-10min', '11-20min', '21-30min', '31-40min', '41-50min', '51min+']
# pd.cut builds right-closed intervals by default: (0, 10], (10, 20], ...
# Values outside every interval become NaN
df['time_bin'] = pd.cut(x=df['trip_duration'], bins=bins, labels=labels)

In [None]:
# Shape of dataframe to better grasp size
# (rows, columns) tuple; as the last expression in the cell it is shown as the cell output
df.shape

In [None]:
# Working copy of df with every incomplete row removed; df itself is not modified
df2 = df.copy().dropna(how='any')

In [None]:
# Station summary data set up
# Grouping by month/start station. Both the grouped frame and the keys come
# from df2, like the other summaries, so the grouping does not depend on
# index alignment between two different frames.
df_sstation = df2.groupby([df2['month'], df2['start_station_name'].rename('station')])
# Number of trips that started at each station in each month
sstation = df_sstation['start_station_name'].count().rename('scount')
# Mean trip duration for those same trips
sstation_duration = df_sstation['trip_duration'].mean().rename('scount_duration')
# One single-column frame per metric, both indexed by (month, station)
df_start = sstation.to_frame()
df_sduration = sstation_duration.to_frame()

In [None]:
# Join the trip counts and mean durations on their shared month/station keys
station_summary = df_start.merge(df_sduration, how='outer', on=['month', 'station'])
# Table used for export: any missing values are replaced with 0
df_station_summary = station_summary.fillna(0)

In [None]:
# Export the station summary for Tableau
# index=True (the default) writes the frame's index into the file as well
df_station_summary.to_csv('Resources/station_summary.csv', index=True)

In [None]:
# Lat/Lng lookup table for starting locations
# Keep only the station name and its coordinates, ordered by station name,
# without rows that have a missing value
df_slocation = (
    df2.filter(items=['start_station_name', 'start_lat', 'start_lng'], axis=1)
    .copy()
    .sort_values(by='start_station_name')
    .dropna(how='any')
)
# One row per station: the first row seen for each station name is kept
df_slocation_unique = df_slocation.drop_duplicates(
    subset='start_station_name',
    keep='first',
)

In [None]:
# Export the start-location lookup table for Tableau
# index=True (the default) writes the frame's index into the file as well
df_slocation_unique.to_csv('Resources/slocation_table.csv', index=True)

In [None]:
# Member/Casual summary
# Group trips by month, calendar date (formatted mm/dd/YYYY) and rider type
df_member = df2.groupby(
    by=[
        'month',
        df2['started_at'].dt.strftime('%m/%d/%Y').rename('Date'),
        'member_casual',
    ]
)
# Trips per group, counted through the start station column
member_total = df_member['start_station_name'].count()
# Mean trip duration per group, rounded to 2 decimal places
member_duration = df_member['trip_duration'].mean().round(2)

In [None]:
# Creating df: the two member metrics side by side, one column each
member_table = pd.concat(
    [
        member_total.rename('member_total'),
        member_duration.rename('member_duration'),
    ],
    axis=1,
)

In [None]:
# Export the member/casual summary for Tableau
# index=True (the default) writes the frame's index into the file as well
member_table.to_csv('Resources/member_table.csv', index=True)

In [None]:
# Average duration and totals by bike type and member
# Group trips by month, bike type and rider type
df_type = df2.groupby(by=['month', 'rideable_type', 'member_casual'])
# Trips per group, counted through the start station column
type_total = df_type['start_station_name'].count()
# Mean trip duration per group, rounded to 2 decimal places
type_duration = df_type['trip_duration'].mean().round(2)

In [None]:
# Creating df: the two bike-type metrics side by side, one column each
type_table2 = pd.concat(
    [
        type_total.rename('type_total'),
        type_duration.rename('type_duration'),
    ],
    axis=1,
)

In [None]:
# Export the bike-type summary for Tableau
# index=True (the default) writes the frame's index into the file as well
type_table2.to_csv('Resources/type_duration.csv', index=True)

In [None]:
# Total counts by DOW and average trip times
# Group trips by month, start station, hour of the trip start and weekday name
df_dayofweek = df2.groupby(
    by=[
        df2['month'],
        df2['start_station_name'],
        df2['started_at'].dt.hour.rename('hour'),
        df2['day_of_week'],
    ]
)
# Trips per group
dow_total = df_dayofweek['day_of_week'].count().rename('counts')
# Mean trip duration per group, rounded to 2 decimal places
dow_duration = df_dayofweek['trip_duration'].mean().round(2)

In [None]:
# Creating df: the two day-of-week metrics side by side, one column each
dow_duration_table = pd.concat(
    [
        dow_total.rename('dow_total'),
        dow_duration.rename('dow_duration'),
    ],
    axis=1,
)

In [None]:
# Export the day-of-week summary for Tableau
# index=True (the default) writes the frame's index into the file as well
dow_duration_table.to_csv('Resources/dow_duration_table.csv', index=True)

In [None]:
# Total counts by station and time bins
# time_bin is categorical (it is produced by pd.cut), so the groupby result
# depends on the `observed` option. observed=False keeps bins that have no
# trips in the output; it was the implicit default, but pandas 2.1+ deprecates
# relying on it, so it is stated explicitly to keep the exported table stable
# across pandas versions.
df_bin_count = df2.groupby(
    [df2['month'], df2['start_station_name'], df2['time_bin']],
    observed=False,
)
# Trips per month/station/bin, counted through the started_at column
dow_bin_total = df_bin_count['started_at'].count().rename('counts')

In [None]:
# Export the per-station time-bin counts for Tableau
# index=True (the default) writes the index into the file as well
dow_bin_total.to_csv('Resources/dow_bin_total.csv', index=True)