This file reads 9 CSV files into pandas dataframes,
    prints the number of rows and columns in each dataframe,
    prints the column names of each dataframe,
    prints the individual and total number of rides,
    prints the number of bikes belonging to each company,
    prints the average ride distance per company (in feet),
    plots the starting & ending latitude and longitude of each bike ride for each company.

### Exploratory Data Analysis of Bike Trips Pilot



In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

#### Loading CSVs into Dataframe

In [None]:
def read_csv(input_file):
    '''
    Load a CSV into a pandas dataframe.

    input:
        input_file: path (or anything else pd.read_csv accepts) of the CSV to load

    output:
        dataframe holding the parsed contents of the file
    '''
    loaded_frame = pd.read_csv(input_file)
    return loaded_frame

In [None]:
# Each company supplies three files, loaded in the order:
# customer report, maintenance, trips.

# Lime
lime_customer, lime_maintenance, lime_trips = map(read_csv, (
    'Lime_May Customer Report.csv',
    'Lime_May Maintenance.csv',
    'Lime_May Trips.csv',
))

# ofo
ofo_customer, ofo_maintenance, ofo_trips = map(read_csv, (
    'ofo-reports-06112018.csv',
    'ofo-maintenance-06112018.csv',
    'ofo-trips-06112018.csv',
))

# Zagster
zagster_customer, zagster_maintenance, zagster_trips = map(read_csv, (
    'zagster-reports-201805.csv',
    'zagster-maintenance-201805.csv',
    'zagster-trips-201805.csv',
))

In [None]:
# Display names for the dataframes; kept in the same order as
# list_of_dataframes below so the two lists can be walked in parallel.
string_list_of_dataframes = [
    'lime_customer',
    'lime_maintenance',
    'lime_trips',
    'ofo_customer',
    'ofo_maintenance',
    'ofo_trips',
    'zagster_customer',
    'zagster_maintenance',
    'zagster_trips',
]

# The dataframes themselves, position-for-position with the names above.
list_of_dataframes = [
    lime_customer,
    lime_maintenance,
    lime_trips,
    ofo_customer,
    ofo_maintenance,
    ofo_trips,
    zagster_customer,
    zagster_maintenance,
    zagster_trips,
]

##### Basic Exploration of All Dataframes

In [None]:
def info(df):
    '''
    Given a dataframe, prints a summary of the dataset.

    input:
        df: pandas dataframe to summarise

    output:
        None; a heading, the df.info() summary and a blank spacer are
        written to standard output
    '''
    print('DATASET INFORMATION \n')
    # df.info() writes the summary itself and returns None, so it must not be
    # wrapped in print() -- doing so emits a stray "None" after the summary.
    df.info()
    # Spacer separating this summary from whatever is printed next.
    print('\n \n \n')


In [None]:
# Shape of every dataframe: rows and columns, excluding header and index

row_template = '{}{:>30}'
for frame_name, frame in zip(string_list_of_dataframes, list_of_dataframes):
    row_count, column_count = frame.shape
    print(frame_name)
    print(row_template.format('NUMBER OF ROWS', 'NUMBER OF COLUMNS'))
    print(row_template.format(row_count, column_count))
    print('')


In [None]:
# Column labels of every dataframe, each preceded by the dataframe's name

for frame_name, frame in zip(string_list_of_dataframes, list_of_dataframes):
    column_labels = frame.columns
    print(frame_name)
    print(column_labels)
    print()

Questions:

- Total Number of Rides
- Rides by each company
- Average ride distance
- Number of bikes of each company


In [None]:
# Distinct trip IDs per company (each export names its ID column differently)
num_lime_trip = lime_trips['TRIP_ID'].nunique()
num_ofo_trip = ofo_trips['Trip ID'].nunique()
num_zagster_trips = zagster_trips['Trip ID'].nunique()

per_company_counts = (num_lime_trip, num_ofo_trip, num_zagster_trips)
total_ride_count = sum(per_company_counts)

# Printed in order: Lime, ofo, Zagster, then the combined total
for ride_count in per_company_counts + (total_ride_count,):
    print(ride_count)

In [None]:
# Mean trip distance per company, printed in order: Lime, ofo, Zagster.
# NOTE(review): only Lime's column name states feet; confirm that the
# 'Trip Distance' columns of ofo and Zagster use the same unit.
distance_sources = (
    (lime_trips, 'DISTANCE_FEET'),
    (ofo_trips, 'Trip Distance'),
    (zagster_trips, 'Trip Distance'),
)
for trips, distance_column in distance_sources:
    print(trips[distance_column].mean())

In [None]:
# Distinct bike IDs seen in each company's trips, printed in order:
# Lime, ofo, Zagster
bike_id_sources = (
    (lime_trips, 'BIKE_ID'),
    (ofo_trips, 'Bike ID'),
    (zagster_trips, 'Bike ID'),
)
for trips, bike_column in bike_id_sources:
    print(trips[bike_column].nunique())

### Visual Plotting

In [None]:
def plotting_lat_long(x_axis, y_axis, dataframe):
    '''
    Given an x (latitude) and y (longitude) axis, and the dataframe,
        plots a joint distribution of those latitude and longitude points

    input:
        x_axis: name of the dataframe column drawn on the x axis
        y_axis: name of the dataframe column drawn on the y axis
        dataframe: dataframe containing both columns

    output:
        None; the plot is shown and its figure is then closed
    '''
    # jointplot is a figure-level function that creates its own figure, so the
    # size is passed through `height` (inches, square figure). Calling
    # plt.figure() beforehand would only leave an extra empty figure behind.
    grid = sns.jointplot(x=x_axis, y=y_axis, data=dataframe, height=8)
    plt.show()
    # Close the figure owned by the grid so repeated calls do not pile up figures.
    plt.close(grid.fig)

In [None]:
print('LIME TRIPS LATITUDE & LONGITUDE DISTRIBUTION \n')
# Trip start points first, then trip end points
lime_coordinate_columns = (
    ('START_LATITUDE', 'START_LONGITUDE'),
    ('END_LATITUDE', 'END_LONGITUDE'),
)
for latitude_column, longitude_column in lime_coordinate_columns:
    plotting_lat_long(latitude_column, longitude_column,
                      lime_trips[[latitude_column, longitude_column]])

In [None]:
print('OFO TRIPS LATITUDE & LONGITUDE DISTRIBUTION \n')
# Trip start points first, then trip end points
ofo_coordinate_columns = (
    ('From Latitude', 'From Longitude'),
    ('To Latitude', 'To Longitude'),
)
for latitude_column, longitude_column in ofo_coordinate_columns:
    plotting_lat_long(latitude_column, longitude_column,
                      ofo_trips[[latitude_column, longitude_column]])

In [None]:
print('ZAGSTER TRIPS LATITUDE & LONGITUDE DISTRIBUTION \n')
# Trip start points first, then trip end points
zagster_coordinate_columns = (
    ('From Latitude', 'From Longitude'),
    ('To Latitude', 'To Longitude'),
)
for latitude_column, longitude_column in zagster_coordinate_columns:
    plotting_lat_long(latitude_column, longitude_column,
                      zagster_trips[[latitude_column, longitude_column]])