In [3]:
import pymongo
from pymongo import MongoClient, UpdateOne
import json

# Adding for query to find top ten stations 
#from pymongo.collection import Collection
#from pymongo.aggregation import Aggregation

In [4]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [5]:
# confirm that our new database was created
print(mongo.list_database_names())

['admin', 'autosaurus', 'chicago_bikes', 'classDB', 'config', 'epa', 'fruits_db', 'gardenDB', 'local', 'met', 'travel_db', 'uk_food']


In [6]:
# assign the database to a variable name
db = mongo.chicago_bikes

In [7]:
# review the collections in our new database
print(db.list_collection_names())

['weather_daily', 'withLatLong', 'Top10Routes', 'divvy_rides_by_season', 'Top10EndStations', 'withStationName', 'withoutStationName', 'Top10StartStations', 'divvy_ridedata', 'divvy_rides_by_month', 'divvy_ridedata_merged', 'RouteDistance']


In [8]:
#define collection variables
divvy_rides = db['divvy_ridedata']
weather_daily = db['weather_daily']
divvy_ridedata_merged = db['divvy_ridedata_merged']

In [9]:
# Use aggregation pipeline to create a collection that contains start and end station names
pipeline = [
         {"$match": {"start_station_name": {"$exists": True, "$ne": ""}, 
                     "end_station_name":{"$exists": True, "$ne": ""}}},

         {"$out": "withStationName"}
]

# Perform the aggregation
result = list(divvy_ridedata_merged.aggregate(pipeline))

# Assign to a variable
withStation = db["withStationName"]

In [10]:
#check if there are any documents without start or end station names
print(db.withStationName.find_one({"start_station_name":""}))
print(db.withStationName.find_one({"end_station_name":""}))

None
None


In [9]:
# Use aggregation pipeline to create a collection that doesn't contain start and end station names
pipeline = [
         {"$match": {"start_station_name": {"$exists": True, "$eq": ""}, 
                     "end_station_name":{"$exists": True, "$eq": ""}}},

         {"$out": "withoutStationName"}
]

# Perform the aggregation
result = list(divvy_ridedata_merged.aggregate(pipeline))

# Assign to a variable
withoutStation = db["withoutStationName"]

In [10]:
#check if there are any documents without start or end station names
print(db.withoutStationName.find_one({"start_station_name":{"$ne":""}}))
print(db.withoutStationName.find_one({"end_station_name":{"$ne":""}}))

None
None


In [11]:
# Use aggregation pipeline to find top ten start stations 
pipeline = [
    {
        "$group": {
            "_id": "$start_station_name",
            "count": {"$sum": 1},
            "latitude": {"$first": "$end_lat"},
            "longitude": {"$first": "$end_lng"}
        }
    },
    {
        "$sort": {"count": -1}
    },
    {
        "$limit": 10
    },
    {   "$out": "Top10StartStations"
}
]

# Perform the aggregation
result = list(withStation.aggregate(pipeline))

# Assign collection to a variable
Top10StartStations = db["Top10StartStations"]

In [12]:
# Check to see that the collection has been created
db.list_collection_names()

['Top10Routes',
 'weather_daily',
 'withStationName',
 'withoutStationName',
 'Top10EndStations',
 'Top10StartStations',
 'divvy_ridedata',
 'divvy_ridedata_merged']

In [13]:
# Find an example from the collection
Top10StartStations.find_one()

{'_id': 'Streeter Dr & Grand Ave',
 'count': 71269,
 'latitude': 41.880958,
 'longitude': -87.616743}

In [14]:
# Use aggregation pipeline to find top ten end stations 
pipeline = [
    {
        "$group": {
            "_id": "$end_station_name",
            "count": {"$sum": 1},
            "latitude": {"$first": "$end_lat"},
            "longitude": {"$first": "$end_lng"}
        }
    },
    {
        "$sort": {"count": -1}
    },
    {
        "$limit": 10
    },
    {   "$out": "Top10EndStations"
}
]

# Perform the aggregation
result = list(withStation.aggregate(pipeline))

# Assign to a variable
Top10EndStations = db['Top10EndStations']

In [15]:
# Check to see that the collection has been created
db.list_collection_names()

['Top10Routes',
 'weather_daily',
 'Top10EndStations',
 'withStationName',
 'withoutStationName',
 'Top10StartStations',
 'divvy_ridedata',
 'divvy_ridedata_merged']

In [16]:
# Find an example from the collection
Top10EndStations.find_one()

{'_id': 'Streeter Dr & Grand Ave',
 'count': 72540,
 'latitude': 41.892278,
 'longitude': -87.612043}

In [11]:
# Create a pipeline query to find the top ten bike routes (by start and end station)
pipeline = [
    {
        "$group": {
            "_id": { "Start Station": "$start_station_name", "End Station": "$end_station_name"},
            "count": {"$sum": 1},
            "start latitude": {"$first": "$start_lat"},
            "start longitude": {"$first": "$start_lng"},
            "end latitude": {"$first": "$end_lat"},
            "end longitude": {"$first": "$end_lng"}
        }
    },
    {"$sort": {"count": -1}
},
    {
        "$limit": 10
},
    {   "$out": "Top10Routes"
}
]
# Perform the aggregation
result = list(withStation.aggregate(pipeline))

# Assign to a variable
Top10Routes = db['Top10Routes']

In [12]:
# Check to see that the collection was added
db.list_collection_names()

['weather_daily',
 'withLatLong',
 'withStationName',
 'Top10Routes',
 'divvy_rides_by_season',
 'Top10EndStations',
 'withoutStationName',
 'Top10StartStations',
 'divvy_ridedata',
 'divvy_rides_by_month',
 'divvy_ridedata_merged',
 'RouteDistance']

In [13]:
# Find an example from the collection
Top10Routes.find_one()

{'_id': {'Start Station': 'Streeter Dr & Grand Ave',
  'End Station': 'Streeter Dr & Grand Ave'},
 'count': 12202,
 'start latitude': 41.892278,
 'start longitude': -87.612043,
 'end latitude': 41.892278,
 'end longitude': -87.612043}