In [1]:
import pymongo
from pymongo import MongoClient, UpdateOne
import json

# Adding for query to find top ten stations 
#from pymongo.collection import Collection
#from pymongo.aggregation import Aggregation

In [2]:
# Create an instance of MongoClient
mongo = MongoClient(port=27017)

In [3]:
# check our list of databases
print(mongo.list_database_names())

['admin', 'autosaurus', 'chicago_bikes', 'classDB', 'config', 'epa', 'fruits_db', 'gardenDB', 'local', 'met', 'travel_db', 'uk_food']


In [4]:
# assign the database to a variable name
db = mongo.chicago_bikes

In [5]:
# review the collections in our new database
print(db.list_collection_names())

['weather_daily', 'Top10Routes', 'withLatLong', 'Top10EndStations', 'withStationName', 'withoutStationName', 'RouteDistance', 'Top10StartStations', 'divvy_ridedata', 'divvy_ridedata_merged']


In [6]:
divvy_rides = db['divvy_ridedata']
weather_daily = db['weather_daily']
withStation = db['withStationName']
withoutStation = db['withoutStationName']
divvy_ridedata_merged = db['divvy_ridedata_merged']

In [7]:
# Create a pipeline query to find docouments that have lat/long  
pipeline = [
    {
        "$match": {
            "$and": [
                { "start_lat": { "$ne": "" } },
                { "start_lat": { "$ne": 0 } },
                { "start_lng": { "$ne": "" } },
                { "start_lng": { "$ne": 0 } },
                { "end_lat": { "$ne": "" } },
                { "end_lat": { "$ne": 0 } },
                { "end_lng": { "$ne": "" } },
                { "end_lng": { "$ne": 0 } }
            ]
        }
    }, 
    {"$out": "withLatLong"}
]

# Perform the aggregation
result = list(divvy_ridedata_merged.aggregate(pipeline))

In [8]:
# review the collections in our new database
print(db.list_collection_names())

['weather_daily', 'withLatLong', 'Top10Routes', 'Top10EndStations', 'withStationName', 'withoutStationName', 'RouteDistance', 'Top10StartStations', 'divvy_ridedata', 'divvy_ridedata_merged']


In [9]:
#Assign collection to a variable
withLatLong = db['withLatLong']

In [10]:
print(withLatLong.find_one())

{'_id': ObjectId('64e404c003505f880eefb45e'), 'end_lat': 42.01256011541, 'end_lng': -87.6743671152, 'end_station_id': 'RP-007', 'end_station_name': 'Clark St & Touhy Ave', 'ended_at': '2022-01-10 08:46:17', 'ended_at_date': '2022-01-10', 'ended_at_time': '08:46:17', 'member_casual': 'casual', 'ride_id': 'A6CF8980A652D272', 'rideable_type': 'electric_bike', 'start_lat': 42.012763, 'start_lng': -87.6659675, 'start_station_id': 525, 'start_station_name': 'Glenwood Ave & Touhy Ave', 'started_at': '2022-01-10 08:41:56', 'started_at_date': '2022-01-10', 'started_at_time': '08:41:56', 'weather_data': {'_id': ObjectId('64e413e0cf380c0c3c2737eb'), 'date': '2022-01-10', 'cloud_cover': 17.0, 'precipitation': 0.24, 'min_temp': 9.5, 'max_temp': 20.84, 'morning_temp': 13.69, 'afternoon_temp': 9.73, 'evening_temp': 14.0, 'night_temp': 20.75, 'max_windspeed': 16.35}}


In [11]:
# Create a pipeline query to find distance of each route in descending order by length  
pipeline = [
    {
        "$addFields": {
            "start_lat": { "$toDouble": "$start_lat" },
            "start_lng": { "$toDouble": "$start_lng" },
            "end_lat": { "$toDouble": "$end_lat" },
            "end_lng": { "$toDouble": "$end_lng" }
        }
    },
    {
        "$addFields": {
            "distance": {
                "$sqrt": {
                    "$add": [
                        {
                            "$pow": [
                                { "$subtract": ["$end_lat", "$start_lat"] },
                                2
                            ]
                        },
                        {
                            "$pow": [
                                {
                                    "$multiply": [
                                        { "$subtract": ["$end_lng", "$start_lng"] },
                                        { "$cos": { "$avg": ["$start_lat", "$end_lat"] } }
                                    ]
                                },
                                2
                            ]
                        }
                    ]
                }
            }
        }
    },
    {
        "$sort": {"distance": -1}
    },
    {"$out": "RouteDistance"}
]

# Perform the aggregation
result = list(withLatLong.aggregate(pipeline))

# Assign to a variable
RouteDistance = db['RouteDistance']

In [12]:
# review the collections in our new database
print(db.list_collection_names())

['weather_daily', 'withLatLong', 'Top10Routes', 'Top10EndStations', 'withStationName', 'withoutStationName', 'Top10StartStations', 'divvy_ridedata', 'divvy_ridedata_merged', 'RouteDistance']


In [13]:
RouteDistance = db['RouteDistance']

In [14]:
print(RouteDistance.find_one())

{'_id': ObjectId('64e404c003505f880eefe24f'), 'end_lat': 41.8646, 'end_lng': -87.681, 'end_station_id': 'Pawel Bialowas - Test- PBSC charging station', 'end_station_name': 'Pawel Bialowas - Test- PBSC charging station', 'ended_at': '2022-01-14 11:15:50', 'ended_at_date': '2022-01-14', 'ended_at_time': '11:15:50', 'member_casual': 'casual', 'ride_id': '3327172413547F64', 'rideable_type': 'electric_bike', 'start_lat': 45.635034323, 'start_lng': -73.79647696, 'start_station_id': 'Pawel Bialowas - Test- PBSC charging station', 'start_station_name': 'Pawel Bialowas - Test- PBSC charging station', 'started_at': '2022-01-14 11:13:15', 'started_at_date': '2022-01-14', 'started_at_time': '11:13:15', 'weather_data': {'_id': ObjectId('64e413e0cf380c0c3c2737f1'), 'date': '2022-01-14', 'cloud_cover': 34.0, 'precipitation': 0.0, 'min_temp': 30.38, 'max_temp': 38.8, 'morning_temp': 35.74, 'afternoon_temp': 35.15, 'evening_temp': 32.5, 'night_temp': 38.39, 'max_windspeed': 10.0}, 'distance': 14.027235