# Step 2 - Create Database with Pymongo

In [1]:
# Import Dependencies 
import pandas as pd
import pymongo

## DATA COLLECTION

In [2]:
# CSV exports to summarise. Their order must match the column labels
# ("Q219", "Q319", "Q220", "Q320") assigned to the summary tables later on.
files = [
    'Output/q219.csv',
    'Output/q319.csv',
    'Output/q220.csv',
    'Output/q320.csv',
]

# Per-file accumulators; each one gains exactly one entry per CSV.
averages, lengths = [], []                       # mean of `duration` / row count
trips, passholder_type, bike_type = [], [], []   # value_counts() Series

for file in files:
    data_df = pd.read_csv(file, index_col=None)

    # Scalar statistics for this file.
    averages.append(data_df['duration'].mean())
    lengths.append(len(data_df))

    # Frequency tables for the categorical columns, all built the same way.
    for column, bucket in (
        ('trip_route_category', trips),
        ('passholder_type', passholder_type),
        ('bike_type', bike_type),
    ):
        bucket.append(data_df[column].value_counts())

    print("calculating....", file)
    


calculating.... Output/q219.csv
calculating.... Output/q319.csv
calculating.... Output/q220.csv
calculating.... Output/q320.csv


In [3]:
# Every value_counts() Series collected in `trips` becomes one row; transpose
# so that the categories are rows and each input file is a column.
trips_df = pd.DataFrame(trips).transpose()
trips_df.columns = ["Q219", "Q319", "Q220", "Q320"]
trips_df.head()

Unnamed: 0,Q219,Q319,Q220,Q320
One Way,189886,253109,147422,233699
Round Trip,16468,22088,39164,36286


In [4]:
# One column per input file, one row per pass type. A pass type absent from
# a file shows up as NaN in that file's column.
passholder_type_df = pd.DataFrame(passholder_type).transpose()
passholder_type_df.columns = ["Q219", "Q319", "Q220", "Q320"]
passholder_type_df.head()

Unnamed: 0,Q219,Q319,Q220,Q320
Indego30,133344.0,193213.0,129905.0,203931.0
Indego365,37843.0,47471.0,18515.0,30524.0
Day Pass,34197.0,34458.0,38165.0,35530.0
IndegoFlex,851.0,55.0,1.0,
Walk-up,84.0,,,


In [5]:
# One column per input file, one row per bike type.
bike_type_df = pd.DataFrame(bike_type).transpose()
bike_type_df.columns = ["Q219", "Q319", "Q220", "Q320"]
bike_type_df.head()

Unnamed: 0,Q219,Q319,Q220,Q320
standard,193132,215693,138198,209025
electric,13222,59504,48388,60960


In [6]:
# Two-row table: row 0 holds the mean `duration` per file, row 1 the number
# of rows per file. Column labels follow the order of `files`.
duration_avg = pd.DataFrame(
    [averages, lengths],
    columns=["Q219", "Q319", "Q220", "Q320"],
)
duration_avg.head()

Unnamed: 0,Q219,Q319,Q220,Q320
0,23.67407,26.047871,39.236009,27.292601
1,206354.0,275197.0,186586.0,269985.0


In [7]:
# Replace the positional row labels (0, 1) with descriptive names.
row_names = {0: 'Average_Duration', 1: 'Total_Trips'}
duration_avg = duration_avg.rename(row_names, axis='index')

In [8]:
# Stack the pass-type rows on top of the bike-type rows.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; with identical column labels the result is the same.
step1_df = pd.concat([passholder_type_df, bike_type_df])

In [9]:
# Add the Average_Duration / Total_Trips rows below the category counts.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; with identical column labels the result is the same.
step2_df = pd.concat([step1_df, duration_avg])

In [10]:
# Finish the summary table by adding the trip-route-category rows.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; with identical column labels the result is the same.
summary_df = pd.concat([step2_df, trips_df])

In [11]:
# Move the row labels out of the index into a regular column named 'index'.
# NOTE: not idempotent -- run this cell only once per fresh `summary_df`.
summary_df = summary_df.reset_index(drop=False)

In [17]:
# Give the former index column a meaningful name.
summary_df = summary_df.rename({'index': 'labels'}, axis='columns')

In [18]:
# Preview the first five rows of the combined summary table.
summary_df.head(n=5)

Unnamed: 0,labels,Q219,Q319,Q220,Q320
0,Indego30,133344.0,193213.0,129905.0,203931.0
1,Indego365,37843.0,47471.0,18515.0,30524.0
2,Day Pass,34197.0,34458.0,38165.0,35530.0
3,IndegoFlex,851.0,55.0,1.0,
4,Walk-up,84.0,,,


In [13]:
# Optional: uncomment the next line to also save the summary table as a CSV file.
#summary_df.to_csv('Output/summary_correct.csv')

# Create Database Using Pymongo

In [19]:
# Open a client connection to the MongoDB server on localhost, port 27017.
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [20]:
# Get a handle on the `indego_db` database.
db = client['indego_db']

In [21]:
# Start from a clean slate: drop the `summary` collection if it already
# exists so that re-running the notebook does not insert duplicate documents.
db['summary'].drop()

## Convert Summary to Records

In [22]:
# Convert the DataFrame into a list of row dictionaries (one dict per row,
# keyed by column name). Despite its name, `df_dict` is a list; list(...)
# takes a shallow copy of the list returned by to_dict.
df_dict = list(summary_df.to_dict(orient='records'))

In [23]:
# Insert every row dictionary as one document in the `summary` collection.
db['summary'].insert_many(df_dict)

<pymongo.results.InsertManyResult at 0x7ff7922f0648>

In [24]:
# Spot-check the insert by printing two of the stored documents.
cursor = db.summary.find({}).limit(2)
for document in cursor:
    print(document)

{'_id': ObjectId('601b4ae7c12bea86bd79e203'), 'labels': 'Indego30', 'Q219': 133344.0, 'Q319': 193213.0, 'Q220': 129905.0, 'Q320': 203931.0}
{'_id': ObjectId('601b4ae7c12bea86bd79e204'), 'labels': 'Indego365', 'Q219': 37843.0, 'Q319': 47471.0, 'Q220': 18515.0, 'Q320': 30524.0}


## Read Collection into a Dataframe

In [None]:
# Read the `summary` collection from MongoDB back into a DataFrame.
# This rebinds `summary_df` to the stored documents.
stored_docs = list(db.summary.find({}))
summary_df = pd.DataFrame(stored_docs)
summary_df

In [None]:
# A bare find() displays the cursor object itself rather than the documents.
db['summary'].find({})

In [None]:
# Rebuild plain row dictionaries from the stored documents, keeping only the
# listed fields (in this key order) and leaving out `_id`.
summary_fields = ('labels', 'Q219', 'Q220', 'Q320', 'Q319')

data = [
    {field: doc[field] for field in summary_fields}
    for doc in db.summary.find()
]

print(data)
    

In [None]:
# Materialise and display every document in the `summary` collection.
list(db['summary'].find())