# Objective - Merge Dataframes from all analyses, output as JSON file

In [10]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import pymongo
import json

# Import cleaned CSV files from Resources folder

In [11]:
# Locations of the cleaned CSV exports, all under the Resources folder
crime_csv = 'Resources/chicago_crime_ouput.csv'
weather_csv = 'Resources/chicago_weather_ouput.csv'
traffic_csv = 'Resources/Chicago_Monthly_Traffic_Crashes_2017-2018.csv'
visitors_csv = 'Resources/chicago_visitors.csv'

# Load each export into its own DataFrame
crime_df = pd.read_csv(crime_csv)
weather_df = pd.read_csv(weather_csv)
traffic_df = pd.read_csv(traffic_csv)
visitors_df = pd.read_csv(visitors_csv)

# Preview the visitors data (last expression, so the notebook renders it)
visitors_df.head()

Unnamed: 0,Dates,Total Visitors
0,2017-01-01,668436
1,2017-02-01,629435
2,2017-03-01,727423
3,2017-04-01,688882
4,2017-05-01,781547


# Merge Crime and Weather outputs

In [12]:
# The weather frame labels its date column 'dates'; relabel it to 'Dates'
# so it shares a join key with the crime frame.
weather_df = weather_df.rename(columns={'dates': 'Dates'})

# Join crime and weather rows that share the same 'Dates' value
# (pd.merge defaults to an inner join).
chicago_master_output = pd.merge(left=crime_df, right=weather_df, on='Dates')
chicago_master_output.head()

Unnamed: 0,Dates,Robbery,Assault,Sexual Assault,max_temp,min_temp,winds
0,2017-01,1075,1373,154,32.194971,27.789326,4.018817
1,2017-02,787,1295,124,39.188188,35.333724,4.34375
2,2017-03,783,1486,105,42.042916,36.36469,4.955645
3,2017-04,851,1638,146,57.387495,50.449995,4.586111
4,2017-05,903,1853,133,61.430484,54.624839,4.419355


# Merge traffic_df with chicago_master_output

In [13]:
# Keep only the first seven characters of each traffic date ('yyyy-mm') so the
# values line up with the month keys already in chicago_master_output
# (e.g. '2017-01'). The vectorized .str slice replaces the manual loop and the
# temporary 'Date' column, and is safe to re-run on already-truncated values.
traffic_df['Dates'] = traffic_df['Dates'].str[:7]

# Join the traffic columns onto the master frame by month
# (pd.merge defaults to an inner join).
chicago_master_output = pd.merge(chicago_master_output, traffic_df, on='Dates')
chicago_master_output.head()

Unnamed: 0,Dates,Robbery,Assault,Sexual Assault,max_temp,min_temp,winds,Crashes
0,2017-01,1075,1373,154,32.194971,27.789326,4.018817,8670
1,2017-02,787,1295,124,39.188188,35.333724,4.34375,8274
2,2017-03,783,1486,105,42.042916,36.36469,4.955645,10220
3,2017-04,851,1638,146,57.387495,50.449995,4.586111,10114
4,2017-05,903,1853,133,61.430484,54.624839,4.419355,11727


# Merge visitors_df with chicago_master_output

In [14]:
# Truncate the visitors_df dates (e.g. '2017-01-01') to 'yyyy-mm' so they match
# the month keys in chicago_master_output. The vectorized .str slice replaces
# the manual row-by-row loop.
visitors_df['Dates'] = visitors_df['Dates'].str[:7]

# Join the visitor totals onto the master frame by month
# (pd.merge defaults to an inner join).
chicago_master_output = pd.merge(chicago_master_output, visitors_df, on='Dates')
chicago_master_output.head()

Unnamed: 0,Dates,Robbery,Assault,Sexual Assault,max_temp,min_temp,winds,Crashes,Total Visitors
0,2017-01,1075,1373,154,32.194971,27.789326,4.018817,8670,668436
1,2017-02,787,1295,124,39.188188,35.333724,4.34375,8274,629435
2,2017-03,783,1486,105,42.042916,36.36469,4.955645,10220,727423
3,2017-04,851,1638,146,57.387495,50.449995,4.586111,10114,688882
4,2017-05,903,1853,133,61.430484,54.624839,4.419355,11727,781547


# Convert chicago_master_output to JSON file

In [15]:
# Write the merged frame into the Resources folder, one JSON entry per row
# index. A forward slash is used because a backslash is only a path separator
# on Windows; elsewhere it would become part of the file name and the file
# would land outside the Resources folder.
chicago_master_output.to_json('Resources/chicago_final.json', orient='index')

# Convert merged DataFrame to a dictionary, write to MongoDB

In [25]:
# Open a PyMongo client against the local MongoDB server
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Select the target database and collection (subscript access is equivalent
# to attribute access in PyMongo)
db = client['etl_project_chicago_db']
collection = db['items']

# Flatten the merged frame into {column name: list of column values}
chicago_dict = chicago_master_output.to_dict(orient='list')

# Store the dictionary as a single document. This is a plain insert, not an
# upsert: every run of this cell adds another document to the collection.
collection.insert_one(chicago_dict)


<pymongo.results.InsertOneResult at 0x209ed9a1f88>