In [1]:
import json
import pprint

import pandas as pd
import pymysql
from sqlalchemy import create_engine
from sqlalchemy import inspect
from sqlalchemy.ext.declarative import declarative_base

from config import dbuser, dbpasswd, dburi, dbport, dbname

# Let PyMySQL stand in for the MySQLdb driver that the "mysql://" URL scheme expects.
pymysql.install_as_MySQLdb()
Base = declarative_base()

### Load the CSV into a DataFrame

In [2]:
# Read the cleaned 2018 KCPD crime CSV (path is relative to the notebook's
# working directory) and preview the first rows.
csv_file = "KCPD_Crime_Data_2018_cleaned.csv"
kcmo_data_df = pd.read_csv(filepath_or_buffer=csv_file)
kcmo_data_df.head(n=5)

Unnamed: 0,#,Report_No,Reported_Date,Reported_Time,Offense,Description,Address,City,Zip_Code,Area,...,Involvement,Race,Sex,Age,Firearm_Used_Flag,Location,Lat_Lng,Lat,Lng,18325
0,2,180097829,12/20/2018,15:52,401,Aggravated Assault,500 BOOTH AV,KANSAS CITY,64124,EPD,...,SUS,W,M,18,N,"500 BOOTH AV\nKANSAS CITY 64124\n(39.105511, -...","39.105511, -94.482144",39.105511,-94.482144,
1,4,180097829,12/20/2018,15:52,401,Aggravated Assault,500 BOOTH AV,KANSAS CITY,64124,EPD,...,SUS,P,M,18,N,"500 BOOTH AV\nKANSAS CITY 64124\n(39.105511, -...","39.105511, -94.482144",39.105511,-94.482144,
2,9,180093115,12/3/2018,17:07,401,Aggravated Assault,4100 FLORA AV,KANSAS CITY,64110,CPD,...,SUS,W,F,18,Y,"4100 FLORA AV\nKANSAS CITY 64110\n(39.052161, ...","39.052161, -94.566533",39.052161,-94.566533,
3,12,180091999,11/29/2018,14:11,401,Aggravated Assault,900 E 9 ST,KANSAS CITY,64108,CPD,...,SUS,W,M,18,N,"900 E 9 ST\nKANSAS CITY 64108\n(39.103158, -94...","39.103158, -94.57202",39.10315,-94.57202,
4,17,180089325,11/18/2018,12:28,401,Aggravated Assault,6000 E 10 ST,KANSAS CITY,64126,EPD,...,SUS,U,M,18,Y,"6000 E 10 ST\nKANSAS CITY 64126\n(39.100277, -...","39.100277, -94.511176",39.100277,-94.511176,


### Create a new DataFrame with selected columns

In [3]:
# Keep only the columns that will be loaded into the database. The trailing
# .copy() makes the subset an independent frame rather than a view of
# kcmo_data_df.
new_kcmo_data_df = kcmo_data_df.loc[
    :,
    [
        'Report_No', 'Reported_Date', 'Reported_Time', 'Offense',
        'Description', 'Address', 'City', 'Zip_Code', 'Area',
        'Invl_No', 'Involvement', 'Race', 'Sex', 'Age',
        'Firearm_Used_Flag', 'Location', 'Lat_Lng', 'Lat', 'Lng',
    ],
].copy()
new_kcmo_data_df.head(n=5)

Unnamed: 0,Report_No,Reported_Date,Reported_Time,Offense,Description,Address,City,Zip_Code,Area,Invl_No,Involvement,Race,Sex,Age,Firearm_Used_Flag,Location,Lat_Lng,Lat,Lng
0,180097829,12/20/2018,15:52,401,Aggravated Assault,500 BOOTH AV,KANSAS CITY,64124,EPD,2,SUS,W,M,18,N,"500 BOOTH AV\nKANSAS CITY 64124\n(39.105511, -...","39.105511, -94.482144",39.105511,-94.482144
1,180097829,12/20/2018,15:52,401,Aggravated Assault,500 BOOTH AV,KANSAS CITY,64124,EPD,1,SUS,P,M,18,N,"500 BOOTH AV\nKANSAS CITY 64124\n(39.105511, -...","39.105511, -94.482144",39.105511,-94.482144
2,180093115,12/3/2018,17:07,401,Aggravated Assault,4100 FLORA AV,KANSAS CITY,64110,CPD,1,SUS,W,F,18,Y,"4100 FLORA AV\nKANSAS CITY 64110\n(39.052161, ...","39.052161, -94.566533",39.052161,-94.566533
3,180091999,11/29/2018,14:11,401,Aggravated Assault,900 E 9 ST,KANSAS CITY,64108,CPD,2,SUS,W,M,18,N,"900 E 9 ST\nKANSAS CITY 64108\n(39.103158, -94...","39.103158, -94.57202",39.10315,-94.57202
4,180089325,11/18/2018,12:28,401,Aggravated Assault,6000 E 10 ST,KANSAS CITY,64126,EPD,1,SUS,U,M,18,Y,"6000 E 10 ST\nKANSAS CITY 64126\n(39.100277, -...","39.100277, -94.511176",39.100277,-94.511176


### Connect to local database

In [5]:
# Build the SQLAlchemy engine from the connection settings imported from
# config.py (user, password, host, port, database name).
# NOTE(review): the credentials are interpolated into the URL unescaped; a
# password containing characters such as "@" or "/" would need URL-encoding.
# Confirm the values in config.py.
engine = create_engine(
    f"mysql://{dbuser}:{dbpasswd}@{dburi}:{dbport}/{dbname}"
)
# Emit CREATE TABLE for every model registered on Base. No models are declared
# in the cells visible here, so this presumably creates nothing - verify.
Base.metadata.create_all(bind=engine)

### Check for tables

In [6]:
# List the tables that currently exist in the connected database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API below is the supported replacement and also works on
# older releases, so the cell keeps returning the same list of names.
inspect(engine).get_table_names()

['crime_data_2018']

### Use pandas to load the CSV-derived DataFrame into the database

In [7]:
# Write the selected columns to the crime_data_2018 table.
# if_exists='replace' drops and recreates the table on every run, so the cell
# can be re-run safely; index=False keeps the DataFrame index out of the table.
new_kcmo_data_df.to_sql(
    name='crime_data_2018',
    con=engine,
    index=False,
    if_exists='replace',
)

In [8]:
# Read the table back from the database and preview the first rows to confirm
# the load succeeded.
pd.read_sql_query(sql='select * from crime_data_2018', con=engine).head(n=5)

Unnamed: 0,Report_No,Reported_Date,Reported_Time,Offense,Description,Address,City,Zip_Code,Area,Invl_No,Involvement,Race,Sex,Age,Firearm_Used_Flag,Location,Lat_Lng,Lat,Lng
0,180097829,12/20/2018,15:52,401,Aggravated Assault,500 BOOTH AV,KANSAS CITY,64124,EPD,2,SUS,W,M,18,N,"500 BOOTH AV\nKANSAS CITY 64124\n(39.105511, -...","39.105511, -94.482144",39.105511,-94.482144
1,180097829,12/20/2018,15:52,401,Aggravated Assault,500 BOOTH AV,KANSAS CITY,64124,EPD,1,SUS,P,M,18,N,"500 BOOTH AV\nKANSAS CITY 64124\n(39.105511, -...","39.105511, -94.482144",39.105511,-94.482144
2,180093115,12/3/2018,17:07,401,Aggravated Assault,4100 FLORA AV,KANSAS CITY,64110,CPD,1,SUS,W,F,18,Y,"4100 FLORA AV\nKANSAS CITY 64110\n(39.052161, ...","39.052161, -94.566533",39.052161,-94.566533
3,180091999,11/29/2018,14:11,401,Aggravated Assault,900 E 9 ST,KANSAS CITY,64108,CPD,2,SUS,W,M,18,N,"900 E 9 ST\nKANSAS CITY 64108\n(39.103158, -94...","39.103158, -94.57202",39.10315,-94.57202
4,180089325,11/18/2018,12:28,401,Aggravated Assault,6000 E 10 ST,KANSAS CITY,64126,EPD,1,SUS,U,M,18,Y,"6000 E 10 ST\nKANSAS CITY 64126\n(39.100277, -...","39.100277, -94.511176",39.100277,-94.511176


### Build a sample JSON string and write the full dataset to a JSON file

In [9]:
# Serialize the first three rows to a JSON string (one object per row) so the
# format can be inspected in the next cell.
temp_json = new_kcmo_data_df.head(n=3).to_json(orient='records')
# Write the complete dataset to disk in the same records-oriented layout.
new_kcmo_data_df.to_json(path_or_buf='kcpd_crime.json', orient='records')

### Print the sample records to confirm the JSON conversion worked

In [10]:
# Round-trip the sample JSON string through the json module so it can be
# printed with indentation and alphabetically sorted keys.
parsed = json.loads(temp_json)
print(json.dumps(parsed, sort_keys=True, indent=4))



[
    {
        "Address": "500  BOOTH AV",
        "Age": 18,
        "Area": "EPD",
        "City": "KANSAS CITY",
        "Description": "Aggravated Assault",
        "Firearm_Used_Flag": "N",
        "Invl_No": 2,
        "Involvement": "SUS",
        "Lat": 39.105511,
        "Lat_Lng": "39.105511, -94.482144",
        "Lng": "-94.482144",
        "Location": "500 BOOTH AV\nKANSAS CITY 64124\n(39.105511, -94.482144)",
        "Offense": 401,
        "Race": "W",
        "Report_No": 180097829,
        "Reported_Date": "12/20/2018",
        "Reported_Time": "15:52",
        "Sex": "M",
        "Zip_Code": 64124
    },
    {
        "Address": "500  BOOTH AV",
        "Age": 18,
        "Area": "EPD",
        "City": "KANSAS CITY",
        "Description": "Aggravated Assault",
        "Firearm_Used_Flag": "N",
        "Invl_No": 1,
        "Involvement": "SUS",
        "Lat": 39.105511,
        "Lat_Lng": "39.105511, -94.482144",
        "Lng": "-94.482144",
        "Location": "500 