This is a backup of the Jupyter notebook where the code for pulling the DPSS crime log records by date was set up.
A script version should eventually live on AWS to automate the process, e.g. gathering the previous week's entries every Wednesday.

DPSS
- https://www.dpss.umich.edu/content/crime-safety-data/daily-crime-fire-log/
- http://dpss.umich.edu/api/GetCrimeLogCache?date=12/31/1999
- http://dpss.umich.edu/api/GetCrimeLogCache?date=12/20/2021
- http://dpss.umich.edu/api/GetCrimeLogCache?date=2/28/2022

Libraries
- https://realpython.com/python-sleep/
- https://stackoverflow.com/questions/993358/creating-a-range-of-dates-in-python
- https://stackoverflow.com/questions/20573459/getting-the-date-of-7-days-ago-from-current-date-in-python
- https://stackoverflow.com/questions/16573051/sound-alarm-when-code-finishes
- https://realpython.com/python-timer/

MongoDB
- https://docs.atlas.mongodb.com/tutorial/insert-data-into-your-cluster/
- https://enterprise-docs.anaconda.com/en/latest/data-science-workflows/data/mongodb.html
- https://www.mongodb.com/community/forums/t/dnspython-module-must-be-installed/56664

In [12]:
datetime.strptime("1999-12-30 10:51:12.0", "%Y-%m-%d %H:%M:%S.%f")

datetime.datetime(1999, 12, 30, 10, 51, 12)

In [None]:

# Scratch cell: parse an ISO-style timestamp and insert it into a "test" database
# to see how MongoDB stores a Python datetime.
# `datetime` is the class brought in by `from datetime import datetime`, so
# strptime is called on it directly (as the cell above already does).
d = datetime.strptime("2017-10-13T10:53:53.000Z", "%Y-%m-%dT%H:%M:%S.000Z")

with MongoClient() as mongo:
    db = mongo.get_database("test")
    # Collection.insert() no longer exists in PyMongo 4; insert_one() replaces it.
    db["dates"].insert_one({"date": d})

In [1]:
from datetime import date, datetime, timedelta
from pprint import pprint
from pymongo import MongoClient
import requests

In [18]:
def mongoConnect():
    """Connect to the DPSS data cluster and return a handle to the CrimeLog database.

    Credentials are taken from the ``DPSS_MONGO_USER`` and
    ``DPSS_MONGO_PASSWORD`` environment variables when they are set; otherwise
    the values originally embedded in this notebook are used.

    Returns:
        The ``CrimeLog`` database object of a newly created ``MongoClient``.
    """
    import os
    from urllib.parse import quote_plus

    # SECURITY(review): the fallback credentials below are committed in plain
    # text. Rotate them and supply the real values through the environment.
    user = os.environ.get("DPSS_MONGO_USER", "McUser")
    password = os.environ.get("DPSS_MONGO_PASSWORD", "pmxNrbLaO7Zfqqwv")

    # Percent-escape both parts so reserved URI characters ("@", ":", "/", ...)
    # in a credential cannot corrupt the connection string.
    connection_string = (
        "mongodb+srv://"
        f"{quote_plus(user)}:{quote_plus(password)}"
        "@umich-dpss-data.7otst.mongodb.net/myFirstDatabase?retryWrites=true&w=majority"
    )

    #### Instantiate client
    client = MongoClient(host=connection_string)

    #### Select the database named CrimeLog on that client
    database = client.CrimeLog
    return database


def getDateRangeThisWeek():
    """Return today and the six days before it, formatted for the API request.

    Returns:
        A list of seven ``M/D/YYYY`` strings (month and day are not
        zero-padded), newest first: today, yesterday, ..., six days ago.
    """
    #### Read the clock once so every entry derives from the same "today",
    #### even if the call happens to straddle midnight.
    now = date.today()
    print(f"Today's Date :: {now}")

    #### Dates for today, and the past 6 days.
    week = [now - timedelta(days=offset) for offset in range(7)]

    #### Build M/D/YYYY strings from the dates
    weekFormatted = [f"{eachDate.month}/{eachDate.day}/{eachDate.year}" for eachDate in week]
    print(f"Week :: {weekFormatted}")
    return weekFormatted


def pushCrimeData(targetDate, dataForDate, database):
    """Copy one day's crime log response into MongoDB for backup.

    Args:
        targetDate: Date string the request was made for; stored unchanged in
            the harvest index.
        dataForDate: Response object whose ``.json()`` returns a mapping with a
            ``"data"`` list of report dicts and a ``"count"`` value.
        database: Database handle exposing the ``CrimeReports`` and
            ``DatesHarvested`` collections.

    Raises:
        KeyError: If the decoded response has no ``"data"`` or ``"count"`` key.
    """
    #### Collections for entries and for the index of harvested dates
    datesCollected = database.DatesHarvested
    reportsCollected = database.CrimeReports

    #### Decode the body once and reuse it for both the records and the count
    payload = dataForDate.json()

    #### Insert Crime Records
    for crimeReport in payload["data"]:
        #### Trim whitespace from text fields; pass any non-string value
        #### (e.g. null or a number) through untouched instead of crashing.
        reportStripped = {
            key: value.strip() if isinstance(value, str) else value
            for key, value in crimeReport.items()
        }

        #### NOTE: "date" is stored as the string the API returned. Parsing it
        #### with "%Y-%m-%d %H:%M:%S.%f" first would hand MongoDB a datetime
        #### instead; that conversion is intentionally not applied here.
        reportsCollected.insert_one(reportStripped)
        print(".... entry posted")

    #### Record each date, when we requested the data for it, and record count.
    #### Written last on purpose: if anything above raises, the date is not
    #### marked as harvested.
    datesCollected.insert_one({
        "date": targetDate,
        "requested": str(datetime.now()),
        "count": payload["count"],
    })
    print(f"Success :: Copied {targetDate} to MongoDB")

    
def grabCrimeData(targetDate, database):
    """Request one day's crime log from the DPSS endpoint and store the outcome.

    On a 200 response the records are forwarded to ``pushCrimeData``; on any
    other status an entry is written to the ``ErrorPulls`` collection.

    Args:
        targetDate: Date string placed in the ``date`` query parameter
            (the date helpers in this notebook produce ``M/D/YYYY``).
        database: Database handle used for both the success and error paths.
    """
    print(f"Requesting... {targetDate}")
    requestURL = f"http://dpss.umich.edu/api/GetCrimeLogCache?date={targetDate}"
    dataForDate = requests.get(requestURL)

    if dataForDate.status_code == 200:
        # Forward the caller's handle rather than the notebook-global
        # `mongoConnection`, which may be undefined or point somewhere else.
        pushCrimeData(targetDate=targetDate, dataForDate=dataForDate, database=database)
        return

    # Non-200: log the failure so the date can be retried later.
    print(f"Error :: {dataForDate.status_code}")
    errorCollection = database.ErrorPulls
    errorCollection.insert_one({
        "date": targetDate,
        "requested": str(datetime.now()),
        "error": dataForDate.status_code
    })

In [19]:
# Open the database handle once; later cells pass it to grabCrimeData.
mongoConnection = mongoConnect()

In [21]:
#grabCrimeData(targetDate="12/31/1999", database=mongoConnection)
#grabCrimeData(targetDate="3/31/2022", database=mongoConnection)

***
#### Grab a month at a time

In [25]:
def getDateRange(startDate, span):
    """Return `span` consecutive dates starting at `startDate`, formatted for the API call.

    Args:
        startDate: First date of the range; must expose ``month``/``day``/``year``
            and support adding a ``timedelta`` (e.g. ``datetime.date``).
        span: Number of days to include, counting ``startDate`` itself.

    Returns:
        A list of ``M/D/YYYY`` strings (month and day are not zero-padded) in
        ascending order; empty when ``span`` is zero or negative.
    """
    #### Echo the requested start date
    startDateStr = f"{startDate.month}/{startDate.day}/{startDate.year}"
    print(f"Target Start Date :: {startDateStr}")

    #### startDate plus each of the following span-1 days
    days = [startDate + timedelta(days=offset) for offset in range(span)]

    #### Build M/D/YYYY strings from the dates
    daysFormatted = [f"{eachDate.month}/{eachDate.day}/{eachDate.year}" for eachDate in days]

    #### An empty range has no last element; say so instead of raising IndexError
    if not daysFormatted:
        print("Target End Date :: (empty range)")
        return daysFormatted

    print(f"Target End Date :: {daysFormatted[-1]}")
    return daysFormatted

# Leap years from 2000 onward (February needs span=29): 2000, 2004, 2008, 2012, 2016, 2020.
# span has to match the number of days in the month being pulled; April has 30.
targetMonth = getDateRange(startDate=date(year=2022, month=4, day=1), span=30)

Target Start Date :: 4/1/2022
Target End Date :: 4/30/2022


In [None]:
2000 to 2021

2022 up through April

In [26]:
import os
import time

# Time the whole batch.
tic = time.perf_counter()

for theDate in targetMonth:
    grabCrimeData(targetDate=theDate, database=mongoConnection)
    # Pause between requests (presumably to go easy on the DPSS server).
    # Only the `time` module is imported in this cell, so the call must be
    # qualified; a bare `sleep` is an undefined name.
    time.sleep(2)

toc = time.perf_counter()
print(f"Year completed in {toc - tic:0.4f} seconds")

# Spoken alert when the run finishes; `say` is assumed to be available on the
# machine running the notebook (it is a macOS command).
os.system("say 'Completed'")

Requesting... 4/1/2022
.... entry posted
.... entry posted
.... entry posted
.... entry posted
.... entry posted
Success :: Copied 4/1/2022 to MongoDB
Requesting... 4/2/2022
.... entry posted
.... entry posted
Success :: Copied 4/2/2022 to MongoDB
Requesting... 4/3/2022
.... entry posted
.... entry posted
.... entry posted
.... entry posted
Success :: Copied 4/3/2022 to MongoDB
Requesting... 4/4/2022
.... entry posted
.... entry posted
.... entry posted
Success :: Copied 4/4/2022 to MongoDB
Requesting... 4/5/2022
.... entry posted
.... entry posted
.... entry posted
.... entry posted
Success :: Copied 4/5/2022 to MongoDB
Requesting... 4/6/2022
.... entry posted
.... entry posted
.... entry posted
.... entry posted
.... entry posted
.... entry posted
Success :: Copied 4/6/2022 to MongoDB
Requesting... 4/7/2022
.... entry posted
.... entry posted
.... entry posted
.... entry posted
.... entry posted
.... entry posted
Success :: Copied 4/7/2022 to MongoDB
Requesting... 4/8/2022
.... entry

0

In [141]:
#grabCrimeData(targetDate="7/31/2012", database=mongoConnection)

Requesting... 7/31/2012
Success :: Copied 7/31/2012 to MongoDB


In [None]:
Requesting... 5/30/2012 Error :: 500

Requesting... 6/6/2012  Error :: 500
Requesting... 6/10/2012 Error :: 500
Requesting... 6/16/2012 Error :: 500
Requesting... 6/19/2012 Error :: 500
Requesting... 6/24/2012 Error :: 500
Requesting... 6/25/2012 Error :: 500
Requesting... 6/27/2012 Error :: 500

Requesting... 7/16/2012 Error :: 500
Requesting... 7/17/2012 Error :: 500
Requesting... 7/20/2012 Error :: 500
Requesting... 7/23/2012 Error :: 500
Requesting... 7/27/2012 Error :: 500
Requesting... 7/29/2012 Error :: 500
Requesting... 7/30/2012 Error :: 500
Requesting... 7/31/2012 Error :: 500

Requesting... 8/1/2012
BREAK

In [181]:
# Fetch a single date by hand so the raw response object can be inspected in the cells below.
temp = requests.get("http://dpss.umich.edu/api/GetCrimeLogCache?date=1/2/2018")

In [182]:
temp

<Response [200]>

In [179]:
# Print every attribute name of the response whose name does not start with "_".
for each in dir(temp):
    if each.startswith("_"):
        continue
    print(each)

apparent_encoding
close
connection
content
cookies
elapsed
encoding
headers
history
is_permanent_redirect
is_redirect
iter_content
iter_lines
json
links
next
ok
raise_for_status
raw
reason
request
status_code
text
url


In [180]:
temp.content

b'\n<html><head>\n<meta http-equiv="content-type" content="text/html;charset=utf-8">\n<title>500 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered an error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n'

In [125]:
temp.json()

{'count': 20,
 'data': [{'id': '122120055',
   'date': '2012-07-30 08:53:46.0',
   'description': 'Traffic Accident (off roadway)',
   'location': 'LOT SC-7',
   'address': '1202 KIPKE',
   'disposition': 'Incident Report #12-002250',
   'narrative': 'A vehicle attempting to park struck a parked vehicle resulting in no injuries and unknown damage.'},
  {'id': '122120064',
   'date': '2012-07-30 10:45:39.0',
   'description': 'Larceny (other)',
   'location': '',
   'address': '1000 BLOCK SOUTH UNIVERSITY',
   'disposition': 'Incident Report #12-002253',
   'narrative': 'A bicycle reportedly was taken from the bike rack near West Hall July 23 between 1 and 5pm.  No suspects. ',
   'status': '- closed as of 7/31/2012'},
  {'id': '122120068',
   'date': '2012-07-30 11:15:54.0',
   'description': 'Ambulance request',
   'location': 'SCHOOL OF EDUCATION',
   'address': '610 EAST UNIVERSITY',
   'disposition': 'Assistance Provided',
   'narrative': 'A staff member was taken to the U-M ER by 