# street crime data


### Import

- This notebook shows how to import the street crime data into MongoDB. 


In [14]:
import pymongo
import datetime
import matplotlib as plt
import pandas as pd

Install street crime data [link](https://data.police.uk/data/archive/)

In [27]:
!ls ..


1-10NB	 20-23NB  data	     EMA:TMA02	WorkingTests
11-16NB  25-26NB  _DS_Store  github


### Find and import the data

### Import files

In [16]:
!find ../2018J_TMA02_data/street-crimes  -type f | wc -l 

find: ‘../2018J_TMA02_data/street-crimes’: No such file or directory
0


In [17]:
!find ../2018J_TMA02_data/street-crimes -name '*csv'

find: ‘../2018J_TMA02_data/street-crimes’: No such file or directory


- DB named as crimes

In [18]:
!find ../2018J_TMA02_data/street-crimes -name '*csv' \
  -exec mongoimport --port 27351 --db crimes --collection street \
    --type csv --headerline --ignoreBlanks \
    --file {} \;

find: ‘../2018J_TMA02_data/street-crimes’: No such file or directory


### After import is done

In [19]:
# Open a connection to the Mongo server
client = pymongo.MongoClient('mongodb://localhost:27351/')

In [20]:
# Connect to the crimes database
crimes_db = client.crimes
street = crimes_db.street

In [21]:
 ## crimes_db.street.drop()

#### Test data 1

In [22]:
street.find_one({'Falls within': 'Bedfordshire Police'})

{'Crime ID': '78cde503559d174152518a99410574620a60ef5d06f0b9b4c0ccc518a1385c7a',
 'Crime type': 'Violence and sexual offences',
 'Falls within': 'Bedfordshire Police',
 'LSOA code': 'E01017658',
 'LSOA name': 'Aylesbury Vale 009D',
 'Last outcome category': 'Awaiting court outcome',
 'Latitude': 51.873389,
 'Location': 'On or near Northall Road',
 'Longitude': -0.600189,
 'Month': '2017-10',
 'Reported by': 'Bedfordshire Police',
 '_id': ObjectId('5ca63402fa8926a25a414834'),
 'date': datetime.datetime(2017, 10, 31, 0, 0),
 'loc': {'coordinates': [-0.600189, 51.873389], 'type': 'Point'}}

In [23]:
street.count_documents({'Falls within': 'Bedfordshire Police'})

463722

In [24]:
# street.find({'LSOA name': 'County Durham 030E'})

pd.DataFrame(list(street.find({'Month': '2017-10', 'LSOA code': 'E01017658'}, 
                                 ['Latitude', 'Longitude','LSOA name'],
                                 limit=10)))

Unnamed: 0,LSOA name,Latitude,Longitude,_id
0,Aylesbury Vale 009D,51.873389,-0.600189,5ca63402fa8926a25a414834
1,Aylesbury Vale 009D,51.873389,-0.600189,5ca63402fa8926a25a414835
2,Aylesbury Vale 009D,51.853617,-0.618473,5ca63405fa8926a25a429e19
3,Aylesbury Vale 009D,51.862951,-0.590478,5ca63405fa8926a25a429e1a
4,Aylesbury Vale 009D,51.862951,-0.590478,5ca63405fa8926a25a429e1b
5,Aylesbury Vale 009D,51.862951,-0.590478,5ca63405fa8926a25a429e1c
6,Aylesbury Vale 009D,51.868097,-0.586918,5ca63405fa8926a25a429e1d
7,Aylesbury Vale 009D,51.867488,-0.587939,5ca63405fa8926a25a429e1e
8,Aylesbury Vale 009D,51.873389,-0.600189,5ca7859afa8926a25aa61431
9,Aylesbury Vale 009D,51.873389,-0.600189,5ca7859afa8926a25aa61432


In [25]:
!ls dataset/


ls: cannot access 'dataset/': No such file or directory


In [26]:
import geopandas
boros = geopandas.read_file(geopandas.datasets.get_path("dataset/area.csv"))

boros.BoroName

boro_locations = geopandas.tools.geocode(boros.BoroName)

boro_locations

ValueError: The dataset 'dataset/area.csv' is not available

- Below should show how many more items are pulled out of the Mongodb 
- as on the single sheet streetCrime2018May 
- there was some **8739** row of data pulled in.
- Where as we see form the mongodb we have some **198,738** row pulled in for just Bedfordshire police force.

#### Test data 2

In [None]:
street.count_documents({'Falls within':'Avon and Somerset Constabulary'})

- We now see some ** 539,406 ** row pulled out of mongodb
-  For Somerset Constabulary

### Geo

 I wanted to be able to group street crime by "loc" but not working

In [None]:
from bson.son import SON
from pymongo import MongoClient, GEO2D
# query = {"loc": SON([("$nearSphere", [-0.155734, 51.49268]), ("$maxDistance", 10)])}
# cursor=street.find(query)[5]

query = {"loc": SON([("$nearSphere", [-0.155734, 51.49268]), ("$maxDistance", 1000)])}
for doc in street.find(query).limit(4):
    pprint.pprint(doc)
    
    

In [None]:
pipeline = [{'$group': {'_id': '$geoNear',
                        'length': {'$avg': '$LenNet'},
                        'count': {'$sum': 1}}}]
results = list(street.aggregate(pipeline))
results

In [None]:
street.create_index({'Point':"2dsphere"});

pipeline = [
   {
     '$geoNear': {
        'near': { 'type': "Point", 'coordinates': [ -73.99279 , 40.719296 ] },
        'distanceField': "dist.calculated",
        'maxDistance': 20,
        'query': { 'type': "public" },
        'includeLocs': "dist.location",
        'num': 5,
        'spherical': 'true'
     }
   }
]

results = list(street.aggregate(pipeline))
results

In [None]:
for a in street.find({'point': {'$near': [-84.26060492426588, 30.45023887165371]}}):
    Print(a)

In [None]:
pipeline = [{'$geoNear':{
                        'near': { 'type': "Point", 'coordinates': [ -73.99279 , 40.719296 ] },
            'distanceField': "dist.calculated",
        'maxDistance': 2000,                    
    'count': {'$sum': 1}}}]
results = list(street.aggregate(pipeline))
results


In [None]:
pipeline = [{'$group': {'_id': '$LSOA name',
                        'Totals': {'$sum': 1}}}]
results_of_crimeZ = list(street.aggregate(pipeline))
results_of_crimeZ

not a lot of the same names above,
- lets have a go with long lat **NOT WORKING**

In [None]:
pipeline = [{'$nearSphere': {'geometry':{
          type: "Point" ,
          'coordinates' [ -1.073626, 51.806445]
       }},
    '$maxDistance': 50,
       '$minDistance': 50  }]

results_of_cri = list(street.aggregate(pipeline))
results_of_cri


In [None]:
pipeline = [
{
    'loc': {
    
  {   '$near': {
       '$geometry': {
          'type': "Point" ,
          'coordinates': [ -1.073626, 51.806445 ]
       },
       '$maxDistance': 50,
       '$minDistance': 50
     }
    }
   }
}]
results_of_cri = list(street.aggregate(pipeline))
results_of_cri


In [None]:
street.find_one({
   'loc': {
     '$near': {
       '$geometry': {
          'type': "Point" ,
          'coordinates': [ -1.073, 51.806 ]
       }
         ,
       '$maxDistance': 6000,
       '$minDistance': 1000
     }
   }
})


In [None]:
street.create_index({'loc':"2dsphere"});

In [None]:
street.find_one(
   {
     'location':
       { '$near' :
          {
            '$geometry': { 'type': "Point",  'coordinates': [ -1.073, 51.806 ] },
            '$minDistance': 1000,
            '$maxDistance': 5000
          }
       }
   }
)


In [None]:
for a in street.find(
   { 'loc' :
       { '$near' :
          {
            '$geometry' : {
               'type' : "Point" ,
               'coordinates' : [-1.073, 51.806 ] },
            '$maxDistance' : 100
          }
       }
    },limit=1):
    print(a)

In [None]:
# for s in street.find({'Longitude': {'$exists': True}}, ['Longitude', 'Latitude', 'Month']):
#     first_of_month = datetime.datetime.strptime(s['Month'], '%Y-%m')
#     last_of_month = last_day_of_month(first_of_month)

#     street.update_one({'_id': s['_id']},
#                       {'$set': {'loc.coordinates': [s['Longitude'], s['Latitude']],
#                                 'loc.type': 'Point',
#                                 'date': last_of_month}})

Create a geospatial index for the crimes. See the [PyMongo documentation](http://api.mongodb.com/python/current/api/pymongo/collection.html#pymongo.collection.Collection.create_index) for more on creating indexes.

street.create_index([("loc", pymongo.GEOSPHERE)])

In [None]:
street.index_information()

In [None]:
for a in street.find({}, limit=1):
    print(a)