In [1]:
from django.contrib.gis.geos import Point

import pandas as pd
from tqdm import tqdm, tqdm_notebook, tqdm_pandas

from data.models import Allegation, Area
from data_importer.base.storage import AzureStorage
from data_importer.base.cleaner import DataCleaner, to_int, strip

In [2]:
storage = AzureStorage(account_name='cpdbv2data', 
                       account_key='<add-your-key-here>',
                       share='cleaned-data')
import_db = 'import'

## Load GEO data file

In [3]:
df = pd.read_csv(
    storage.path_for('complaints/allegation_areas.csv'),
    usecols=['CRID', 'lat', 'lng']
)

#### Reformat CRID to string

In [4]:
DataCleaner(
    schema={
        'CRID': [strip, to_int]
    }
).perform(df)

100%|██████████| 1/1 [00:04<00:00,  4.26s/it]


Unnamed: 0,CRID,lat,lng
0,258996,41.894921,-87.759053
1,258997,41.877184,-87.733444
2,258998,41.890882,-87.628025
3,258999,41.875600,-87.624400
4,259000,41.894921,-87.759053
5,259001,41.737596,-87.552228
6,259002,41.734883,-87.664603
7,259003,41.875600,-87.624400
8,259005,41.875600,-87.624400
9,259006,41.875600,-87.624400


## Import GEO data to database

In [5]:
def import_allegation_area(data):
    lat = data['lat']
    lng = data['lng']
    if lat and lng:
        point = Point(x=lng, y=lat)
        crid = str(int(float(data['CRID'])))
        allegation = Allegation.objects.using(import_db).get(crid=crid)
        allegation.areas.clear()
        areas = Area.objects.using(import_db).filter(polygon__intersects=point)
        for area in areas:
            allegation.areas.add(area)
    

In [6]:
tqdm.pandas(tqdm_notebook(), desc='Importing allegation areas')
df.progress_apply(import_allegation_area, axis=1)

A Jupyter Widget

Importing allegation areas: 100%|██████████| 109339/109339 [58:52<00:00, 30.95it/s] 


0         None
1         None
2         None
3         None
4         None
5         None
6         None
7         None
8         None
9         None
10        None
11        None
12        None
13        None
14        None
15        None
16        None
17        None
18        None
19        None
20        None
21        None
22        None
23        None
24        None
25        None
26        None
27        None
28        None
29        None
          ... 
109309    None
109310    None
109311    None
109312    None
109313    None
109314    None
109315    None
109316    None
109317    None
109318    None
109319    None
109320    None
109321    None
109322    None
109323    None
109324    None
109325    None
109326    None
109327    None
109328    None
109329    None
109330    None
109331    None
109332    None
109333    None
109334    None
109335    None
109336    None
109337    None
109338    None
Length: 109339, dtype: object