In [1]:
import os
import string

import pandas as pd

from data.models import PoliceUnit
from data_importer.base.cleaner import DataCleaner, ToNaN, ZFill, strip, titleize, capitalise_generation_suffix
from data_importer.base.storage import AzureStorage

# Raise the row display limit so that long tables are not truncated when rendered.
pd.options.display.max_rows = 1000

In [2]:
# Handle to the Azure storage share that holds the context data files.
# The account key is read from the AZURE_STORAGE_KEY environment variable so
# that no secret is ever saved inside the notebook; a missing variable fails
# immediately with a KeyError instead of a later authentication error.
storage = AzureStorage(account_name='cpdbv2data',
                       account_key=os.environ['AZURE_STORAGE_KEY'],
                       share='context-data')

## Import police units from FOIA data
The FOIA police unit data is available [here](https://github.com/invinst/chicago-police-data/blob/master/Previous_Format/context_data/Unit%20names.pdf).

### Clean up the data

In [None]:
# Load the police unit CSV from the storage share and keep only the two
# columns that are imported.
csv_path = storage.path_for('police-units/PoliceUnit.csv')
wanted_columns = ['unit_name', 'description']
df = pd.read_csv(csv_path).loc[:, wanted_columns]

# Per-column cleaning steps, listed in the order they are declared.
# NOTE(review): ZFill(3) presumably left-pads unit names to three characters
# and ToNaN presumably blanks out the literal 'Unknown' - confirm against
# data_importer.base.cleaner.
unit_schema = {
    'unit_name': [ZFill(3)],
    'description': [strip, titleize, capitalise_generation_suffix, ToNaN(value_set=['Unknown'])],
}
cleaner = DataCleaner(schema=unit_schema)

# NOTE(review): the result is not assigned, so this presumably cleans `df`
# in place - confirm.
cleaner.perform(df)

In [4]:
# Alias of the Django database connection that receives the imported rows.
import_db = 'import'

# Remove every existing PoliceUnit row from that database before importing.
existing_units = PoliceUnit.objects.using(import_db).all()
existing_units.delete()

(0, {})

In [5]:
# Swap missing values for None before handing the rows to the ORM.
df = df.where(df.notnull(), None)
police_units = df.to_dict(orient='records')

# Insert one PoliceUnit per record into the import database.
import_units = PoliceUnit.objects.using(import_db)
for police_unit in police_units:
    import_units.create(**police_unit)

# Show how many units the import database now holds.
PoliceUnit.objects.using(import_db).count()

237

## Add Unit 720

In [6]:
# Create unit 720 by hand; only its name is set.
# NOTE(review): presumably this unit is absent from the FOIA file - confirm.
unit_720 = PoliceUnit.objects.using(import_db).create(unit_name='720')
unit_720

<PoliceUnit: 720>

## Print out the imported data

In [7]:
# Pull every imported unit back out of the import database as plain dicts,
# build a DataFrame from them and display the three columns of interest.
unit_rows = PoliceUnit.objects.using(import_db).all().values()
imported_units = pd.DataFrame.from_records(unit_rows)
imported_units.loc[:, ['id', 'unit_name', 'description']]

Unnamed: 0,id,unit_name,description
0,1,0,Area 0
1,2,1,District 001
2,3,2,District 002
3,4,3,District 003
4,5,4,District 004
5,6,5,District 005
6,7,6,District 006
7,8,7,District 007
8,9,8,District 008
9,10,9,District 009


**Done!**