In [1]:
import os

import pandas as pd
from tqdm import tqdm, tqdm_notebook, tqdm_pandas

from data.models import AllegationCategory
from data_importer.base.storage import AzureStorage
from data_importer.base.cleaner import DataCleaner, strip, titleize, lower, ToBool

In [2]:
# Let pandas render up to 500 rows so the full category table can be inspected.
pd.options.display.max_rows = 500

In [3]:
# Azure file share that holds the source spreadsheets.
# The account key is a secret, so it is taken from the AZURE_STORAGE_KEY
# environment variable instead of being written into the notebook. The literal
# fallback is only a placeholder kept so the cell still runs when the variable
# is unset; it is not a working credential.
storage = AzureStorage(account_name='cpdbv2data',
                       account_key=os.environ.get('AZURE_STORAGE_KEY', 'your-key-here'),
                       share='context-data')

# Database alias passed to `.using(...)` by every ORM call in this notebook.
import_db = 'import'

## Read Allegation Category data

In [4]:
# (spreadsheet header, model field name) pairs, in the column order to keep.
# NOTE(review): the first header is the integer 111, not a string - confirm that
# this is how the workbook's first column is really labelled.
column_pairs = [
    (111, 'category_code'),
    ('DESCRIPTION', 'allegation_name'),
    ('CATEGORY', 'category'),
    ('ON / OFF DUTY', 'on_duty'),
    ('CITIZEN / DEPT', 'citizen_dept'),
]

# Load the workbook, keep only the listed columns, then give them field names.
df = (
    pd.read_excel(storage.path_for('categories/Categories.xlsx'))
    .loc[:, [header for header, _ in column_pairs]]
    .rename(columns=dict(column_pairs))
)

## Clean Allegation Category data

**Apply the following cleaning steps:**
- Convert `on_duty` from ON/OFF to True/False
- Lower-case `citizen_dept` (e.g. dept/citizen) and blank out the `?` placeholder


In [5]:
# Per-column cleaning pipeline: each column name maps to the list of cleaners
# configured for it. ToBool is given 'ON' as the true value and 'OFF' as the
# false value for `on_duty`.
# NOTE(review): `perform` is presumed to apply the cleaners in list order and to
# modify `df` in place (its return value is not used) - confirm in DataCleaner.
cleaner = DataCleaner(
    schema={
        'allegation_name': [strip, titleize],
        'category': [strip, titleize],
        'citizen_dept': [strip, lower],
        'on_duty': [strip, ToBool(true_set=['ON'], false_set=['OFF'])]
    }
)
cleaner.perform(df)

# Register `progress_apply` on pandas objects. Only keyword options are passed:
# `tqdm.pandas` creates its own progress bar for each `progress_apply` call, so
# no bar instance has to be constructed here.
tqdm.pandas(desc='Clean allegation category')

# '?' marks an unknown value in the sheet; store it as None instead.
df['citizen_dept'] = df['citizen_dept'].progress_apply(
    lambda x: None if x == '?' else x
)
df

100%|██████████| 4/4 [00:00<00:00, 75.96it/s]


A Jupyter Widget

Clean allegation category: 100%|██████████| 407/407 [00:00<00:00, 155161.04it/s]


Unnamed: 0,category_code,allegation_name,category,on_duty,citizen_dept
0,001,Unfit For Duty,Operation/Personnel Violations,True,dept
1,002,Absence Without Permission,Operation/Personnel Violations,True,dept
2,003,Medical Roll Violation,Operation/Personnel Violations,True,dept
3,003A,Medical Roll - Fail To Notify Spvsr,Operation/Personnel Violations,True,dept
4,003B,Medical Roll - Fail To Rpt In Person To Mss 2N...,Operation/Personnel Violations,True,dept
5,003C,Medical Roll - Absent From Residence,Operation/Personnel Violations,True,dept
6,003D,Medical Roll - Fail To Rpt To Mss Or Other Med...,Operation/Personnel Violations,True,dept
7,003E,Medical Roll - Other And Specify,Operation/Personnel Violations,True,dept
8,004,Tardiness,Operation/Personnel Violations,True,dept
9,005,Court Appearance Violation,Operation/Personnel Violations,True,dept


## Import Allegation Category

**Delete the current allegation categories**

In [6]:
# Remove every AllegationCategory row from the import database so the import
# that follows starts from an empty table. The delete() result is the cell output.
stale_categories = AllegationCategory.objects.using(import_db).all()
stale_categories.delete()

(0, {})

**Import the new allegation categories into the database**

In [7]:
# Queryset bound to the import database; used for the inserts and the final count.
import_categories = AllegationCategory.objects.using(import_db)

# One dict of field values per row of the cleaned frame.
allegation_categories = df.to_dict(orient='records')
for allegation_category in allegation_categories:
    # A falsy citizen_dept (such as the None written for '?') is removed so the
    # field is not passed to create() at all.
    if not allegation_category['citizen_dept']:
        del allegation_category['citizen_dept']
    import_categories.create(**allegation_category)

# Cell output: number of rows now stored in the import database.
import_categories.count()

407

## Print out the imported Allegation Categories

In [8]:
# Read the imported rows back as dicts limited to these fields, then display
# them as a frame ordered by primary key.
imported_fields = (
    'category_code', 'category', 'allegation_name', 'citizen_dept', 'on_duty', 'id'
)
imported_rows = AllegationCategory.objects.using(import_db).all().values(*imported_fields)
pd.DataFrame.from_records(imported_rows).sort_values('id')

Unnamed: 0,allegation_name,category,category_code,citizen_dept,id,on_duty
0,Unfit For Duty,Operation/Personnel Violations,001,dept,1,True
1,Absence Without Permission,Operation/Personnel Violations,002,dept,2,True
2,Medical Roll Violation,Operation/Personnel Violations,003,dept,3,True
3,Medical Roll - Fail To Notify Spvsr,Operation/Personnel Violations,003A,dept,4,True
4,Medical Roll - Fail To Rpt In Person To Mss 2N...,Operation/Personnel Violations,003B,dept,5,True
5,Medical Roll - Absent From Residence,Operation/Personnel Violations,003C,dept,6,True
6,Medical Roll - Fail To Rpt To Mss Or Other Med...,Operation/Personnel Violations,003D,dept,7,True
7,Medical Roll - Other And Specify,Operation/Personnel Violations,003E,dept,8,True
8,Tardiness,Operation/Personnel Violations,004,dept,9,True
9,Court Appearance Violation,Operation/Personnel Violations,005,dept,10,True
