# Notebook to automatically generate hate crime report based on crime DB and MO Codes

Using a rule based method and division into crime categories. NLP libraries used to adjust tense and correct grammar

In [171]:
import pandas as pd
import numpy as np
from IPython.display import display, HTML
from gingerit.gingerit import GingerIt
import warnings
warnings.filterwarnings("ignore")

In [172]:
pd.options.display.max_rows = 50

### Read data

In [173]:
# Read in data from Excel sheet
pd.set_option('display.max_rows', 10)
df = pd.read_excel('MO codes.xlsx', sheet_name='Sheet5')  # MO code number and descrioption
# csv_desc = pd.read_csv('Crime_descriptions_extra.csv') # Josh's crime reports
csv_desc = pd.read_csv('crime_queries_05_31.csv') 
df.head()

Unnamed: 0,MO_CODE,Description,Category,Sub Category
0,601,Business,Crime Related To,Dispute involved
1,602,Family,Crime Related To,Dispute involved
2,603,Landlord/Tenant/Neighbor,Crime Related To,Dispute involved
3,604,Reproductive Health Services/Facilities,Crime Related To,Dispute involved
4,605,Traffic Accident/Traffic related incident,Crime Related To,Dispute involved


In [174]:
csv_desc.columns

Index(['dr_no', 'date_rptd', 'date_occ', 'time_occ', 'area_id', 'area_name',
       'rpt_dist_no', 'crm_cd', 'crm_cd_desc', 'mocodes', 'vict_age',
       'vict_sex', 'vict_descent', 'premis_cd', 'premis_desc',
       'weapon_used_cd', 'weapon_desc', 'status', 'status_desc', 'crm_cd_1',
       'crm_cd_2', 'crm_cd_3', 'crm_cd_4', 'location', 'cross_street',
       'location_1', 'zipcode', 'neighborhood', 'tract',
       'neighborhood_council', 'longitude', 'latitude', 'small_bundle_name'],
      dtype='object')

In [175]:
csv_desc.rename(columns={'crm_cd_desc':'Damages', 'premis_desc':'Premesis', 
                         'small_bundle_name':'Bundle Name','mocodes':'MO Codes',
                         'vict_age':'Victim Age','vict_sex':'Victim Sex',
                         'vict_descent':'Victim Descent','weapon_desc':'Weapon Description'},inplace=True)

# relevant_columns = ['Damages', 'Premesis', 'Bundle Name','MO Codes','Victim Age',
#                     'Victim Sex','Victim Descent']

relevant_columns = ['Premesis','MO Codes','Victim Age','Victim Sex','Victim Descent']

csv_desc['Crime Description'] = ''

In [176]:
# To prevent duplicates of this word in generated sentences
# df['Description'] = df['Description'].str.replace('Susp ', '')
df['Description'] = df['Description'].str.replace('Suspect ', '')
df['Description'] = df['Description'].str.replace('Victim ', '')

In [177]:
# Get csv into useful data
# desc_df = csv_desc.dropna(subset=['Premesis'])
# desc_df = csv_desc.dropna(subset=relevant_columns)
desc_df = csv_desc.dropna(subset=relevant_columns)
# desc_df = csv_desc.drop(['Number', 'Neighborhood','Location','@Photos','Datetime','Crime','Weapon'],axis=1)
desc_df['MO Codes'] = desc_df['MO Codes'].apply(lambda x: list(map(int, x.split())))
desc_df['Damages'] = desc_df['Damages'].apply(lambda x: x.lower())
desc_df['Premesis'] = desc_df['Premesis'].apply(lambda x: x.lower())
desc_df['Weapon Description'] = desc_df['Weapon Description'].apply(lambda x: x.lower() if type(x) == str else x)
desc_df['Weapon Description'] = desc_df['Weapon Description'].fillna('')
desc_df[''] = desc_df['Premesis'].apply(lambda x: x.lower())
desc_df.head()

Unnamed: 0,dr_no,date_rptd,date_occ,time_occ,area_id,area_name,rpt_dist_no,crm_cd,Damages,MO Codes,...,location_1,zipcode,neighborhood,tract,neighborhood_council,longitude,latitude,Bundle Name,Crime Description,Unnamed: 21
0,200809698,2020-05-20 00:00:00,2020-05-20 00:00:00,2000,,West LA,818,230,"assault with deadly weapon, aggravated assault","[432, 2036, 2035, 903, 431, 319, 216]",...,0101000020E6100000DC68006F819C5DC0705F07CE1909...,90095.0,westwood,265301.0,63.0,-118.4454,34.0711,assault with a deadly weapon,,market
1,201213171,2020-05-20 00:00:00,2020-05-20 00:00:00,700,,77th Street,1266,230,"assault with deadly weapon, aggravated assault","[1309, 1822, 340, 416, 446, 2036, 903, 2055, 1...",...,0101000020E61000007B832F4CA6925DC088855AD3BCFB...,90044.0,vermont-knolls,238310.0,,-118.2914,33.9667,assault with a deadly weapon,,street
2,200311536,2020-05-19 00:00:00,2020-05-19 00:00:00,1030,,Southwest,319,230,"assault with deadly weapon, aggravated assault","[910, 1402, 2049, 930, 2051, 1536, 1822, 447, ...",...,0101000020E6100000E7FBA9F1D2915DC0EBE2361AC003...,90007.0,university-park,224420.0,773.0,-118.2785,34.0293,assault with a deadly weapon,,mta bus
3,200111818,2020-05-17 00:00:00,2020-05-17 00:00:00,2120,,Central,145,624,battery - simple assault,"[903, 1536, 2054, 416, 1822]",...,0101000020E6100000F241CF66D58F5DC020D26F5F0706...,90013.0,downtown,207302.0,52.0,-118.2474,34.0471,assorted assault,,office building/office
4,201909852,2020-05-16 00:00:00,2020-05-16 00:00:00,739,,Mission,1971,761,brandish weapon,"[906, 1514, 1310, 1822, 2055, 903, 334, 2035, ...",...,0101000020E610000091ED7C3F359E5DC0A1F831E6AE1D...,91343.0,north-hills,117407.0,112.0,-118.472,34.2319,threats,,street


In [178]:
# Replacing values in dataframe with better reading phrases
# Replacing the MO code descriptions
df.replace('Hit-Hit w/ weapon','hit',inplace=True)
df.replace('Vict knocked to ground','victim knocked to ground',inplace=True)
df.replace('Multi-susps overwhelm','multiple suspects overwhelmed',inplace=True)
df.replace('Susp uses vehicle','vehicle involved',inplace=True)


# Replacing the crimes df with better descriptions
desc_df.replace('restaurant/fast food','restaurant',inplace=True)
# desc_df.replace('church/chapel', 'church',inplace=True)
desc_df.replace('church/chapel (changed 03-03 from church/temple)','church',inplace=True)
desc_df.replace('multi-unit dwelling (apartment, duplex, etc)','apartment',inplace=True)
desc_df.replace('single family dwelling', 'house',inplace=True)
desc_df.replace('other business', 'local business',inplace=True)
desc_df.replace('bar/cocktail/nightclub', 'bar or nightclub',inplace=True)
desc_df.replace('assault with a deadly weapon','assaulted',inplace=True)
desc_df.replace('assorted assault','assaulted',inplace=True)
desc_df.replace('miscellaneous crimes','miscellaneous crimes toward',inplace=True)
desc_df.replace(regex=['\*'],value='',inplace=True)

# Weapon description
desc_df.replace('other knife', 'knife',inplace=True)
desc_df.replace('other cutting instrument','cutting instrument',inplace=True)
desc_df.replace('other firearm','firearm',inplace=True)
desc_df.replace('unknown weapon/other weapon','unknown weapon',inplace=True)
desc_df.replace('unknown type cutting instrument','unkown cutting instrument',inplace=True)
desc_df.replace('unknown firearm','firearm',inplace=True)
desc_df.replace('strong-arm (hands, fist, feet or bodily force)','',inplace=True)
desc_df.replace('rock/thrown object','rock/projectile',inplace=True)

desc_df.head(2)

Unnamed: 0,dr_no,date_rptd,date_occ,time_occ,area_id,area_name,rpt_dist_no,crm_cd,Damages,MO Codes,...,location_1,zipcode,neighborhood,tract,neighborhood_council,longitude,latitude,Bundle Name,Crime Description,Unnamed: 21
0,200809698,2020-05-20 00:00:00,2020-05-20 00:00:00,2000,,West LA,818,230,"assault with deadly weapon, aggravated assault","[432, 2036, 2035, 903, 431, 319, 216]",...,0101000020E6100000DC68006F819C5DC0705F07CE1909...,90095.0,westwood,265301.0,63.0,-118.4454,34.0711,assaulted,,market
1,201213171,2020-05-20 00:00:00,2020-05-20 00:00:00,700,,77th Street,1266,230,"assault with deadly weapon, aggravated assault","[1309, 1822, 340, 416, 446, 2036, 903, 2055, 1...",...,0101000020E61000007B832F4CA6925DC088855AD3BCFB...,90044.0,vermont-knolls,238310.0,,-118.2914,33.9667,assaulted,,street


In [179]:
# Function which returns the number of values for a key
def checkKey(this_dict, key):       
    if key in this_dict.keys(): 
        return True
    else: 
        return False

# Function which returns the number of values for a key
def checkValLen(this_dict, key):       
    if key in this_dict: 
        return len(this_dict[key])
    else: 
        return 0

In [180]:
descent_dict = {'A':'other Asian','B':'black','C':'Chinese','D':'Cambodian',\
                'F':'Filipino','G':'Guamanian','H':'Hispanic',\
                'I':'American Indian','J':'Japanese','K':'Korean',\
                'L':'Laotian','O':'Other','P':'Pacific Islander','S':'Samoan',\
                'U':'Hawaiian','V':'Vietnamese','W':'white','X':'Unknown','Z':'Asian Indian'}

gender_dict = {'F':'female','M':'male',' H':'hermaphrodite','X':'Unknown'}

# damages_dict = {'causing $399 worth of damage or under':'causing under $400 worth of damage',\
#                 'causing $400 worth of damage & over':'causing more than $400 worth of damage'}

# Function to create sentence

In [181]:
# Adds MO code to crime category dictionary for a particular set of MO codes

def get_crime_categories(mo_codes):

    description_dict = {}

    # Divide crime up by category
    for code in mo_codes:
        
        if code >= 601 and code <= 605:
            if 'dispute_involved' not in description_dict.keys():
                description_dict['dispute_involved'] = []
            description_dict['dispute_involved'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())
        
        if code == 701 or code == 901 or (code >= 901 and code <= 947) or (code >= 1100 and code <= 1101):
            if 'crime_related_to' not in description_dict.keys():
                description_dict['crime_related_to'] = []
            description_dict['crime_related_to'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())
        
        if code >= 1300 and code <= 1318:
            if 'vehicle_involved' not in description_dict.keys():
                description_dict['vehicle_involved'] = []
            description_dict['vehicle_involved'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())
        
        if code >= 1505 and code <= 1539:
            if 'bias' not in description_dict.keys():
                description_dict['bias'] = []
            description_dict['bias'].append(df[df['MO_CODE'] == code]['Description'].values[0][6:].lower())
        
        if code >= 1900 and code <= 1916:
            if 'computer_crimes' not in description_dict.keys():
                description_dict['computer_crimes'] = []
            description_dict['computer_crimes'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())            
        
        if code >= 1601 and code <= 1612:
            if 'entry_device' not in description_dict.keys():
                description_dict['entry_device'] = []
            description_dict['entry_device'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())            
        
        if code >= 1401 and code <= 1420:
            if 'evidence' not in description_dict.keys():
                description_dict['evidence'] = []
            description_dict['evidence'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())            
        
        if code >= 100 and code <= 123:
            if 'suspect_impersonates' not in description_dict.keys():
                description_dict['suspect_impersonates'] = []
            description_dict['suspect_impersonates'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())                        
        
        if code >= 200 and code <= 220:
            if 'suspect_wore_disguise' not in description_dict.keys():
                description_dict['suspect_wore_disguise'] = []
            description_dict['suspect_wore_disguise'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())                        
        
        if (code >= 301 and code <= 360) or (code >= 362 and code <= 363) or (code >= 365 and code <= 399) or (code >= 2035 and code <= 2037) or\
            (code >= 2040 and code <= 2041) or (code == 2046) or (code >= 401 and code <= 433) or (code >= 442 and code <= 451):
            if 'suspects_actions' not in description_dict.keys():
                description_dict['suspects_actions'] = []
            description_dict['suspects_actions'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())

#         if (code >= 401 and code <= 433) or (code >= 442 and code <= 451):
#             if 'force_used' not in description_dict.keys():
#                 description_dict['force_used'] = []
#             description_dict['force_used'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())            
    
        if code >= 434 and code <= 441:
            if 'bindings_used' not in description_dict.keys():
                description_dict['bindings_used'] = []
            description_dict['bindings_used'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())                        
        
        if (code >= 500 and code <= 551) or code == 563:
            if 'sex_related_acts' not in description_dict.keys():
                description_dict['sex_related_acts'] = []
            description_dict['sex_related_acts'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())            
        
        if code >= 552 and code <= 562 or code == 360:
            if 'suspects_biological_relationship_to_victim' not in description_dict.keys():
                description_dict['suspects_biological_relationship_to_victim'] = []
            description_dict['suspects_biological_relationship_to_victim'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())                        
        
        if code >= 1000 and code <= 1028:
            if 'suspects_offers_solicits' not in description_dict.keys():
                description_dict['suspects_offers_solicits'] = []
            description_dict['suspects_offers_solicits'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())                        
        
        if code == 361 or code == 364 or code == 913 or (code >= 1801 and code <= 1824) or code == 2038:
            if 'suspects_association_with_victim' not in description_dict.keys():
                description_dict['suspects_association_with_victim'] = []
            description_dict['suspects_association_with_victim'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())                        
        
        if (code >= 2001 and code <= 2028) or (code >= 2044 and code <= 2045) or (code >= 2051 and code <= 2052):
            if 'suspect_was' not in description_dict.keys():
                description_dict['suspect_was'] = []
            description_dict['suspect_was'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())                        
        
#         if code >= 1257 and code <= 1259:
#             if 'victims_age' not in description_dict.keys():
#                 description_dict['victims_age'] = []
#             description_dict['victims_age'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())                        
        
        if code >= 2029 and code <= 2033:
            if 'victims_actions' not in description_dict.keys():
                description_dict['victims_actions'] = []
            description_dict['victims_actions'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())            
        
        if (code >= 1201 and code <= 1281) or code == 2034 or  code == 2039 or (code >= 2042 and code <= 2050) or (code >= 2053 and code <= 2059):
            if 'target_victim_was' not in description_dict.keys():
                description_dict['target_victim_was'] = []
            description_dict['target_victim_was'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())            
        
        if (code >= 3001 and code <= 3040) or code == 3062:
            if 'MV_involved_with_and_special_conditions' not in description_dict.keys():
                description_dict['MV_involved_with_and_special_conditions'] = []
            description_dict['MV_involved_with_and_special_conditions'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())            
        
        if (code >= 3101 and code <= 3104) or code == 3201 or code == 3301 or code == 3401 or code == 3501 or (code >= 3601and code <= 3603) or code == 3701 or code == 3801 or code == 3901:
            if 'primary_collision_factor' not in description_dict.keys():
                description_dict['primary_collision_factor'] = []
            description_dict['primary_collision_factor'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())                        
        
        if code >= 4001 and code <= 4027:
            if 'citywide_traffic_handling_rate' not in description_dict.keys():
                description_dict['citywide_traffic_handling_rate'] = []
            description_dict['citywide_traffic_handling_rate'].append(df[df['MO_CODE'] == code]['Description'].values[0].lower())            

    return description_dict

In [182]:
# Adds weapon sentence
def generate_weapon(sentence, crime_description, print_dict = False):
    
    if crime_description['weapon'][0]:
        sentence += ' The suspect used a ' + crime_description['weapon'][0]

    for i in range(1,len(crime_description['weapon'])):
        if len(crime_description['weapon']) != i+1:
            sentence += ', '
        else:
            sentence += ' and '
        sentence += crime_description['weapon'][i]

    sentence += '.'
    return sentence


# Adds victim description sentence
def generate_evidence(sentence, crime_description, print_dict = False):
    
    if crime_description['evidence'][0]:
        sentence += ' There is ' + crime_description['evidence'][0]

    for i in range(1,len(crime_description['evidence'])):
        if len(crime_description['evidence']) != i+1:
            sentence += ', '
        else:
            sentence += ' and '
        sentence += crime_description['evidence'][i]

    sentence += ' of the crime.'
    return sentence

# Adds bias sentence to report
def generate_bias(sentence, crime_description, print_dict = False):

    sentence += ' The crime was classified by the LAPD as a hate crime with an "' + crime_description['bias'][0] + '"'

    for i in range(1,len(crime_description['bias'])):
        if len(crime_description['bias']) != i+1:
            sentence += ', '
        else:
            sentence += ' and '
        sentence += '"' + crime_description['bias'][i] + '"'

    sentence += ' bias.'
    return sentence

# Adds victim description sentence NEEEDS FIXING
def generate_vehicle(sentence, crime_description, print_dict = False):
    
    if crime_description['vehicle_involved'][0]:
        sentence += ' There was a ' + crime_description['vehicle_involved'][0]

    for i in range(1,len(crime_description['vehicle_involved'])):
        if len(crime_description['vehicle_involved']) != i+1:
            sentence += ', '
        else:
            sentence += ' and '
        sentence += crime_description['vehicle_involved'][i]

    sentence += ' in the crime.'
    return sentence

# Adds victim description sentence
def generate_victim(sentence, crime_description, print_dict = False):
    
    if crime_description['target_victim_was'][0]:
    
        sentence += ' The victim was ' + crime_description['target_victim_was'][0]

    for i in range(1,len(crime_description['target_victim_was'])):
        if len(crime_description['target_victim_was']) != i+1:
            sentence += ', '
        else:
            sentence += ' and '
        sentence += crime_description['target_victim_was'][i]

    sentence += '.'
    return sentence

# def generate_victim_actions(sentence, crime_description, print_dict = False):
#     if crime_description.get('victims_actions'):
#         if crime_description['victims_actions'][0]:
#             sentence += ' who ' + crime_description['victims_actions'][0]

#         for i in range(1,len(crime_description['victims_actions'])):
#             if len(crime_description['victims_actions']) != i+1:
#                 sentence += ', '
#             else:
#                 sentence += ' and '
#             sentence += crime_description['victims_actions'][i]

#     sentence += '.'
#     return sentence

In [183]:
# Suspect actions description translation
susp_used ={'attacks from rear':'attacked from the rear',
            'crime on upper floor':'committed the crime on the upper floor',
            'demands jewelry':'demanded jewelry',
            'drive-by shooting':'drove-by and shot',
            'graffiti':'graffitied',
            'gun in waistband':'had a gun in waistband',
            'hot prowl':'hot prowled',
            'makes victim give money':'made victim give money',
            'pillowcase/suitcase':'used a pillowcase or suitcase',
            'profanity used':'used profanities on',
            'quiet polite':'was quiet and polite',
            'takes money from register':'took money from register',
            'victims vehicle taken':'stole vehicle',
            'mailbox bombing':'mailboxed bombed',
            'mailbox vandalism':'vandalised mailbox',
            'brandishes weapon':'brandished their weapon',
            'cases location':'cased location',
            'chain snatch':'snatched chain',
            'demands money':'demanded money',
            'disables telephone':'disabled telephone',
            'disables video camera':'disabled video camera',
            'follows victim/follows victim home':'followed',
            'makes vict lie down':'made the victim lie down',
#             'multi-susps overwhelm':'multiple suspects overwhelmed',
            'multiple suspects overwhelm':'multiple suspects overwhelmed',
            'orders vict to rear room':'ordered victim to rear room',
            'removes vict property':'stole',
            'riding bike':'rode bike',
            'snatch property and runs':'snatched property and ran',
            'stalks vict':'stalked',
            'takeover other':'tookover',
            'takes mail':'took mail',
            "took victim's clothing or jewelry":'took clothes or jewelry',
            'weapon concealed':'concealed weapon',
            'takes car keys':'took car keys',
            'spits on victim':'spat on',
            'cuts or breaks purse strap':'cut or broke purse strap',
            'forces entry':'forced entry',
            'attempts to carry victim away':'attempted to carry victim away',
            'home invasion':'home invaded',
            'takeover robbery':'robbed',
            'ordered vict to open safe':'ordered the safe to be opened',
            'speaks foreign language':'spoke a foreign language',
            'speaks spanish':'spoke Spanish',
            'frisks victim/pats down victim/searches victim':'searched victim',
            'gang affiliation questions asked/made gang statement':'made gang related statement',
            'handicapped/in wheelchair':'was handicapped',
            'gang signs/threw gang signs using hands':'threw gang signs',
            'removes cash register':'removed cash register',
            'makes victim kneel':'made victim kneel',
            "takes vict's identification/driver license":'took driver license or ID',
            'brings own bag':'brought own bag',
            'turns off lights/electricity':'turned off lights or electricity',
            'distracts victim':'distracted',
            'apologizes':'suspect apologized',
            'weapon (other than gun) in waistband':'had a non-firearm weapon in waistband',
            'suspect points laser at plane/helicopter':'pointed laser at plane of helicopter',
            'knock-knock':'knocked on',
            'purse snatch':'snatched purse',
            'false emergency reporting':'falsely reported an emergency',
            '911 abuse': 'abused 911',
            'susp takes ups, fedex, usps packages':'stole UPS, Fedex or USPS packages',
            'murder/suicide':'murdered or committed suicide',
            'cut lock (to bicycle, gate, etc.':'cut lock (to bicycle, gate, etc.)',
            'roof access (remove a/c, equip, etc.)':'accessed roof',
            'vehicle to vehicle shooting':'shot from vehicle to vehicle',
            'racial slurs':'used racial slurs',
            'hate-related language': 'used hate-related language',
            'threats via social media': 'threatened via social media',
            'harassment via social media': 'harassed via social media'
}

# Suspect force actions descriptions
susp_force = {
    'bomb threat, bomb found':'caused a bomb threat, the bomb was found',
    'bomb Threat, no bomb':'casused a bomb threat, no bomb was found',
    'brutal assault':'brutally assaulted',
    'burned victim':'burned',
    'choked/uses choke hold/strangulation':'choked',
    'cover mouth w/hands':'covered mouth with hands',
    "covered victim's face":"covered the victim's face ",
    'handcuffed/metal':'used metal handcuffs on',
    'hit-hit w/ weapon':'hit',
    'pulled victims hair':"pulled the victim's hair",
    'threaten to kill':'threatened to kill',
    'threaten victims family':"threatened the victim's family",
    'tied victim to object':'tied the victim to an object',
    'tore clothes off victim':'tore clothes off',
    'vict knocked to ground':'knocked to ground',
    'vict shot':'shot',
    'intimidation':'intimidated',
    'makes victim kneel':'made the victim kneel',
    'active shooter/armed person who has used deadly physical force on other person &':'was an armed shooter',
    'threaten to harm victim (other than kill)':'threatened to harm but not kill',
    'suspect swung weapon':'swung weapon at',
    'suspect swung fist':'swung fist at',
    'suspect threw object at victim':'threw object at',
    'puts a weapon to body':'put a weapon on the body of',
    'suspect shot at':'shot at'
}

In [184]:
parser = GingerIt()

In [185]:
def remove_phrases(string):
    string = string.replace('at the victim','')
    string = string.replace('removes vict property','stole')
    string = string.replace('threaten ','threatened ')
    string = string.replace('graffiti ','graffitied ')
    string = string.replace('at a street','on the street')
    string = string.replace('profanities','profanity')
    string = string.replace(' (any crime)','')
    string = string.replace('attacks from rear','attacked from the rear')
    string = string.replace('other store','store')
    string = string.replace('intimidation','intimidated')
    string = string.replace('uber/lyft','Uber or Lyft')
    string = string.replace('(other than kill)','but not kill')
    string = string.replace('spits on victim','spat on')
#     string = string.replace('hate-related language','used hate-related language')

    for key, value in susp_force.items():
        string = string.replace(key,value)
    
#     for key, value in susp_used.items():
#         string = string.replace(key,value)
    
    return string

# Removes duplicate words from a string
def remove_duplicates(text):
    list_words = text.split()
    previous_value = None
    new_lst = []
    for elem in list_words:
        if elem != previous_value:
            new_lst.append(elem)
            previous_value = elem
    return ' '.join(new_lst)


# Gets the relevant crime information from the dataframe row
def get_info(row, print_dict, specific_code, num = 0, multi=False):
    this_row = row

    # Translating crime codes to dictionary for crime
    crime_description = this_row['Crime Description']
    crime_dict = get_crime_categories(this_row['MO Codes'])

    # Additional information about crime from in table row information
    victim_description = []
    if this_row['Victim Age'] != 0:
        victim_description.append(str(this_row['Victim Age']) + ' year-old')
    if this_row['Victim Descent'] != 'X' and this_row['Victim Descent'] != 'O':
        victim_description.append(descent_dict[this_row['Victim Descent']])
    if this_row['Victim Sex'] != 'X':
        victim_description.append(gender_dict[this_row['Victim Sex']])
    if len(victim_description) != 0:
#         if checkKey(crime_dict,'victims_actions'):
#             victim_description[-1] = generate_victim_actions(victim_description[-1], crime_dict)
        crime_dict['victim_description'] = victim_description
    
    if this_row['Weapon Description'] != '':
        crime_dict['weapon'] = [this_row['Weapon Description']]
    if this_row['Premesis'] != 'other premise':
        crime_dict['premesis'] = [this_row['Premesis']]
    crime_dict['damages'] = this_row['Damages'].split('-')
    if this_row['Bundle Name'] == 'vandalism':
        crime_dict['property'] = ['property of the']

    # For vandalisms
    if len(crime_dict['damages']) > 1:
        if '$' in crime_dict['damages'][1]:
            crime_dict['damages'][1] = crime_dict['damages'][1][crime_dict['damages'][1].find("(")+1:crime_dict['damages'][1].find(")")]
            if ',' in crime_dict['damages'][1]:
                crime_dict['damages'][1] = crime_dict['damages'][1][0:crime_dict['damages'][1].find(",")]
            crime_dict['damages'][1] = crime_dict['damages'][1][0:4] + ' worth of damage ' + crime_dict['damages'][1][5:]
            crime_dict['damages'] = [crime_dict['damages'][1]]
        else:
            del crime_dict['damages']
    else:
        del crime_dict['damages']

    # Join suspect_actions by ',' and change suspect actions 
#     print('\n-----------------------------------------------------------------------------------\n\nCrime %i:'%num)
    if crime_dict.get('suspects_actions'):
        if len(crime_dict.get('suspects_actions'))>1:                
            
            # Exception for multiple suspects
            if 'multiple suspects overwhelm' in crime_dict['suspects_actions']:
                new_actions = crime_dict['suspects_actions']
                new_actions.insert(0, new_actions.pop(new_actions.index('multiple suspects overwhelm')))
    
            crime_dict['suspects_actions'] = [susp_used.get(i) if susp_used.get(i) else i for i in crime_dict.get('suspects_actions')]
            
        
            # Reorder suspect actions if there is a 'used'
            used_already = False
            num_popped = 0
            for i in range(len(crime_dict['suspects_actions'])):
                i -= num_popped
                reordered_actions = crime_dict['suspects_actions']
                action = crime_dict['suspects_actions'][i]
                if action[:4] == 'used':
                    if used_already:
                        replacement = action[5:]
                        reordered_actions.pop(i)
                        reordered_actions.append(replacement)
                    else:
                        reordered_actions.append(reordered_actions.pop(i))
                        used_already = True
                    crime_dict['suspects_actions'] = reordered_actions
                    num_popped += 1

            # Improve sentence fluidity with 'and' and commas
            join_string = ', '.join(crime_dict.get('suspects_actions')[:-1])
            join_string += ' and ' + crime_dict.get('suspects_actions')[-1]
            
            if used_already:
                join_string += ' on'
            
            crime_dict['suspects_actions'] = [join_string]
    
    # For printing specific codes for debugging
    if specific_code is None or specific_code in crime_dict.keys():
        if not multi:
            print('\n-----------------------------------------------------------------------------------\n\nCrime %i:'%num)
            # Printing
            if print_dict:
                print('\nDictionary: ',crime_dict)
            if crime_description != '':
                print('\nReal report:\n\n' + crime_description)
            print('\nGenerated report:\n')

            return crime_dict
        else:
            if len(crime_dict[specific_code]) > 1:
                print('\n-----------------------------------------------------------------------------------\n\nCrime %i:'%num)
                # Printing
                if print_dict:
                    print('\nDictionary: ',crime_dict)

                print('\nReal report:\n\n' + crime_description)
                print('\nGenerated report:\n')

                return crime_dict 
            else:
                return {}
    else:
        return {}

# Include strings for each part of the first sentence
def sentence_template():
    # Keys are category of MO codes, ordered by usual order of use
    key_1 = ['force_used','suspects_actions', 'computer_crimes', 'property', 'victim_description', 'premesis', 'damages']

    # Words which precede MO code description
    pre_text = {'force_used':'The suspect ','suspects_actions':'The suspect ',\
                'premesis':' at a ','victim_description':' the ','damages':' causing ',\
                'property':' the ', 'computer_crimes':'The suspect sent '}

    # Words inbewtween descriptions where multiple
    multiple_text = {'force_used':' and ','suspects_actions':' and ', 'victim_description':' ','computer_crimes':' and '}

    # Words after description
    post_text = {'victim_description':' victim', 'computer_crimes': ' of'}
    
    # Single use
    single_use = {'The suspect ':False,' victim':False,' causing ':False,' at a ':False, 'The suspect sent ':False}
    multi_use = {' the ':False,' a ':False}

    # If the pre text has not been used before
    sentence_parts = {'The suspect ':False,' the ':False,' victim':False,' at a ':False,' a ':False,' causing ':False, 'The suspect sent ':False}

    return key_1, pre_text, multiple_text, post_text, single_use, multi_use, sentence_parts


# Generate the first sentence
def generate_report(row,crime_num=0,print_dict=False,specific_code=None,multi=False):
    
    crime_dict = get_info(row, print_dict, specific_code, crime_num, multi)
    
    if crime_dict == {}:
        return
        
    key_1, pre_text, multiple_text, post_text, single_use, multi_use, sentence_parts = sentence_template()
    
    sentence = ''
    sentence_started = False
    
    if crime_dict.get('suspects_actions'):
        if 'multiple suspects overwhelmed' in crime_dict.get('suspects_actions')[0]:
#             substr = crime_dict['suspects_actions'][0]
            crime_dict['suspects_actions'] = ['M'+crime_dict['suspects_actions'][0][1:]]
            single_use[pre_text['suspects_actions']] = True
#             multi_use[pre_text['suspects_actions']] = True
#             sentence += 'Multiple suspects overwhelmed, ' + substr
            sentence_started = True
    
    # If there is a victim and no crime description
    if not crime_dict.get('force_used') and not crime_dict.get('suspects_actions') \
    and not crime_dict.get('computer_crimes') and (row['Bundle Name'] == 'threats' or 'assaulted' \
    in row['Bundle Name'] or 'miscellaneous' in row['Bundle Name']):

        if 'miscellaneous' in row['Bundle Name']:
            sentence = 'The suspect committed ' + row['Bundle Name']
        else:
            sentence = 'The suspect ' + row['Bundle Name']
        sentence_started = True
    
    
    # If there is no victim
    if len(crime_dict) == 2 and not sentence_started:
        sentence += 'The suspect committed a hate crime' + pre_text['premesis'] + crime_dict['premesis'][0]
        
    # If there is a premesis it is a vandalism
    elif len(crime_dict) == 3 and crime_dict.get('suspects_actions') == ['vandalized']:
        sentence += 'The suspect vandalized the property of a ' + crime_dict['premesis'][0]
            
    # Check each key and add descriptions to sentence
    else:
        if len(crime_dict) == 3 and not crime_dict.get('suspects_actions') and not sentence_started:
            sentence += 'There was a hate crime toward '
        
        for index in range(len(key_1)):
            category = key_1[index]

            # Check if key category in dictionary
            if checkValLen(crime_dict, category) and checkValLen(pre_text, category):

                # Loop over each MO code description in category
                for j in range(len(crime_dict[category])):

                    # Single use pretext
                    if checkKey(single_use, pre_text[category]):
                        # First use of MO category
                        if not single_use[pre_text[category]]:
                            sentence += pre_text[category] + crime_dict[category][j]
                            single_use[pre_text[category]] = True
                        # Multiple use of MO cateory
                        else:
                            sentence += multiple_text[category] + crime_dict[category][j]

                    else:
                        # First use of MO category
                        if not multi_use[pre_text[category]]:
                            sentence += pre_text[category] + crime_dict[category][j]
                            multi_use[pre_text[category]] = True
                        # Multiple use of MO cateory
                        else:
                            sentence += multiple_text[category] + crime_dict[category][j]

                # Words which follow
                if checkKey(post_text, category):
                    sentence += post_text[category]
    sentence += '.'

    # Additional sentences
    if crime_dict.get('target_victim_was'):
        sentence = generate_victim(sentence, crime_dict)
    if crime_dict.get('weapon'):
        sentence = generate_weapon(sentence, crime_dict)
    if crime_dict.get('bias'):
        sentence = generate_bias(sentence, crime_dict)
    if crime_dict.get('vehicle_involved'):
        sentence = generate_vehicle(sentence, crime_dict)
    if crime_dict.get('evidence'):
        sentence = generate_evidence(sentence, crime_dict)

    sentence = remove_duplicates(sentence)
    sentence = remove_phrases(sentence)
    print('Raw generated sentence:')

    if sentence[:3] == 'and':
        sentence = sentence[4:]

    print(sentence)
    
#     ginger_parser = parser.parse(sentence)
#     if len(ginger_parser.get('corrections')) != 0:
#         sentence = ginger_parser.get('result')
#         print('\nFixed sentence: ')
#         print(sentence)

    return sentence

In [186]:
crime_num = [4]

for i in crime_num:
    generate_report(desc_df.iloc[[-i][0]],crime_num=i,print_dict=True)
    print('')
    print(desc_df.iloc[[-i][0]])


-----------------------------------------------------------------------------------

Crime 4:

Dictionary:  {'suspects_actions': ['graffitied and vandalized'], 'crime_related_to': ['hatred/prejudice'], 'evidence': ['photographs'], 'victim_description': ['59 year-old', 'male'], 'premesis': ['church'], 'damages': ['$399 worth of damage or under'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect graffitied and vandalized the property of the 59 year-old male victim at a church causing $399 worth of damage or under. There is photographs of the crime.

dr_no                          101604392
date_rptd            2010-01-09 00:00:00
date_occ             2010-01-09 00:00:00
time_occ                               1
area_id                              NaN
                            ...         
longitude                       -118.413
latitude                         34.2884
Bundle Name                    vandalism
Crime Description                   

In [160]:
# Generate all hate crime segments
list_of_reports = []
for i in range(len(desc_df)):
#     print('\n----------------------------------------------------------------------\n\nCrime %i:'%i)
#     print(desc_df.iloc[[-i][0]])

#     list_of_reports.append(generate_report(desc_df.iloc[[-i][0]], crime_num=i,print_dict=True))
    generate_report(desc_df.iloc[[-i][0]], crime_num=i, print_dict=True, specific_code='suspects_actions')
#     generate_report(desc_df.iloc[[-i][0]], crime_num=i, print_dict=True, specific_code='vehicle_involved')
#     generate_report(desc_df.iloc[[-i][0]], crime_num=i, print_dict=True, specific_code='suspects_actions',multi=True)

#     print('')
#     print(desc_df.iloc[[-i][0]])


-----------------------------------------------------------------------------------

Crime 0:

Dictionary:  {'suspects_actions': ['intimidation, sprayed with chemical, used hate-related language, racial slurs and profanities on on'], 'crime_related_to': ['hatred/prejudice'], 'suspect_wore_disguise': ['wore hood/hoodie'], 'victim_description': ['41 year-old', 'black', 'male'], 'weapon': ['mace/pepper spray'], 'premesis': ['market']}

Generated report:

Raw generated sentence:
The suspect intimidated, sprayed with chemical, used hate-related language, racial slurs and profanity on the 41 year-old black male victim at a market. The suspect used a mace/pepper spray.

-----------------------------------------------------------------------------------

Crime 1:

Dictionary:  {'suspects_actions': ['hit, threaten to kill, threaten victims family and used profanities on on'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['customer'], 'victim_description': ['30 year-old', 'His

Crime 89:

Dictionary:  {'suspects_actions': ['multiple suspects overwhelmed, made unusual statement and hit'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['23 year-old', 'black', 'male'], 'premesis': ['liquor store']}

Generated report:

Raw generated sentence:
Multiple suspects overwhelmed, made unusual statement and hit the 23 year-old black male victim at a liquor store.

-----------------------------------------------------------------------------------

Crime 90:

Dictionary:  {'suspects_actions': ['hit'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['22 year-old', 'Hispanic', 'female'], 'premesis': ['parking lot']}

Generated report:

Raw generated sentence:
The suspect hit the 22 year-old Hispanic female victim at a parking lot.

-----------------------------------------------------------------------------------

Crime 93:

Dictionary:  {'suspects_actions': ['vandalized'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': [

Crime 163:

Dictionary:  {'suspects_actions': ['threaten to kill and intimidation'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['23 year-old', 'Hispanic', 'male'], 'premesis': ['parking lot']}

Generated report:

Raw generated sentence:
The suspect threatened to kill and intimidated the 23 year-old Hispanic male victim at a parking lot.

-----------------------------------------------------------------------------------

Crime 164:

Dictionary:  {'suspects_actions': ['threaten to kill and intimidation'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['21 year-old', 'Hispanic', 'male'], 'premesis': ['parking lot']}

Generated report:

Raw generated sentence:
The suspect threatened to kill and intimidated the 21 year-old Hispanic male victim at a parking lot.

-----------------------------------------------------------------------------------

Crime 165:

Dictionary:  {'suspects_actions': ['vandalized'], 'crime_related_to': ['hatred/prejudice'], 'v

Crime 241:

Dictionary:  {'suspects_actions': ['removes vict property'], 'crime_related_to': ['hatred/prejudice'], 'vehicle_involved': ['vehicle involved'], 'evidence': ['evidence booked (any crime)'], 'entry_device': ['open/unlocked'], 'victim_description': ['44 year-old', 'black', 'male'], 'premesis': ['parking underground/building'], 'damages': ['$950 worth of damage & under']}

Generated report:

Raw generated sentence:
The suspect stole the 44 year-old black male victim at a parking underground/building causing $950 worth of damage & under. There was a vehicle involved in the crime. There is evidence booked of the crime.

-----------------------------------------------------------------------------------

Crime 242:

Dictionary:  {'suspects_actions': ['hit'], 'crime_related_to': ['hatred/prejudice'], 'evidence': ['evidence booked (any crime)'], 'suspect_was': ['homeless/transient'], 'victim_description': ['43 year-old', 'black', 'female'], 'weapon': ['scissors'], 'premesis': ['sid

Crime 301:

Dictionary:  {'suspects_actions': ['vandalized'], 'crime_related_to': ['hatred/prejudice'], 'entry_device': ['cutting tool'], 'victim_description': ['29 year-old', 'male'], 'premesis': ['parking lot'], 'damages': ['$400 worth of damage & over'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect vandalized the property of the 29 year-old male victim at a parking lot causing $400 worth of damage & over.

-----------------------------------------------------------------------------------

Crime 302:

Dictionary:  {'suspects_actions': ['hit, threaten to harm victim (other than kill) and used profanities on on'], 'crime_related_to': ['hatred/prejudice'], 'suspect_was': ['intoxicated/drunk'], 'victim_description': ['33 year-old', 'Hispanic', 'male'], 'premesis': ['local business']}

Generated report:

Raw generated sentence:
The suspect hit, threatened to harm victim but not kill and used profanity on the 33 year-old Hispanic male victim at 

Crime 373:

Dictionary:  {'suspects_actions': ['hit and kicked'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['53 year-old', 'black', 'male'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect hit and kicked the 53 year-old black male victim at a sidewalk.

-----------------------------------------------------------------------------------

Crime 374:

Dictionary:  {'suspects_actions': ['hit'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['29 year-old', 'male'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect hit the 29 year-old male victim at a sidewalk.

-----------------------------------------------------------------------------------

Crime 375:

Dictionary:  {'suspects_actions': ['vandalized'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['40 year-old', 'female'], 'premesis': ['vehicle, passenger/truck'], 'damages': ['$400 worth of damage & over'], 'prope

Crime 447:

Dictionary:  {'suspect_was': ['homeless/transient'], 'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['hit'], 'victim_description': ['46 year-old', 'Hispanic', 'male'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The suspect hit the 46 year-old Hispanic male victim on the street.

-----------------------------------------------------------------------------------

Crime 448:

Dictionary:  {'suspects_actions': ['multiple suspects overwhelmed, attacked from the rear, hit, threaten to kill and used profanities on on'], 'suspects_association_with_victim': ['stranger'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['homosexual/gay'], 'victim_description': ['2 year-old', 'Hispanic', 'male'], 'weapon': ['stick'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
Multiple suspects overwhelmed, attacked from the rear, hit, threatened to kill and used profanity on the 2 year-old Hispanic male victim on the stre

Crime 494:

Dictionary:  {'suspects_actions': ['vandalized, graffitied and intimidation'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['32 year-old', 'Hispanic', 'female'], 'premesis': ['vehicle, passenger/truck'], 'damages': ['$400 worth of damage & over'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect vandalized, graffitied and intimidated the property of the 32 year-old Hispanic female victim at a vehicle, passenger/truck causing $400 worth of damage & over.

-----------------------------------------------------------------------------------

Crime 495:

Dictionary:  {'suspects_actions': ['spits on victim'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['32 year-old', 'white', 'female'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect spat on the 32 year-old white female victim at a sidewalk.

---------------------------------------------------------------------------

Crime 562:

Dictionary:  {'suspects_actions': ['vandalized'], 'crime_related_to': ['hatred/prejudice'], 'vehicle_involved': ['vehicle involved'], 'victim_description': ['45 year-old', 'black', 'male'], 'premesis': ['vehicle, passenger/truck'], 'damages': ['$400 worth of damage & over'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect vandalized the property of the 45 year-old black male victim at a vehicle, passenger/truck causing $400 worth of damage & over. There was a vehicle involved in the crime.

-----------------------------------------------------------------------------------

Crime 563:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['spits on victim'], 'suspect_was': ['homeless/transient'], 'target_victim_was': ['is 6 years old thru 13 years old'], 'victim_description': ['7 year-old', 'Hispanic', 'female'], 'premesis': ['other store']}

Generated report:

Raw generated sentence:
The suspect spat on the 7 


-----------------------------------------------------------------------------------

Crime 620:

Dictionary:  {'suspects_actions': ['choked/uses choke hold/strangulation, threaten to kill, victim knocked to ground, pushed and swung weapon'], 'crime_related_to': ['hatred/prejudice', 'knew the suspect'], 'suspects_association_with_victim': ['knew the suspect'], 'target_victim_was': ['homosexual/gay'], 'victim_description': ['36 year-old', 'Hispanic', 'female'], 'premesis': ['house']}

Generated report:

Raw generated sentence:
The suspect choked, threatened to kill, victim knocked to ground, pushed and swung weapon the 36 year-old Hispanic female victim at a house. The victim was homosexual/gay.

-----------------------------------------------------------------------------------

Crime 624:

Dictionary:  {'target_victim_was': ['homosexual/gay'], 'suspects_actions': ['hit and brandished their weapon'], 'crime_related_to': ['hate incident', 'hatred/prejudice'], 'victim_description': ['37 

Crime 681:

Dictionary:  {'suspects_actions': ['stole, hit, kicked, victim knocked to ground and took driver license or ID'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['48 year-old', 'white', 'male'], 'weapon': ['unknown weapon'], 'premesis': ['parking lot']}

Generated report:

Raw generated sentence:
The suspect stole, hit, kicked, victim knocked to ground and took driver license or ID the 48 year-old white male victim at a parking lot. The suspect used a unknown weapon.

-----------------------------------------------------------------------------------

Crime 682:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['vandalized and graffitied'], 'victim_description': ['white', 'male'], 'premesis': ['high school'], 'damages': ['$399 worth of damage or under'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect vandalized and graffitied the property of the white male victim at a high school causing 

Crime 743:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['hit, kicked, multiple suspects overwhelmed, brandished their weapon, swung fist and used profanities on on'], 'evidence': ['evidence booked (any crime)'], 'suspects_association_with_victim': ['acquaintance'], 'victim_description': ['22 year-old', 'Hispanic', 'male'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
Mit, kicked, multiple suspects overwhelmed, brandished their weapon, swung fist and used profanity on the 22 year-old Hispanic male victim on the street. There is evidence booked of the crime.

-----------------------------------------------------------------------------------

Crime 744:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['vandalized'], 'victim_description': ['63 year-old', 'white', 'male'], 'premesis': ['local business'], 'damages': ['$399 worth of damage or under'], 'property': ['property of the']}

Generated report:

Raw 

Crime 804:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspect_was': ['transgender'], 'suspects_actions': ['threw object at victim'], 'victim_description': ['26 year-old', 'Hispanic', 'female'], 'weapon': ['verbal threat'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The threw object at the 26 year-old Hispanic female victim on the street. The suspect used a verbal threat.

-----------------------------------------------------------------------------------

Crime 807:

Dictionary:  {'evidence': ['photographs'], 'crime_related_to': ['knew the suspect', 'hatred/prejudice'], 'suspects_association_with_victim': ['knew the suspect'], 'suspects_biological_relationship_to_victim': ["is victim's father"], 'target_victim_was': ['homosexual/gay'], 'suspects_actions': ['hit'], 'victim_description': ['19 year-old', 'female'], 'premesis': ['house']}

Generated report:

Raw generated sentence:
The suspect hit the 19 year-old female victim at a house. The victim 

Crime 864:

Dictionary:  {'suspects_association_with_victim': ['stranger'], 'suspects_actions': ['hit'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['42 year-old', 'black', 'male'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The suspect hit the 42 year-old black male victim on the street.

-----------------------------------------------------------------------------------

Crime 865:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['kicked, victim knocked to ground, hit and attacked from the rear'], 'victim_description': ['30 year-old', 'male'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The suspect kicked, victim knocked to ground, hit and attacked from the rear the 30 year-old male victim on the street.

-----------------------------------------------------------------------------------

Crime 866:

Dictionary:  {'suspects_actions': ['multiple suspects overwhelmed, threw object at vict


-----------------------------------------------------------------------------------

Crime 926:

Dictionary:  {'crime_related_to': ['hatred/prejudice', 'knew the suspect'], 'suspects_actions': ['grabbed and used profanities on on'], 'suspects_association_with_victim': ['is neighbor', 'knew the suspect'], 'sex_related_acts': ['made sexually suggestive remarks', 'fondles self'], 'victim_description': ['54 year-old', 'Hispanic', 'female'], 'premesis': ['apartment']}

Generated report:

Raw generated sentence:
The suspect grabbed and used profanity on the 54 year-old Hispanic female victim at a apartment.

-----------------------------------------------------------------------------------

Crime 927:

Dictionary:  {'crime_related_to': ['hatred/prejudice', 'gangs'], 'evidence': ['evidence booked (any crime)'], 'suspects_actions': ['brandished their weapon, intimidation, aimed gun, rode bike and used profanities on on'], 'suspects_association_with_victim': ['is neighbor'], 'victim_descripti


-----------------------------------------------------------------------------------

Crime 983:

Dictionary:  {'suspects_actions': ['threaten to harm victim (other than kill)'], 'computer_crimes': ['threatening e-mail/text messages', 'harrassing e-mail/text message/other electronic communication'], 'crime_related_to': ['knew the suspect', 'hate incident', 'hatred/prejudice'], 'suspects_association_with_victim': ['knew the suspect'], 'victim_description': ['45 year-old', 'white', 'male'], 'weapon': ['verbal threat'], 'premesis': ['college/junior college/university']}

Generated report:

Raw generated sentence:
The suspect threatened to harm victim but not killThe suspect sent threatening e-mail/text messages and harrassing e-mail/text message/other electronic communication of the 45 year-old white male victim at a college/junior college/university. The suspect used a verbal threat.

-----------------------------------------------------------------------------------

Crime 984:

Diction

Dictionary:  {'evidence': ['evidence booked (any crime)'], 'crime_related_to': ['hatred/prejudice', 'gangs'], 'suspects_association_with_victim': ['stranger', 'is/was known gang member'], 'suspects_actions': ['hit, threaten to kill, brandished their weapon and swung weapon'], 'victim_description': ['45 year-old', 'Hispanic', 'male'], 'premesis': ['parking lot']}

Generated report:

Raw generated sentence:
The suspect hit, threatened to kill, brandished their weapon and swung weapon the 45 year-old Hispanic male victim at a parking lot. There is evidence booked of the crime.

-----------------------------------------------------------------------------------

Crime 1024:

Dictionary:  {'suspects_actions': ['hit, made gang related statement, spoke Spanish, kicked and intimidation'], 'target_victim_was': ['was a student'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['25 year-old', 'male'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspe

Crime 1076:

Dictionary:  {'crime_related_to': ['hatred/prejudice', 'knew the suspect'], 'dispute_involved': ['landlord/tenant/neighbor'], 'suspects_association_with_victim': ['is neighbor', 'knew the suspect'], 'suspects_actions': ['hit'], 'victim_description': ['37 year-old', 'Hispanic', 'male'], 'premesis': ['apartment']}

Generated report:

Raw generated sentence:
The suspect hit the 37 year-old Hispanic male victim at a apartment.

-----------------------------------------------------------------------------------

Crime 1077:

Dictionary:  {'crime_related_to': ['hatred/prejudice', 'knew the suspect'], 'dispute_involved': ['landlord/tenant/neighbor'], 'suspects_association_with_victim': ['is neighbor', 'knew the suspect'], 'suspects_actions': ['hit'], 'victim_description': ['23 year-old', 'Hispanic', 'male'], 'premesis': ['apartment']}

Generated report:

Raw generated sentence:
The suspect hit the 23 year-old Hispanic male victim at a apartment.

---------------------------------


-----------------------------------------------------------------------------------

Crime 1144:

Dictionary:  {'suspects_actions': ['vandalized and graffitied'], 'vehicle_involved': ['vehicle involved'], 'crime_related_to': ['hatred/prejudice', 'gangs'], 'victim_description': ['36 year-old', 'female'], 'premesis': ['street'], 'damages': ['$400 worth of damage & over'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect vandalized and graffitied the property of the 36 year-old female victim on the street causing $400 worth of damage & over. There was a vehicle involved in the crime.

-----------------------------------------------------------------------------------

Crime 1145:

Dictionary:  {'suspects_actions': ['vandalized'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['43 year-old', 'male'], 'premesis': ['elementary school'], 'damages': ['$400 worth of damage & over'], 'property': ['property of the']}

Generated report:

R

Crime 1207:

Dictionary:  {'suspects_actions': ['threaten to kill'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['53 year-old', 'male'], 'weapon': ['verbal threat'], 'premesis': ['house']}

Generated report:

Raw generated sentence:
The suspect threatened to kill the 53 year-old male victim at a house. The suspect used a verbal threat.

-----------------------------------------------------------------------------------

Crime 1208:

Dictionary:  {'crime_related_to': ['knew the suspect', 'hatred/prejudice'], 'suspects_association_with_victim': ['knew the suspect'], 'suspects_actions': ['hit and spat on'], 'victim_description': ['24 year-old', 'black', 'male'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect hit and spat on the 24 year-old black male victim at a sidewalk.

-----------------------------------------------------------------------------------

Crime 1209:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'vehicle_

Crime 1281:

Dictionary:  {'suspects_actions': ['hit'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['39 year-old', 'Hispanic', 'male'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect hit the 39 year-old Hispanic male victim at a sidewalk.

-----------------------------------------------------------------------------------

Crime 1282:

Dictionary:  {'suspects_association_with_victim': ['stranger'], 'suspect_wore_disguise': ['wore hood/hoodie'], 'suspects_actions': ['made unusual statement and aimed gun'], 'target_victim_was': ['leaving business area'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['41 year-old', 'black', 'female'], 'weapon': ['hand gun'], 'premesis': ['bus stop']}

Generated report:

Raw generated sentence:
The suspect made unusual statement and aimed gun the 41 year-old black female victim at a bus stop. The victim was leaving business area. The suspect used a hand gun.

-----------------------

Crime 1331:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_association_with_victim': ['stranger'], 'suspects_actions': ['vandalized and graffitied'], 'premesis': ['bank'], 'damages': ['$400 worth of damage & over'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect vandalized and graffitied the property of the at a bank causing $400 worth of damage & over.

-----------------------------------------------------------------------------------

Crime 1332:

Dictionary:  {'suspects_association_with_victim': ['stranger'], 'suspects_actions': ['vandalized and graffitied'], 'computer_crimes': ['hate crime materials/printouts/e-mails'], 'crime_related_to': ['hatred/prejudice'], 'premesis': ['local business'], 'damages': ['$399 worth of damage or under'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect vandalized and graffitiedThe suspect sent hate crime materials/printouts/e-mails of the property

Crime 1386:

Dictionary:  {'suspects_actions': ['spits on victim'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['43 year-old', 'black', 'male'], 'weapon': ['unknown weapon'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect spat on the 43 year-old black male victim at a sidewalk. The suspect used a unknown weapon.

-----------------------------------------------------------------------------------

Crime 1387:

Dictionary:  {'suspects_actions': ['vandalized'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['65 year-old', 'black', 'female'], 'premesis': ['junior high school'], 'damages': ['$399 worth of damage or under'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect vandalized the property of the 65 year-old black female victim at a junior high school causing $399 worth of damage or under.

-----------------------------------------------------------------------------------

Crime 1434:

Dictionary:  {'suspects_actions': ['threaten to kill, intimidation and brandished their weapon'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['homosexual/gay'], 'victim_description': ['41 year-old', 'Hispanic', 'female'], 'weapon': ['verbal threat'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect threatened to kill, intimidated and brandished their weapon the 41 year-old Hispanic female victim at a sidewalk. The victim was homosexual/gay. The suspect used a verbal threat.

-----------------------------------------------------------------------------------

Crime 1435:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['vandalized and hit'], 'victim_description': ['33 year-old', 'male'], 'premesis': ['local business'], 'damages': ['$399 worth of damage or under'], 'property': ['property of the']}

Generated report:

Raw generated sentence:
The suspect vandalized and hit the property of the 33 

Crime 1479:

Dictionary:  {'suspects_actions': ['brandished their weapon, threaten to kill, pushed, attacked from the rear, spoke Spanish and used profanities on on'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['homosexual/gay'], 'dispute_involved': ['landlord/tenant/neighbor'], 'victim_description': ['31 year-old', 'Hispanic', 'male'], 'weapon': ['verbal threat'], 'premesis': ['yard (residential/business)']}

Generated report:

Raw generated sentence:
The suspect brandished their weapon, threatened to kill, pushed, attacked from the rear, spoke Spanish and used profanity on the 31 year-old Hispanic male victim at a yard (residential/business). The victim was homosexual/gay. The suspect used a verbal threat.

-----------------------------------------------------------------------------------

Crime 1480:

Dictionary:  {'suspects_actions': ['vandalized'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['84 year-old', 'white', 'female'], 'premesis': 

Crime 1532:

Dictionary:  {'victims_actions': ['used hate-related language'], 'suspects_actions': ['grabbed, attacked from the rear and used profanities on on'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['48 year-old', 'Hispanic', 'female'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect grabbed, attacked from the rear and used profanity on the 48 year-old Hispanic female victim at a sidewalk.

-----------------------------------------------------------------------------------

Crime 1533:

Dictionary:  {'crime_related_to': ['public transit (metrolink/train station,metro rail red line,metro rail blue line,subway,station,adjacent transit parking lots, tracks/tunnels,mta(rtd), and other municipal lines.', 'hatred/prejudice'], 'suspects_actions': ['threaten to kill and hit'], 'victim_description': ['29 year-old', 'black', 'male'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The suspect threatened to kill and

Crime 1585:

Dictionary:  {'suspects_actions': ['threaten to harm victim (other than kill) and hit'], 'crime_related_to': ['hatred/prejudice'], 'suspect_was': ['juvenile'], 'target_victim_was': ['targeted based on race/ethnicity/ancestry'], 'bias': ['anti-white'], 'victim_description': ['57 year-old', 'white', 'female'], 'premesis': ['restaurant']}

Generated report:

Raw generated sentence:
The suspect threatened to harm victim but not kill and hit the 57 year-old white female victim at a restaurant. The victim was targeted based on race/ethnicity/ancestry. The crime was classified by the LAPD as a hate crime with an "anti-white" bias.

-----------------------------------------------------------------------------------

Crime 1586:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_association_with_victim': ['stranger'], 'suspects_actions': ['snatched chain, brandished their weapon, hit, kicked and took clothes or jewelry'], 'target_victim_was': ['targeted based on rac

Crime 1630:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_association_with_victim': ['stranger'], 'suspects_actions': ['brandished their weapon, swung weapon, threaten to kill, cut/stabbed, used racial slurs and hate-related language on'], 'target_victim_was': ['targeted based on race/ethnicity/ancestry'], 'bias': ['anti-black or african american'], 'victim_description': ['46 year-old', 'black', 'male'], 'weapon': ['knife'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The suspect brandished their weapon, swung weapon, threatened to kill, cut/stabbed, used racial slurs and hate-related language on the 46 year-old black male victim on the street. The victim was targeted based on race/ethnicity/ancestry. The suspect used a knife. The crime was classified by the LAPD as a hate crime with an "anti-black or african american" bias.

-----------------------------------------------------------------------------------

Crime 1631:

Dictionary:  {'susp

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'computer_crimes': ['threatening e-mail/text messages'], 'evidence': ['evidence booked (any crime)'], 'suspects_actions': ['threaten to kill'], 'target_victim_was': ['targeted based on race/ethnicity/ancestry'], 'bias': ['anti-black or african american'], 'victim_description': ['28 year-old', 'black', 'male'], 'weapon': ['verbal threat'], 'premesis': ['apartment']}

Generated report:

Raw generated sentence:
The suspect threatened to killThe suspect sent threatening e-mail/text messages of the 28 year-old black male victim at a apartment. The victim was targeted based on race/ethnicity/ancestry. The suspect used a verbal threat. The crime was classified by the LAPD as a hate crime with an "anti-black or african american" bias. There is evidence booked of the crime.

-----------------------------------------------------------------------------------

Crime 1673:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_act

Crime 1713:

Dictionary:  {'suspects_actions': ['hit'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['59 year-old', 'black', 'male'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The suspect hit the 59 year-old black male victim on the street.

-----------------------------------------------------------------------------------

Crime 1714:

Dictionary:  {'suspects_actions': ['hit, threaten to kill and used hate-related language on'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['targeted based on religion'], 'bias': ['anti-jewish'], 'victim_description': ['49 year-old', 'white', 'male'], 'weapon': ['verbal threat'], 'premesis': ['gas station']}

Generated report:

Raw generated sentence:
The suspect hit, threatened to kill and used hate-related language on the 49 year-old white male victim at a gas station. The victim was targeted based on religion. The suspect used a verbal threat. The crime was classified by the LAPD as a 

Crime 1757:

Dictionary:  {'suspects_actions': ['hit and pushed'], 'suspect_was': ['homeless/transient'], 'target_victim_was': ['aged (60 & over) or blind/crippled/unable to care for self', 'targeted based on race/ethnicity/ancestry'], 'crime_related_to': ['hatred/prejudice'], 'bias': ['anti-hispanic or latino'], 'victim_description': ['68 year-old', 'Hispanic', 'male'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The suspect hit and pushed the 68 year-old Hispanic male victim on the street. The victim was aged (60 & over) or blind/crippled/unable to care for self and targeted based on race/ethnicity/ancestry. The crime was classified by the LAPD as a hate crime with an "anti-hispanic or latino" bias.

-----------------------------------------------------------------------------------

Crime 1758:

Dictionary:  {'suspects_actions': ['hit'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['targeted based on sexual orientation'], 'bias': ['anti-gay

Crime 1802:

Dictionary:  {'suspects_actions': ['hate-related language'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['targeted based on race/ethnicity/ancestry'], 'bias': ['anti-black or african american'], 'victim_description': ['69 year-old', 'black', 'female'], 'premesis': ['parking lot']}

Generated report:

Raw generated sentence:
The suspect hate-related language the 69 year-old black female victim at a parking lot. The victim was targeted based on race/ethnicity/ancestry. The crime was classified by the LAPD as a hate crime with an "anti-black or african american" bias.

-----------------------------------------------------------------------------------

Crime 1803:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['vandalized'], 'target_victim_was': ['targeted based on religion'], 'bias': ['anti-jewish'], 'victim_description': ['46 year-old', 'white', 'female'], 'premesis': ['apartment'], 'damages': ['$400 worth of damage & over'

Crime 1850:

Dictionary:  {'suspects_actions': ['shot at victim (no hits) and used hate-related language on'], 'crime_related_to': ['hatred/prejudice', 'gangs', 'shots fired'], 'bias': ['anti-hispanic or latino'], 'target_victim_was': ['targeted based on race/ethnicity/ancestry'], 'evidence': ['bullets/casings', 'firearm booked as evidence'], 'victim_description': ['24 year-old', 'Hispanic', 'male'], 'weapon': ['firearm'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The shot at victim (no hits) and used hate-related language on the 24 year-old Hispanic male victim on the street. The victim was targeted based on race/ethnicity/ancestry. The suspect used a firearm. The crime was classified by the LAPD as a hate crime with an "anti-hispanic or latino" bias. There is bullets/casings and firearm booked as evidence of the crime.

-----------------------------------------------------------------------------------

Crime 1851:

Dictionary:  {'crime_related_to': ['hatred

Crime 1895:

Dictionary:  {'suspects_actions': ['brandished their weapon, intimidation and threaten to kill'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['targeted based on gender non-conforming'], 'bias': ['anti-transgender'], 'victim_description': ['36 year-old', 'black', 'female'], 'weapon': ['verbal threat'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect brandished their weapon, intimidated and threatened to kill the 36 year-old black female victim at a sidewalk. The victim was targeted based on gender non-conforming. The suspect used a verbal threat. The crime was classified by the LAPD as a hate crime with an "anti-transgender" bias.

-----------------------------------------------------------------------------------

Crime 1896:

Dictionary:  {'suspects_actions': ['vandalized and used hate-related language on'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['targeted based on race/ethnicity/ancestry'], 'b


-----------------------------------------------------------------------------------

Crime 1936:

Dictionary:  {'suspects_actions': ['hit, grabbed, stole and victim knocked to ground'], 'crime_related_to': ['hatred/prejudice', 'public transit (metrolink/train station,metro rail red line,metro rail blue line,subway,station,adjacent transit parking lots, tracks/tunnels,mta(rtd), and other municipal lines.'], 'target_victim_was': ['was homeless/transient', 'targeted based on sexual orientation'], 'bias': ['anti-lesbian/gay/bisexual or transgender (mixed group)'], 'victim_description': ['44 year-old', 'Hispanic', 'male'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The suspect hit, grabbed, stole and victim knocked to ground the 44 year-old Hispanic male victim on the street. The victim was homeless/transient and targeted based on sexual orientation. The crime was classified by the LAPD as a hate crime with an "anti-lesbian/gay/bisexual or transgender (mixed group)

Crime 1983:

Dictionary:  {'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['hate-related language'], 'victim_description': ['39 year-old', 'white', 'male'], 'premesis': ['health spa/gym']}

Generated report:

Raw generated sentence:
The suspect hate-related language the 39 year-old white male victim at a health spa/gym.

-----------------------------------------------------------------------------------

Crime 1984:

Dictionary:  {'target_victim_was': ['spouse', 'co-habitants', 'aged (60 & over) or blind/crippled/unable to care for self'], 'suspects_association_with_victim': ['susp is/was current/former spouse/co-habitant', 'spouse', 'knew the suspect'], 'evidence': ['evidence booked (any crime)'], 'suspects_actions': ['cut/stabbed'], 'crime_related_to': ['knew the suspect', 'hatred/prejudice'], 'victim_description': ['65 year-old', 'black', 'female'], 'weapon': ['kitchen knife'], 'premesis': ['apartment']}

Generated report:

Raw generated sentence:
The suspect cut/stab


-----------------------------------------------------------------------------------

Crime 2036:

Dictionary:  {'suspects_actions': ['spat on and used hate-related language on'], 'suspect_was': ['homeless/transient', 'aged (60+over)'], 'target_victim_was': ['targeted based on race/ethnicity/ancestry', 'uber/lyft driver'], 'bias': ['anti-hispanic or latino'], 'evidence': ['photographs'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['25 year-old', 'Hispanic', 'male'], 'weapon': ['unknown weapon'], 'premesis': ['street']}

Generated report:

Raw generated sentence:
The suspect spat on and used hate-related language on the 25 year-old Hispanic male victim on the street. The victim was targeted based on race/ethnicity/ancestry and Uber or Lyft driver. The suspect used a unknown weapon. The crime was classified by the LAPD as a hate crime with an "anti-hispanic or latino" bias. There is photographs of the crime.

----------------------------------------------------------

Crime 2084:

Dictionary:  {'target_victim_was': ['targeted based on gender', 'is 6 years old thru 13 years old'], 'bias': ['anti-hispanic or latino'], 'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['threaten to harm victim (other than kill) and intimidation'], 'suspect_was': ['juvenile'], 'victim_description': ['5 year-old', 'Hispanic', 'female'], 'weapon': ['verbal threat'], 'premesis': ['project/tenement/public housing']}

Generated report:

Raw generated sentence:
The suspect threatened to harm victim but not kill and intimidated the 5 year-old Hispanic female victim at a project/tenement/public housing. The victim was targeted based on gender and is 6 years old thru 13 years old. The suspect used a verbal threat. The crime was classified by the LAPD as a hate crime with an "anti-hispanic or latino" bias.

-----------------------------------------------------------------------------------

Crime 2085:

Dictionary:  {'target_victim_was': ['targeted based on gender', '

Crime 2134:

Dictionary:  {'target_victim_was': ['targeted based on sexual orientation'], 'bias': ['anti-gay (male)'], 'suspects_actions': ['multiple suspects overwhelmed, hit and used hate-related language on'], 'crime_related_to': ['hatred/prejudice'], 'suspect_wore_disguise': ['wore hood/hoodie'], 'suspects_association_with_victim': ['stranger'], 'victim_description': ['23 year-old', 'Hispanic', 'male'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
Multiple suspects overwhelmed, hit and used hate-related language on the 23 year-old Hispanic male victim at a sidewalk. The victim was targeted based on sexual orientation. The crime was classified by the LAPD as a hate crime with an "anti-gay (male)" bias.

-----------------------------------------------------------------------------------

Crime 2135:

Dictionary:  {'target_victim_was': ['transgender'], 'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['aimed gun, spoke Spanish, kicked and used pro

Crime 2190:

Dictionary:  {'suspect_was': ['homeless/transient'], 'suspects_actions': ['followed, used profanities on and hate-related language on'], 'bias': ['anti-gay (male)'], 'target_victim_was': ['was homeless/transient', 'targeted based on sexual orientation'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['51 year-old', 'white', 'male'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect followed, used profanity on and hate-related language on the 51 year-old white male victim at a sidewalk. The victim was homeless/transient and targeted based on sexual orientation. The crime was classified by the LAPD as a hate crime with an "anti-gay (male)" bias.

-----------------------------------------------------------------------------------

Crime 2191:

Dictionary:  {'target_victim_was': ['targeted based on religion', 'aged (60 & over) or blind/crippled/unable to care for self'], 'bias': ['anti-jewish'], 'crime_related_to': ['hatred/pre

Crime 2227:

Dictionary:  {'bias': ['anti-gay (male)'], 'target_victim_was': ['aged (60 & over) or blind/crippled/unable to care for self', 'targeted based on sexual orientation'], 'suspects_association_with_victim': ['stranger'], 'vehicle_involved': ['vehicle involved'], 'suspects_actions': ['hit and used profanities on on'], 'crime_related_to': ['hatred/prejudice'], 'victim_description': ['61 year-old', 'white', 'male'], 'weapon': ['unknown weapon'], 'premesis': ['sidewalk']}

Generated report:

Raw generated sentence:
The suspect hit and used profanity on the 61 year-old white male victim at a sidewalk. The victim was aged (60 & over) or blind/crippled/unable to care for self and targeted based on sexual orientation. The suspect used a unknown weapon. The crime was classified by the LAPD as a hate crime with an "anti-gay (male)" bias. There was a vehicle involved in the crime.

-----------------------------------------------------------------------------------

Crime 2228:

Dictiona

Crime 2274:

Dictionary:  {'target_victim_was': ['targeted based on gender non-conforming'], 'crime_related_to': ['hatred/prejudice'], 'suspects_actions': ['threaten to kill'], 'bias': ['anti-gender non-conforming'], 'victim_description': ['37 year-old', 'black', 'female'], 'weapon': ['verbal threat'], 'premesis': ['house']}

Generated report:

Raw generated sentence:
The suspect threatened to kill the 37 year-old black female victim at a house. The victim was targeted based on gender non-conforming. The suspect used a verbal threat. The crime was classified by the LAPD as a hate crime with an "anti-gender non-conforming" bias.

-----------------------------------------------------------------------------------

Crime 2275:

Dictionary:  {'suspects_actions': ['vandalized'], 'crime_related_to': ['hatred/prejudice'], 'target_victim_was': ['targeted based on religion'], 'bias': ['anti-other christian'], 'premesis': ['church (changed 03-03 from church/temple)'], 'damages': ['$400 worth of 

In [150]:
# Crimes numbers to work on
# Errors
'''
2 Text: The suspect threatens to kill and racial slurs and hate-related language and profanity used the 25 year-old Hispanic female victim at a restaurant.
3 """"
4 Text: suspect hit and attacks from rear the 44
5 
9 Text: The suspect intimidation and spits on victim and hate-related language the 31 year-old Hispanic female victim at a mta - red line - hollywood/highland.
11
12 Text: There was an in vehicle in the crime.
13 Text: There was a vehicle involved in the crime. There are photographs of the crime.
14 Text: The suspect hit and threaten to harm the victim (other than kill) and hate-related language the 54 year-old black male victim at a other store.
16 Text: The suspect hate-related language and vandalized the property of the at a synagogue/temple
17 Text: the property of the at a local business
18 Missing information: suspect is intoxicated or drunk
19 Text: suspect graffiti the property
20 Text: suspect racial slurs and graffiti and removes Vict property and vandalized and profanity used in an elementary school
21 Text: graffiti the property 
22 Text: suspect threw objects at the victim and pushed and hit and profanity used and spits on victim the 17 year-old female victim of a ban
   Remove victim age: is 14 years old thru 17 years old
23 Text: suspect threw objects at victim and pushed and hit and profanity used and spits on victim 
27 Text: and graffiti the property of the at a high school
28 Text: property of the at a bar or nightclub
30 Text: There was a uses vehicle in the crime.
31 Text: suspect swung weapon the 50 year-old Hispanic female victim of a street.
32 Text: suspect swung weapon the female victim of a street.
33 Text: suspect swung weapon the 10 year-old Hispanic female victim of a street.
   Remove victim age
35 Text: The suspect sent threatening emails/text messages of at a local business.
36 Text: The suspect hit and multiple suspects overwhelm and profanity used the 58 year-old Hispanic male victim in a parking lot
37 Text: suspect, victim knocked to the ground and pushed and kicked and hit and removes Vict property and attacks from the rear the 28 year-old white male victi
38 Text: The suspect kicked and aimed gun and profanity used and speaks spanish the 20 year-old Hispanic male victim at a parking lot
39 Text: The suspect hit and hate-related language and multiple suspects overwhelm the 23 year-old Hispanic male victim at a sidewalk.z
40 Text: Threw objects at victim the 19 year-old Hispanic female victim 
41 Text: suspect hit and threw objects at victim the 23 year-old black male victim
42 Text: suspect threatens to harm the victim (other than kill) and threw objects at the victim and bit and multiple suspects overwhelm and racial slurs
   Text: There was a uses vehicle in the crime.
44 Text: suspect bit and victim knocked to the ground and spits on victim and racial slurs and hate-related language the 28 year-old black male victim of a street
   Text: There was a uses vehicle in the crime.
45 Text: suspect pulled victim's hair and hit and profanity used and speaks Spanish and vandalized and hate-related language the 23 year-old Hispanic victim of a street

'''

'\n2 Text: The suspect threatens to kill and racial slurs and hate-related language and profanity used the 25 year-old Hispanic female victim at a restaurant.\n3 """"\n4 Text: suspect hit and attacks from rear the 44\n5 \n9 Text: The suspect intimidation and spits on victim and hate-related language the 31 year-old Hispanic female victim at a mta - red line - hollywood/highland.\n11\n12 Text: There was an in vehicle in the crime.\n13 Text: There was a vehicle involved in the crime. There are photographs of the crime.\n14 Text: The suspect hit and threaten to harm the victim (other than kill) and hate-related language the 54 year-old black male victim at a other store.\n16 Text: The suspect hate-related language and vandalized the property of the at a synagogue/temple\n17 Text: the property of the at a local business\n18 Missing information: suspect is intoxicated or drunk\n19 Text: suspect graffiti the property\n20 Text: suspect racial slurs and graffiti and removes Vict property and v

## Grammarbot

In [438]:
text = "The suspect threaten to kill and racial slurs and hate-related language and profanity used the \
25 year-old Hispanic female victim in/at/on a restaurant."

In [None]:
from grammarbot import GrammarBotClient

In [None]:
client = GrammarBotClient()

In [None]:
res = client.check(text)

In [None]:
# Inspecting the GrammarBotApiResponse object
# ===========================================

# check detected language
res.detected_language # "en-US"

# check if the result is incomplete
res.result_is_incomplete # False

# see the suggestions / corrections suggested by the GrammarBot API
# returns a list of GrammarBotMatch objects describing each replacement
res.matches # [GrammarBotMatch(offset=2, length=4, rule={'CANT'}, category={'TYPOS'}), GrammarBotMatch(offset=26, length=5, rule={'CONFUSION_RULE'}, category={'TYPOS'})]


# Inspecting the GrammarBotMatch object
# =====================================

match0 = match[0] # GrammarBotMatch(offset=2, length=4, rule={'CANT'}, category={'TYPOS'})


# get replacement information
match0.replacement_offset # 2
match0.replacement_length # 4

# get suggested replacements
match0.replacements # ["can't", 'cannot']

# get list of possible correct sentences after applying the replacements
match0.corrections # ["I can't remember how to go their", 'I cannot remember how to go their']

# get the rules, type and category information of the match
match0.rule # 'CANT'
match0.category # 'TYPOS'
match0.type # 'Other'


In [None]:
for text in list_of_reports[:100]:

    res = client.check(text)
    matches = res.matches
    for match in matches:
        print('\n')
        print(match.replacements)
        print(match.corrections)
        print(match.rule)
        print(match.category)
        print(match.type)
    
    

In [None]:
text = "I can't remeber how to go their"

In [None]:
res = client.check(text)
res

In [None]:
res.detected_language

In [None]:
res.result_is_incomplete

In [None]:
# import the client library
from grammarbot import GrammarBotClient

# Creating the client
# ===================
client = GrammarBotClient()

# or, signup for an API Key to get higher usage limits here: https://www.grammarbot.io/
# client = GrammarBotClient(api_key='6ac8fe69aamshcc30411e7824b49p12a1cajsnc2267278951c') # GrammarBotClient(api_key=my_api_key_here)

# you can even set the base URI to a different server
# client = GrammarBotClient(base_uri='http://backup.grammarbot.io:80')

# Analyzing the text
# ==================

# There is only one method to perform the analysis, viz. GrammarBotClient.check
# method.

text = 'I cant remember how to go their'

# check the text, returns GrammarBotApiResponse object
res = client.check(text) # GrammarBotApiResponse(matches=[GrammarBotMatch(offset=2, length=4, rule={'CANT'}, category={'TYPOS'}), GrammarBotMatch(offset=26, length=5, rule={'CONFUSION_RULE'}, category={'TYPOS'})])

# Inspecting the GrammarBotApiResponse object
# ===========================================

# check detected language
res.detected_language # "en-US"

# check if the result is incomplete
res.result_is_incomplete # False

# see the suggestions / corrections suggested by the GrammarBot API
# returns a list of GrammarBotMatch objects describing each replacement
res.matches # [GrammarBotMatch(offset=2, length=4, rule={'CANT'}, category={'TYPOS'}), GrammarBotMatch(offset=26, length=5, rule={'CONFUSION_RULE'}, category={'TYPOS'})]


# Inspecting the GrammarBotMatch object
# =====================================

match0 = res.matches[0] # GrammarBotMatch(offset=2, length=4, rule={'CANT'}, category={'TYPOS'})


# get replacement information
match0.replacement_offset # 2
match0.replacement_length # 4

# get suggested replacements
match0.replacements # ["can't", 'cannot']

# get list of possible correct sentences after applying the replacements
match0.corrections # ["I can't remember how to go their", 'I cannot remember how to go their']


# get the rules, type and category information of the match
match0.rule # 'CANT'
match0.category # 'TYPOS'
match0.type # 'Other'

# getting a friendly message regarding the replacement suggestion
match0.message # 'Did you mean "can\'t" or "cannot"?'

# Getting even more information
# =============================

# if the information provided by the class properties is not enough, you can
# always access the complete original JSON response from GrammarBotApiResponse
# object
res.raw_json

In [None]:
res.result_is_incomplete

## Spacy

In [None]:
import spacy

nlp = spacy.load('en')
grammar = Grammar(nlp)
nlp.add_pipe(grammar)
doc = nlp('I can haz cheeseburger.')
doc._.has_grammar_error  # True

## Language Check

In [None]:
import language_check
tool = language_check.LanguageTool('en-US')

In [None]:
for text in list_of_reports:
    print('\n')
    # text = list_of_reports[9]
    matches = tool.check(text)
    len(matches)

    print(text)
    for i in range(len(matches)):
        print(matches[i].fromy, matches[i].fromx)
        print(matches[i].ruleId, matches[i].replacements)

In [None]:
language_check.correct(text, matches)

# Ginger it
This will probably help

In [22]:
from gingerit.gingerit import GingerIt

In [23]:
from gingerit.gingerit import GingerIt

text = 'The smelt of fliwers bring back memories.'

parser = GingerIt()
parser.parse(text)

{'text': 'The smelt of fliwers bring back memories.',
 'result': 'The smell of flowers brings back memories.',
 'corrections': [{'start': 21,
   'text': 'bring',
   'correct': 'brings',
   'definition': None},
  {'start': 13,
   'text': 'fliwers',
   'correct': 'flowers',
   'definition': 'a plant cultivated for its blooms or blossoms'},
  {'start': 4, 'text': 'smelt', 'correct': 'smell', 'definition': None}]}

In [26]:
parser = GingerIt()

for text in list_of_reports:
    print('\n')
    this_parser = parser.parse(text)
    print(this_parser)

In [25]:
this_parser = parser.parse()

IndexError: list index out of range

In [None]:
parser.parse().get('result')

In [None]:
text=list_of_reports[0]
text='There is evidence booked (any crime) of the crime.'
text

In [None]:
this_parser = parser.parse(text)
print(this_parser)

In [None]:
text_file = open("test.txt", "w")
n = text_file.write(list_of_reports[2])
text_file.close()

# NLTK

In [None]:
import nltk
from nltk.stem.wordnet import WordNetLemmatizer

In [None]:
text = "And now for something completely different".split()

In [None]:
words = ['gave','went','going','dating']
for word in words:
    print(word+"-->"+WordNetLemmatizer().lemmatize(word,'v'))   

In [None]:
for word in words:
    

In [None]:
nltk.pos_tag(text)

## Stanford Parser

In [None]:
!pip install stanfordnlp

In [None]:
pip install stanfordnlp -U

In [None]:
import stanfordnlp