In [1]:
# Dependencies
import pandas as pd

In [2]:
# Load the source CSV into a DataFrame (decoded as ISO-8859-1) and preview the first rows
file = 'Resources/clean_separated_list_count.csv'
df = pd.read_csv(
    filepath_or_buffer=file,
    encoding="ISO-8859-1",
)
df.head(n=5)

Unnamed: 0,date,team,player,position,status,crimeraw,desc,outcome,crime1,crime2,crime3,crime_list,crime_count
0,1/17/2020,NE,Joejuan Williams,CB,Arrested,drugs,"Pulled over for speeding in Nashville, accused...",Resolution undetermined.,drugs,,,['drugs'],1
1,1/16/2020,CLE,Odell Beckham,WR,Warrant,battery,Accused of slapping the buttocks of a police o...,Warrant rescinded.,battery,,,['battery'],1
2,1/11/2020,NE,Julian Edelman,WR,Arrested,vandalism,Accused of jumping on the hood of a Mercedes i...,Resolution undetermined.,vandalism,,,['vandalism'],1
3,12/29/2019,MIA,Xavien Howard,CB,Arrested,domestic violence,"Police in Davie, Fla., say he pushed his fianc...",Resolution undetermined.,domestic violence,,,['domestic violence'],1
4,12/20/2019,PIT,Kameron Kelly,S,Arrested,disorderly conduct,Accused of making threats and resisting arrest...,Resolution undetermined. Team released him sam...,disorderly conduct,,,['disorderly conduct'],1


In [3]:
# Splits the "crimeraw" column into individual rows: one row per comma-separated crime,
# with every other column repeated unchanged on each of the new rows.
#
# split + explode replaces the earlier set_index / split(expand=True) / stack chain.
# That chain only worked because the legacy stack() drops the padding NaNs that
# expand=True creates; NOTE(review): the newer stack implementation (future_stack=True)
# is understood to keep them, which would add one NaN row per padding cell -- confirm
# against the installed pandas version. The old chain also passed a stray positional `1`
# to Index.drop, which that method interprets as its `errors` argument.
#
# Tokens are deliberately NOT whitespace-stripped here: "drugs, gun" still yields
# "drugs" and " gun", exactly as before.
reshape = (
    df.dropna(subset=['crimeraw'])      # rows with no crimeraw value produced no output rows before
      .assign(crimeraw=lambda frame: frame['crimeraw'].str.split(','))
      .explode('crimeraw')              # one row per list element, other columns repeated
      .reset_index(drop=True)           # fresh 0..n-1 index, matching the old reset_index()
)

df = reshape
df.head(10)

Unnamed: 0,date,team,player,position,status,crimeraw,desc,outcome,crime1,crime2,crime3,crime_list,crime_count
0,1/17/2020,NE,Joejuan Williams,CB,Arrested,drugs,"Pulled over for speeding in Nashville, accused...",Resolution undetermined.,drugs,,,['drugs'],1
1,1/16/2020,CLE,Odell Beckham,WR,Warrant,battery,Accused of slapping the buttocks of a police o...,Warrant rescinded.,battery,,,['battery'],1
2,1/11/2020,NE,Julian Edelman,WR,Arrested,vandalism,Accused of jumping on the hood of a Mercedes i...,Resolution undetermined.,vandalism,,,['vandalism'],1
3,12/29/2019,MIA,Xavien Howard,CB,Arrested,domestic violence,"Police in Davie, Fla., say he pushed his fianc...",Resolution undetermined.,domestic violence,,,['domestic violence'],1
4,12/20/2019,PIT,Kameron Kelly,S,Arrested,disorderly conduct,Accused of making threats and resisting arrest...,Resolution undetermined. Team released him sam...,disorderly conduct,,,['disorderly conduct'],1
5,12/3/2019,DAL,Antwaun Woods,DT,Arrested,drugs,"Pulled over for speeding in Frisco, Texas, and...",Resolution undetermined,drugs,,,['drugs'],1
6,11/19/2019,MIA,Mark Walton,RB,Arrested,domestic violence,Police say he punched his pregnant girlfriend ...,Resolution undetermined. Team released him sam...,domestic violence,,,['domestic violence'],1
7,11/6/2019,DAL,Daniel Ross,DT,Arrested,drugs,Accused of marijuana possession and unlawful c...,Resolution undetermined.,drugs,gun,,"['drugs', ' gun']",2
8,11/6/2019,DAL,Daniel Ross,DT,Arrested,gun,Accused of marijuana possession and unlawful c...,Resolution undetermined.,drugs,gun,,"['drugs', ' gun']",2
9,10/27/2019,MIN,Jayron Kearse,S,Arrested,dui,"Accused of driving drunk in Minneapolis, with ...",Resolution undetermined.,dui,drugs,,"['dui', ' drugs']",2


In [4]:
# Cleaning up crime labels, renamed crimeraw to crime
df = df.rename(columns={"crimeraw": "crime"})

# Labels that come after a comma in the raw data carry a leading space (e.g. ' gun').
# Strip surrounding whitespace once, up front, so that:
#   * each raw label needs a single mapping entry instead of 'x' and ' x' variants, and
#   * labels that are not listed below no longer survive as a separate ' label' category.
# Missing values pass through .str.strip() and .replace() unchanged.
df['crime'] = df['crime'].str.strip().replace({
    # alcohol
    'alcohol': 'alcohol - other',
    'public intoxication': 'alcohol - other',
    'selling alcohol to minor': 'alcohol - other',
    # animal abuse
    'animal cruelty': 'animal abuse',
    'animal neglect': 'animal abuse',
    'dogfighting': 'animal abuse',
    # disorderly conduct
    'disturbing the peace': 'disorderly conduct',
    # domestic violence
    'domestic assault': 'domestic violence',
    'domestic dispute': 'domestic violence',
    # evading police
    'eluding police': 'evading police',
    'evading arrest': 'evading police',
    # firearms / weapons
    'gun': 'firearms',
    'guns': 'firearms',
    'weapon': 'weapons',
    # fraud
    'false information': 'fraud',
    'false name': 'fraud',
    # leaving scene (trailing period in the raw label)
    'leaving scene.': 'leaving scene',
    # murder
    'manslaughter': 'murder',
    # obstruction
    'interfering with police': 'obstruction',
    'police interference': 'obstruction',
    # reckless endangerment (second key is a misspelling present in the raw data)
    'reckless driving': 'reckless endangerment',
    'reckless endagerment': 'reckless endangerment',
    # resisting arrest (first key is a misspelling present in the raw data)
    'resisting arrrest': 'resisting arrest',
    'resisting officer': 'resisting arrest',
    # sex offences
    'sex': 'sex - other',
    'solicitation': 'sex - other',
    'sexual assault': 'sexual abuse',
    'sexual battery': 'sexual abuse',
    # theft
    'stolen possession': 'theft',
    # traffic
    'handicap parking': 'traffic - other',
    'license': 'traffic - other',
    'speeding': 'traffic - other',
    'suspended license': 'traffic - other',
    'traffic warrant': 'traffic - other',
    'traffic warrants': 'traffic - other',
})

In [5]:
# Keep only the columns that go into the exported file, in this order
keep_columns = ['date', 'team', 'player', 'position', 'crime', 'desc']
df = df.loc[:, keep_columns]
df.head(n=5)

Unnamed: 0,date,team,player,position,crime,desc
0,1/17/2020,NE,Joejuan Williams,CB,drugs,"Pulled over for speeding in Nashville, accused..."
1,1/16/2020,CLE,Odell Beckham,WR,battery,Accused of slapping the buttocks of a police o...
2,1/11/2020,NE,Julian Edelman,WR,vandalism,Accused of jumping on the hood of a Mercedes i...
3,12/29/2019,MIA,Xavien Howard,CB,domestic violence,"Police in Davie, Fla., say he pushed his fianc..."
4,12/20/2019,PIT,Kameron Kelly,S,disorderly conduct,Accused of making threats and resisting arrest...


In [6]:
# Write the one-crime-per-row table to disk; index=False leaves the row index out of the file
df.to_csv(
    path_or_buf='Resources/crime_split.csv',
    index=False,
)