In [1]:
import pandas as pd
import json
import os

In [2]:
#Load raw homicide data from local JSON file
with open('../data/homicide_raw.json') as f:
    data = json.load(f)

#Extract relevant fields from each incident
rows = []
for feature in data['features']:
    attr = feature['attributes']
    geom = feature.get('geometry', {})

    #Construct a row dictionary for each incident
    row = {
    'Latitude': geom.get('y'),
    'Longitude': geom.get('x'),
    'Incident_Date': attr.get('DATE_REPORTED'),
    'NPA': attr.get('NPA'),
    'Weapon': attr.get('WEAPON'),
    'Age': attr.get('AGE'),
    'Gender': attr.get('GENDER'),
    'Race': attr.get('RACE_ETHNICITY')
    }
    rows.append(row)
#Convert list of dictionaries to a DataFrame
df = pd.DataFrame(rows)

df.dropna(subset=['Latitude', 'Longitude'], inplace=True)
df['Incident_Date'] = pd.to_datetime(df['Incident_Date'], unit='ms')

df['Year'] = df['Incident_Date'].dt.year
df['Month'] = df['Incident_Date'].dt.month
df['Weekday'] = df['Incident_Date'].dt.dayofweek
df['Hour'] = df['Incident_Date'].dt.hour
#Filters to just Charlotte
df = df[
    (df['Latitude'] >= 34.5) & (df['Latitude'] <= 36) &
    (df['Longitude'] >= -82) & (df['Longitude'] <= -78)
]


os.makedirs('../data', exist_ok=True)
df.to_csv('../data/homicide_cleaned.csv', index=False)

In [3]:
df.head()
df.info()
df[['Weapon', 'Gender', 'Race']].value_counts().head()
print('File saved:', os.path.exists('../Charlotte Homicide Project/homicide_cleaned.csv'))


<class 'pandas.core.frame.DataFrame'>
Index: 923 entries, 0 to 926
Data columns (total 12 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Latitude       923 non-null    float64       
 1   Longitude      923 non-null    float64       
 2   Incident_Date  923 non-null    datetime64[ns]
 3   NPA            923 non-null    int64         
 4   Weapon         912 non-null    object        
 5   Age            894 non-null    object        
 6   Gender         916 non-null    object        
 7   Race           916 non-null    object        
 8   Year           923 non-null    int32         
 9   Month          923 non-null    int32         
 10  Weekday        923 non-null    int32         
 11  Hour           923 non-null    int32         
dtypes: datetime64[ns](1), float64(2), int32(4), int64(1), object(4)
memory usage: 79.3+ KB
File saved: False
