# Los Angeles - Crime Data Analysis (2020 - Present)

### Imports

In [1]:
import os
import requests
from tqdm import tqdm
import pandas as pd

### Data Download and Loading

In [2]:
# Remote CSV export of the dataset (data.lacity.org); fetched by the download cell
# only when the local copy is missing.
DATASET_URL = "https://data.lacity.org/api/views/2nrs-mtv8/rows.csv?accessType=DOWNLOAD"
# Local file name, in the working directory, that the download is saved under and
# that pd.read_csv loads from.
DATASET_FILENAME = "Crime_Data_from_2020_to_Present.csv"

In [3]:
# Download the dataset once and cache it next to the notebook; later runs reuse
# the local copy.
if not os.path.isfile(DATASET_FILENAME):
    # Write to a temporary name and rename only after the last byte is on disk.
    # Otherwise an interrupted download would leave a truncated CSV under the
    # final name, and the cache check above would treat it as complete forever.
    partial_filename = DATASET_FILENAME + ".part"

    # stream=True defers fetching the body so that it is written chunk by chunk
    # instead of being buffered entirely in memory before the loop starts (this
    # also makes the progress bar reflect actual network progress).
    # timeout bounds the connect and per-read waits so a stalled server cannot
    # hang the cell indefinitely. The `with` block releases the connection.
    with requests.get(DATASET_URL, stream=True, timeout=30) as response:
        if response.status_code == 200:
            # The server may omit content-length (chunked transfer); pass None
            # in that case so tqdm shows a running byte count rather than 0/0.
            total_size = int(response.headers.get('content-length', 0))
            with tqdm(total=total_size or None, unit='B', unit_scale=True, desc="Downloading") as pbar:
                with open(partial_filename, "wb") as f:
                    # 1 MiB chunks: far fewer Python-level iterations than 1 KiB
                    # chunks for a file of several hundred megabytes.
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            f.write(chunk)
                            pbar.update(len(chunk))
            # Atomic rename: the final name only ever refers to a complete file.
            os.replace(partial_filename, DATASET_FILENAME)
        else:
            print("Failed to download the file.")

### Preliminary Inspection

In [4]:
# Read the cached CSV into a DataFrame, then show its first five rows as the
# cell's output.
df = pd.read_csv(filepath_or_buffer=DATASET_FILENAME)
df.head(n=5)

Unnamed: 0,DR_NO,Date Rptd,DATE OCC,TIME OCC,AREA,AREA NAME,Rpt Dist No,Part 1-2,Crm Cd,Crm Cd Desc,...,Status,Status Desc,Crm Cd 1,Crm Cd 2,Crm Cd 3,Crm Cd 4,LOCATION,Cross Street,LAT,LON
0,10304468,01/08/2020 12:00:00 AM,01/08/2020 12:00:00 AM,2230,3,Southwest,377,2,624,BATTERY - SIMPLE ASSAULT,...,AO,Adult Other,624.0,,,,1100 W 39TH PL,,34.0141,-118.2978
1,190101086,01/02/2020 12:00:00 AM,01/01/2020 12:00:00 AM,330,1,Central,163,2,624,BATTERY - SIMPLE ASSAULT,...,IC,Invest Cont,624.0,,,,700 S HILL ST,,34.0459,-118.2545
2,200110444,04/14/2020 12:00:00 AM,02/13/2020 12:00:00 AM,1200,1,Central,155,2,845,SEX OFFENDER REGISTRANT OUT OF COMPLIANCE,...,AA,Adult Arrest,845.0,,,,200 E 6TH ST,,34.0448,-118.2474
3,191501505,01/01/2020 12:00:00 AM,01/01/2020 12:00:00 AM,1730,15,N Hollywood,1543,2,745,VANDALISM - MISDEAMEANOR ($399 OR UNDER),...,IC,Invest Cont,745.0,998.0,,,5400 CORTEEN PL,,34.1685,-118.4019
4,191921269,01/01/2020 12:00:00 AM,01/01/2020 12:00:00 AM,415,19,Mission,1998,2,740,"VANDALISM - FELONY ($400 & OVER, ALL CHURCH VA...",...,IC,Invest Cont,740.0,,,,14400 TITUS ST,,34.2198,-118.4468


In [5]:
# Rows are samples, columns are features.
n_samples = len(df.index)
n_features = len(df.columns)
print(f"Dataset has {n_samples} samples and {n_features} features.")

# Per-column dtype and non-null count, used to spot missing values.
df.info()

Dataset has 788767 samples and 28 features.
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 788767 entries, 0 to 788766
Data columns (total 28 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   DR_NO           788767 non-null  int64  
 1   Date Rptd       788767 non-null  object 
 2   DATE OCC        788767 non-null  object 
 3   TIME OCC        788767 non-null  int64  
 4   AREA            788767 non-null  int64  
 5   AREA NAME       788767 non-null  object 
 6   Rpt Dist No     788767 non-null  int64  
 7   Part 1-2        788767 non-null  int64  
 8   Crm Cd          788767 non-null  int64  
 9   Crm Cd Desc     788767 non-null  object 
 10  Mocodes         680162 non-null  object 
 11  Vict Age        788767 non-null  int64  
 12  Vict Sex        685415 non-null  object 
 13  Vict Descent    685407 non-null  object 
 14  Premis Cd       788758 non-null  float64
 15  Premis Desc     788300 non-null  object 
 16  Weapon Used 

##### Dataset Description

| #  | Column         | Description |
|----|----------------|----------------|
| 0  | DR_NO          | Report Number |
| 1  | Date Rptd      | Date when the crime was reported |
| 2  | DATE OCC       | Date when the crime occurred |
| 3  | TIME OCC       | Time when the crime occurred |
| 4  | AREA           | Area code of the crime location |
| 5  | AREA NAME      | Name of the area where the crime occurred |
| 6  | Rpt Dist No    | Crime Reporting District Number |
| 7  | Part 1-2       | (Unknown: No information in METADATA) |
| 8  | Crm Cd         | Crime Code |
| 9  | Crm Cd Desc    | Crime Description |
| 10 | Mocodes        | (Unknown: No information in METADATA) |
| 11 | Vict Age       | Victim Age |
| 12 | Vict Sex       | Victim Sex |
| 13 | Vict Descent   | Victim Race (possibly irrelevant, since the values are coded and there is no information on the coding format) |
| 14 | Premis Cd      | Code for premises where the crime occurred |
| 15 | Premis Desc    | Description of the premises where the crime occurred |
| 16 | Weapon Used Cd | Weapon Used in the crime |
| 17 | Weapon Desc    | Weapon Description |
| 18 | Status         | Case Status |
| 19 | Status Desc    | Case Status Description |
| 20 | Crm Cd 1       | Additional Crime Codes |
| 21 | Crm Cd 2       | Additional Crime Codes |
| 22 | Crm Cd 3       | Additional Crime Codes |
| 23 | Crm Cd 4       | Additional Crime Codes |
| 24 | LOCATION       | Exact address of crime location |
| 25 | Cross Street   | Name of the cross street |
| 26 | LAT            | Latitude |
| 27 | LON            | Longitude |