## Installers

In [2]:
!pip install pandas==1.5.3 --quiet
!pip install sodapy --quiet

## Get Current Chicago Crime Data from Socrata Data Services
#### Small requests work without credentials; for larger downloads, put your Socrata username, password, and app token in your .env file

In [2]:
# IMPORTS
import os
import pandas as pd
from sodapy import Socrata
from dotenv import load_dotenv

# Load variables from a .env file into the process environment
load_dotenv()
# Read the app token under the name consistent with the other SOCRATA_* variables,
# falling back to the original 'SOCRATE_APP_TOKEN' spelling (likely a typo) so
# existing .env files keep working.
socrata_app_token = os.getenv('SOCRATA_APP_TOKEN') or os.getenv('SOCRATE_APP_TOKEN')
socrata_user_id = os.getenv('SOCRATA_MYLOGIN')
socrata_password = os.getenv('SOCRATA_MYPW')

def extract_data(token, user_id, password, dataset_id="ijzp-q8t2", rows_to_download=50):
    """Download rows of a dataset from data.cityofchicago.org via the Socrata API.

    Args:
        token: Socrata app token passed to the client.
        user_id: Socrata account username.
        password: Socrata account password.
        dataset_id: Identifier of the dataset to query.
        rows_to_download: Value passed as the query ``limit``.

    Returns:
        A pandas DataFrame built from the records returned by the API.
    """
    client = Socrata("data.cityofchicago.org", token, username=user_id, password=password)
    try:
        client.timeout = 30  # Seconds to wait on the request before giving up
        records = client.get(dataset_id, limit=rows_to_download)
    finally:
        # Release the underlying HTTP session even if the request fails
        client.close()
    return pd.DataFrame.from_records(records)

def transform_data(df):
    """Simplify the raw crime DataFrame in place and return it.

    Adds a combined 'crime' column, truncates 'date' to midnight, and drops
    columns that are not needed downstream.

    Args:
        df: DataFrame with at least 'primary_type', 'description' and 'date'
            columns. It is modified in place.

    Returns:
        The same DataFrame object, after modification.

    Raises:
        KeyError: If 'primary_type', 'description' or 'date' is missing.
    """
    # Combine the two descriptive fields into a single label
    df['crime'] = df['primary_type'] + ' - ' + df['description']

    # Parse the timestamp and reset the time of day to midnight (00:00:00).
    # To use noon (12:00:00) instead, add pd.Timedelta(hours=12) to the result.
    df['date'] = pd.to_datetime(df['date']).dt.normalize()

    # Columns to discard: every computed-region column, plus identifiers,
    # projected coordinates, bookkeeping fields, and the two source columns
    # already merged into 'crime'.
    computed_region_cols = [col for col in df.columns if col.startswith(':@computed_region_')]
    redundant_cols = [
        'id', 'iucr', 'year', 'x_coordinate', 'y_coordinate', 'case_number',
        'updated_on', 'primary_type', 'description',
    ]
    # errors='ignore': a download may not contain every listed column (for
    # example when no returned record carries that field), and a missing
    # column should not abort the whole transform.
    df.drop(columns=computed_region_cols + redundant_cols, inplace=True, errors='ignore')

    return df


def save_to_csv(df, filename="./data/chicago_iucr.csv"):
    """Write the DataFrame to a CSV file without the index column.

    Args:
        df: DataFrame to write.
        filename: Destination path. Missing parent directories are created
            first, so the default './data/...' target works on a fresh
            checkout.
    """
    # Only path-like targets have a directory to create; anything else is
    # handed to to_csv untouched.
    if isinstance(filename, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filename))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(filename, index=False)

def main():
    """Run the pipeline end to end: download, simplify, then write the CSV."""
    # Pull up to 300000 rows using the credentials loaded at module level
    raw_crimes = extract_data(
        socrata_app_token,
        socrata_user_id,
        socrata_password,
        rows_to_download=300000,
    )
    # Simplify the download and write it to the default CSV location
    save_to_csv(transform_data(raw_crimes))

if __name__ == "__main__":
    main()
