In [1]:
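# # Fetch IUCR descriptions (disabled experiment: needs `requests` and an existing `results_df` -- TODO confirm both before re-enabling)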
# iucr_url = 'https://data.cityofchicago.org/resource/c7ck-438e.json'
# iucr_response = requests.get(iucr_url)

# if iucr_response.status_code == 200:
#     iucr_data = iucr_response.json()
#     iucr_df = pd.DataFrame(iucr_data)
    
#     # Convert 'iucr' in iucr_df to match the type in results_df if necessary
#     # Assuming 'iucr' in results_df is already string, if not, convert it
#     results_df['iucr'] = results_df['iucr'].astype(str)
#     iucr_df['iucr'] = iucr_df['iucr'].astype(str)
    
#     # Merge iucr_df with results_df on 'iucr' to add descriptions
#     results_df = pd.merge(results_df, iucr_df[['iucr', 'primary_description', 'secondary_description']], on='iucr', how='left')
#     results_df = results_df.rename(columns={
#                                             'primary_description': 'primary_iucr_desc',
#                                             'secondary_description': 'secondary_iucr_desc'})
# else:
#     print(f"Failed to fetch IUCR data. Status code: {iucr_response.status_code}")


In [2]:
!pip install pandas==1.5.3 --quiet
!pip install sodapy --quiet

In [8]:
# IMPORTS
import os
import pandas as pd
from sodapy import Socrata
from dotenv import load_dotenv

# Load environment variables (load_dotenv pulls them in from a .env file) so
# that no credential is hard-coded in the notebook.
load_dotenv()
# The token was previously looked up under the misspelled name
# 'SOCRATE_APP_TOKEN'. Prefer the correctly spelled variable, but keep the old
# spelling as a fallback so existing .env files continue to work.
socrata_app_token = os.getenv('SOCRATA_APP_TOKEN') or os.getenv('SOCRATE_APP_TOKEN')
socrata_user_id = os.getenv('SOCRATA_MYLOGIN')
socrata_password = os.getenv('SOCRATA_MYPW')

def extract_data(token, user_id, password, dataset_id="ijzp-q8t2", rows_to_download=50):
    """Extract data from the Socrata API.

    Args:
        token: Socrata app token passed to the client.
        user_id: Username used to authenticate against the portal.
        password: Password used to authenticate against the portal.
        dataset_id: Identifier of the dataset on data.cityofchicago.org.
        rows_to_download: Value sent as the request's ``limit``.

    Returns:
        A pandas DataFrame built from the records returned by the API.
    """
    # Use the client as a context manager so its underlying HTTP session is
    # closed even if the request raises.
    with Socrata("data.cityofchicago.org", token, username=user_id, password=password) as client:
        client.timeout = 30  # Set request timeout
        # Fetch data from dataset
        records = client.get(dataset_id, limit=rows_to_download)
    return pd.DataFrame.from_records(records)

def transform_data(df):
    """Simplify the DataFrame by creating and dropping columns.

    A new DataFrame is returned; the frame passed in is left untouched, so the
    function can safely be re-run on the same raw data.

    Args:
        df: Raw crime records. Must contain the 'primary_type', 'description'
            and 'date' columns.

    Returns:
        A DataFrame with a combined 'crime' column, 'date' truncated to
        midnight, and the unwanted columns removed.
    """
    # Columns to remove: every ':@computed_region_*' column, identifiers
    # ('id', 'case_number', 'iucr'), 'year', the x/y coordinates, 'updated_on',
    # and the two source columns that are folded into 'crime'.
    unwanted_cols = [col for col in df.columns if col.startswith(':@computed_region_')] + [
        'id', 'iucr', 'year', 'x_coordinate', 'y_coordinate', 'case_number', 'updated_on', 'primary_type', 'description'
    ]

    # assign() builds a new frame instead of mutating the caller's one.
    transformed = df.assign(
        # 'crime' is 'primary_type' and 'description' joined by ' - '.
        crime=df['primary_type'] + ' - ' + df['description'],
        # normalize() sets the time to midnight (00:00:00). To use noon
        # instead, add pd.Timedelta(hours=12) to the normalized value.
        date=pd.to_datetime(df['date']).dt.normalize(),
    )

    # errors='ignore' keeps the drop from raising KeyError when one of the
    # listed columns is absent from the downloaded records.
    return transformed.drop(columns=unwanted_cols, errors='ignore')


def save_to_csv(df, filename="chicago_iucr_2.csv"):
    """Write ``df`` to ``filename`` in CSV format, omitting the index."""
    include_index = False
    df.to_csv(filename, index=include_index)

def main():
    """Run the pipeline end to end: download, transform, then write the CSV."""
    # Number of rows requested from the API for this export.
    row_count = 200000
    crimes_raw = extract_data(
        socrata_app_token,
        socrata_user_id,
        socrata_password,
        rows_to_download=row_count,
    )
    # Simplify the raw records and persist them under the default file name.
    save_to_csv(transform_data(crimes_raw))


# Only run the pipeline when this code is executed directly, not on import.
if __name__ == "__main__":
    main()


In [12]:
# Reload the exported CSV and report the number of non-null values per column.
dfnew = pd.read_csv(filepath_or_buffer='chicago_iucr_2.csv')
dfnew.count()

date                    200000
block                   200000
location_description    199100
arrest                  200000
domestic                200000
beat                    200000
district                200000
ward                    200000
community_area          200000
fbi_code                200000
latitude                199862
longitude               199862
location                199862
crime                   200000
dtype: int64

In [13]:
# Display the records ordered from the earliest to the latest date
# (ascending order is the sort_values default).
dfnew.sort_values('date')

Unnamed: 0,date,block,location_description,arrest,domestic,beat,district,ward,community_area,fbi_code,latitude,longitude,location,crime
199999,2023-08-23,042XX S MAPLEWOOD AVE,STREET,False,False,921,9,12,58,06,41.816536,-87.688346,"{'latitude': '41.816536269', 'longitude': '-87...",THEFT - $500 AND UNDER
199967,2023-08-23,022XX W DEVON AVE,SIDEWALK,False,False,2413,24,50,2,03,41.997801,-87.685838,"{'latitude': '41.997800745', 'longitude': '-87...",ROBBERY - ARMED - HANDGUN
199966,2023-08-23,017XX N ASHLAND AVE,PARK PROPERTY,False,False,1433,14,32,24,02,41.913233,-87.667765,"{'latitude': '41.913233349', 'longitude': '-87...",CRIMINAL SEXUAL ASSAULT - NON-AGGRAVATED
199965,2023-08-23,050XX S UNION AVE,VEHICLE NON-COMMERCIAL,False,True,935,9,20,61,08B,41.803218,-87.642952,"{'latitude': '41.803217886', 'longitude': '-87...",BATTERY - DOMESTIC BATTERY SIMPLE
199964,2023-08-23,046XX S ASHLAND AVE,STREET,False,False,924,9,15,61,26,41.809995,-87.665009,"{'latitude': '41.809994975', 'longitude': '-87...",OTHER OFFENSE - OTHER VEHICLE OFFENSE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,2024-06-06,041XX W CORTLAND ST,SCHOOL - PUBLIC BUILDING,False,False,2534,25,26,20,08B,41.915295,-87.730486,"{'latitude': '41.915294916', 'longitude': '-87...",BATTERY - SIMPLE
11,2024-06-06,060XX S INGLESIDE AVE,STREET,False,False,235,2,5,42,06,41.785467,-87.602798,"{'latitude': '41.785466516', 'longitude': '-87...",THEFT - OVER $500
12,2024-06-06,011XX W BRYN MAWR AVE,CTA STATION,False,False,2023,20,48,77,03,41.983649,-87.658712,"{'latitude': '41.983649153', 'longitude': '-87...",ROBBERY - STRONG ARM - NO WEAPON
6,2024-06-06,001XX N WOOD ST,APARTMENT,False,False,1223,12,27,28,14,41.883811,-87.671812,"{'latitude': '41.883811077', 'longitude': '-87...",CRIMINAL DAMAGE - TO PROPERTY
