##### This notebook reads the CSV files for the selected police stations, keeps the burglary records, and saves them in Parquet format so the data can be loaded quickly in later work.

In [2]:
import os
import pandas as pd

In [3]:
def read_files(directory_path, desired_police_stations):
    """
    Read the CSV files of the requested police stations and combine them.

    The station is taken from the file name, which is expected to look like
    ``YYYY-MM-<station>-street.csv`` (e.g. ``2010-12-metropolitan-street.csv``).
    Names are compared in lower case with hyphens removed, so
    ``'Avon-and-Somerset'`` matches ``2010-12-avon-and-somerset-street.csv``.

    Args:
        directory_path (str): Path to the directory containing the CSV files.
        desired_police_stations (list or str): Desired police station names.
            A single name may be passed as a plain string.

    Returns:
        pd.DataFrame: Combined dataframe containing the rows of every matching
        CSV file, concatenated in sorted file-name order with a fresh index.

    Raises:
        ValueError: If no CSV file in the directory matches a desired station.
    """
    # A bare string would otherwise be iterated character by character
    if isinstance(desired_police_stations, str):
        desired_police_stations = [desired_police_stations]

    # Normalise the desired names the same way the file names are normalised
    wanted_stations = {
        station.lower().replace('-', '') for station in desired_police_stations
    }

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the result reproducible between runs and machines
    file_names = sorted(f for f in os.listdir(directory_path) if f.endswith('.csv'))

    # Once hyphens are removed the name reads 'YYYYMM<station>street.csv'
    date_prefix_len = len('YYYYMM')
    suffix_len = len('street.csv')

    dfs = []
    for file_name in file_names:
        # Extract the police station name from the file name
        police_station = file_name.lower().replace('-', '')[date_prefix_len:-suffix_len]

        if police_station in wanted_stations:
            print(f'Reading file: {file_name}')
            file_path = os.path.join(directory_path, file_name)
            dfs.append(pd.read_csv(file_path))

    # pd.concat raises an opaque "No objects to concatenate" on an empty list;
    # keep the exception type but explain what was actually missing
    if not dfs:
        raise ValueError(
            f'No CSV files found in {directory_path!r} for police stations: '
            f'{sorted(wanted_stations)}'
        )

    # Concatenate all dataframes into a single dataframe
    return pd.concat(dfs, ignore_index=True)


# Directory that holds the CSV files to load
directory_path = 'Data/street'

# Police stations whose files should be read
police_stations = ['metropolitan']

# Load every matching file into one combined dataframe
combined_data = read_files(
    directory_path=directory_path,
    desired_police_stations=police_stations,
)

Reading file: 2010-12-metropolitan-street.csv
Reading file: 2011-01-metropolitan-street.csv
Reading file: 2011-02-metropolitan-street.csv
Reading file: 2011-03-metropolitan-street.csv
Reading file: 2011-04-metropolitan-street.csv
Reading file: 2011-05-metropolitan-street.csv
Reading file: 2011-06-metropolitan-street.csv
Reading file: 2011-07-metropolitan-street.csv
Reading file: 2011-08-metropolitan-street.csv
Reading file: 2011-09-metropolitan-street.csv
Reading file: 2011-10-metropolitan-street.csv
Reading file: 2011-11-metropolitan-street.csv
Reading file: 2011-12-metropolitan-street.csv
Reading file: 2012-01-metropolitan-street.csv
Reading file: 2012-02-metropolitan-street.csv
Reading file: 2012-03-metropolitan-street.csv
Reading file: 2012-04-metropolitan-street.csv
Reading file: 2012-05-metropolitan-street.csv
Reading file: 2012-06-metropolitan-street.csv
Reading file: 2012-07-metropolitan-street.csv
Reading file: 2012-08-metropolitan-street.csv
Reading file: 2012-09-metropolitan

In [4]:
# Preview the first five rows (head() defaults to n=5)
combined_data.head()

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context
0,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.145949,51.593914,On or near Providence Place,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
1,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.140192,51.582311,On or near Hatch Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
2,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
3,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,
4,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.134947,51.588063,On or near Mead Grove,E01000027,Barking and Dagenham 001A,Anti-social behaviour,,


In [5]:
# Keep only the rows whose crime type is 'Burglary'; copy so the result is
# independent of combined_data
burglary_df = combined_data.loc[
    lambda frame: frame['Crime type'].eq('Burglary')
].copy()

In [6]:
# Columns earmarked for removal. The first entry used to read 'Contex', which
# does not match the dataframe's 'Context' column.
# NOTE(review): this cell only defines the list and does not apply it, so
# burglary_df still carries these columns. If they should be removed, use
# burglary_df.drop(columns=dropping_columns) — confirm the intent first.
dropping_columns = ['Context', 'Crime ID', 'Reported by', 'Last outcome category']
burglary_df

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context
29,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.140619,51.583427,On or near Rams Grove,E01000027,Barking and Dagenham 001A,Burglary,,
47,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.128479,51.583414,On or near Cavalier Close,E01000028,Barking and Dagenham 001B,Burglary,,
60,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.134801,51.578470,On or near Portland Close,E01000029,Barking and Dagenham 001C,Burglary,,
61,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.131156,51.582323,On or near Lake Road,E01000029,Barking and Dagenham 001C,Burglary,,
84,,2010-12,Metropolitan Police Service,Metropolitan Police Service,0.140466,51.584159,On or near Bagleys Spring,E01000030,Barking and Dagenham 001D,Burglary,,
...,...,...,...,...,...,...,...,...,...,...,...,...
12992652,400f7419cc445a8de4b2140c3153d63d99358e69aa77a9...,2023-03,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Burglary,Under investigation,
12992653,dd317e1582794d8f24562a1bde1024129a7466cb2f3993...,2023-03,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Burglary,Under investigation,
12992654,9974a1809c3da57ba717f92911e62c86b590c92d0469c2...,2023-03,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Burglary,Under investigation,
12992655,e2483074dc8848d0823d61711e9f18d8cb33f05ca4b1d9...,2023-03,Metropolitan Police Service,Metropolitan Police Service,,,No Location,,,Burglary,Under investigation,


In [None]:
# Write the burglary records to a Parquet file for later reuse
output_path = 'burglary.parquet'
burglary_df.to_parquet(output_path)