In [1]:
import os
import pandas as pd

In [2]:
def read_files(directory_path, desired_police_stations):
    """
    Read the CSV files belonging to the requested police stations and
    concatenate them into a single dataframe.

    The station name is recovered from each file name by lower-casing it,
    removing every hyphen, and stripping the first 6 and the last 17
    characters. For a name such as
    ``2015-04-metropolitan-stop-and-search.csv`` that removes the ``YYYYMM``
    prefix and the ``stopandsearch.csv`` suffix, leaving ``metropolitan``.

    Args:
        directory_path (str): Path to the directory containing the CSV files.
        desired_police_stations (list): List of desired police station names.
            Names are compared case-insensitively and with hyphens removed.

    Returns:
        pd.DataFrame: Combined dataframe containing data from all matching
        CSV files, concatenated in sorted file-name order with a fresh index.

    Raises:
        ValueError: If no CSV file in ``directory_path`` matches any of the
            requested police stations.
    """
    # Keep the caller's spelling for the error message below.
    requested = list(desired_police_stations)

    # Normalise the requested names the same way file names are normalised
    # in the loop; a set gives constant-time membership checks.
    wanted = {station.lower().replace('-', '') for station in requested}

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the combined dataframe reproducible between runs.
    file_names = sorted(f for f in os.listdir(directory_path) if f.endswith('.csv'))

    dfs = []
    for file_name in file_names:
        # Strip hyphens, then cut the 6-char date prefix and the 17-char
        # 'stopandsearch.csv' suffix to isolate the station name.
        police_station = ''.join(file_name.lower().split('-'))[6:-17]
        if police_station in wanted:
            print(f'Reading file: {file_name}')
            file_path = os.path.join(directory_path, file_name)
            dfs.append(pd.read_csv(file_path))

    # pd.concat([]) fails with an opaque "No objects to concatenate"; raise
    # the same exception type with a message that says what was looked for.
    if not dfs:
        raise ValueError(
            f'No CSV files found in {directory_path!r} for police stations: {requested}'
        )

    return pd.concat(dfs, ignore_index=True)


# Folder that holds the stop-and-search CSV files
directory_path = 'Data/stop_search'

# Police stations whose files should be loaded
police_stations = ['metropolitan']

# Load every matching file into one combined dataframe
combined_data = read_files(
    directory_path=directory_path,
    desired_police_stations=police_stations,
)

Reading file: 2015-04-metropolitan-stop-and-search.csv
Reading file: 2015-05-metropolitan-stop-and-search.csv
Reading file: 2015-06-metropolitan-stop-and-search.csv
Reading file: 2015-07-metropolitan-stop-and-search.csv
Reading file: 2015-08-metropolitan-stop-and-search.csv
Reading file: 2015-09-metropolitan-stop-and-search.csv
Reading file: 2015-10-metropolitan-stop-and-search.csv
Reading file: 2015-11-metropolitan-stop-and-search.csv
Reading file: 2015-12-metropolitan-stop-and-search.csv
Reading file: 2016-01-metropolitan-stop-and-search.csv
Reading file: 2016-02-metropolitan-stop-and-search.csv
Reading file: 2016-03-metropolitan-stop-and-search.csv
Reading file: 2016-04-metropolitan-stop-and-search.csv
Reading file: 2016-05-metropolitan-stop-and-search.csv
Reading file: 2016-06-metropolitan-stop-and-search.csv
Reading file: 2016-07-metropolitan-stop-and-search.csv
Reading file: 2016-08-metropolitan-stop-and-search.csv
Reading file: 2016-09-metropolitan-stop-and-search.csv
Reading fi

In [3]:
# Preview the first five rows of the combined dataframe.
combined_data.head(n=5)

Unnamed: 0,Type,Date,Part of a policing operation,Policing operation,Latitude,Longitude,Gender,Age range,Self-defined ethnicity,Officer-defined ethnicity,Legislation,Object of search,Outcome,Outcome linked to object of search,Removal of more than just outer clothing
0,Person and Vehicle search,2015-03-31T23:00:00+00:00,,,,,Male,18-24,White - White British (W1),White,Police and Criminal Evidence Act 1984 (section 1),,Nothing found - no further action,,
1,Person and Vehicle search,2015-03-31T23:00:00+00:00,,,,,Male,18-24,White - White British (W1),White,Police and Criminal Evidence Act 1984 (section 1),,Suspect arrested,,
2,Person search,2015-03-31T23:05:00+00:00,,,,,Male,18-24,Black or Black British - Any other Black ethni...,Black,Misuse of Drugs Act 1971 (section 23),,Offender given drugs possession warning,,
3,Person search,2015-03-31T23:09:00+00:00,,,,,Male,,Not Stated (NS),Black,Misuse of Drugs Act 1971 (section 23),,Nothing found - no further action,,
4,Person search,2015-03-31T23:15:00+00:00,,,,,Male,18-24,Asian or Asian British - Any other Asian ethni...,Asian,Police and Criminal Evidence Act 1984 (section 1),,Nothing found - no further action,,


In [15]:
# Number of rows in the combined dataframe.
combined_data.shape[0]

1533196

In [4]:
# Count the missing values in each column.
combined_data.isna().sum(axis=0)

Type                                              0
Date                                              0
Part of a policing operation                 178930
Policing operation                          1533196
Latitude                                     332993
Longitude                                    332993
Gender                                        18962
Age range                                    174208
Self-defined ethnicity                        18317
Officer-defined ethnicity                     29193
Legislation                                       0
Object of search                              89451
Outcome                                           0
Outcome linked to object of search          1533196
Removal of more than just outer clothing    1533196
dtype: int64

In [5]:
# Discard every row that has no longitude value.
# NOTE(review): 'Latitude' is not checked here; the missing-value summary shows
# the same count for both columns, presumably the same rows — confirm.
combined_data = combined_data.dropna(
    axis=0,
    subset=['Longitude'],
)

In [6]:
# Columns to remove from combined_data.
# NOTE(review): the missing-value summary in this notebook shows 'Legislation'
# with no missing values, while 'Policing operation', 'Outcome linked to object
# of search' and 'Removal of more than just outer clothing' are missing in
# every row — confirm this is the intended list.
dropping_columns = [
    'Legislation',
]

In [7]:
# Rebind rather than mutate in place so this cell can be re-run safely: the
# in-place version raised KeyError on a second execution because the columns
# were already gone. errors='ignore' skips labels that are no longer present.
combined_data = combined_data.drop(columns=dropping_columns, errors='ignore')

In [8]:
# Write the dataframe to a Parquet file; the path is relative, so the file is
# created in the current working directory.
# NOTE(review): the extension is spelled 'paraquet' rather than 'parquet'. It is
# left unchanged because other code may open the file by this exact name —
# confirm, then rename the writer and any readers together.
combined_data.to_parquet('stopandsearch.paraquet')