Importing packages

In [2]:
import pandas as pd

Reading in the CSVs

In [3]:
# Load the four raw inputs. The names on the left are bound, in order, to the
# DataFrames read from the file names listed on the right.
full_moon, holidays, weather, crimes = (
    pd.read_csv(csv_name)
    for csv_name in ("full_moon.csv", "holidays.csv", "weather.csv", "Crimes.csv")
)

Recoding all the date columns to the same format

In [3]:
## First, full_moon.csv

### Convert the FullMoonDates column to datetime objects.
### The raw file uses the '%d %B %Y' layout (day, full month name, year), but this
### cell overwrites full_moon.csv with the parsed dates, which are written back
### in ISO 'YYYY-MM-DD' form. Parsing with only the raw layout therefore fails the
### next time the notebook is run from a fresh kernel, so fall back to ISO.
raw_moon_dates = full_moon['FullMoonDates']
moon_dates = pd.to_datetime(raw_moon_dates, format='%d %B %Y', errors='coerce')
moon_dates = moon_dates.fillna(
    pd.to_datetime(raw_moon_dates, format='%Y-%m-%d', errors='coerce')
)

### Keep the original fail-fast behaviour: a non-missing value that matches
### neither layout is an error, not a silent NaT.
unparsed_moon_dates = raw_moon_dates[moon_dates.isna() & raw_moon_dates.notna()]
if not unparsed_moon_dates.empty:
    raise ValueError(
        f"Unrecognised FullMoonDates values: {unparsed_moon_dates.unique()[:5].tolist()}"
    )
full_moon['FullMoonDates'] = moon_dates

### Format the dates as 'YYYY-MM-DD' and assign them to a new column
full_moon['use_date'] = full_moon['FullMoonDates'].dt.strftime('%Y-%m-%d')

### Save the updated DataFrame back to the same CSV file, overwriting the original file
full_moon.to_csv("full_moon.csv", index=False)


## Next, holidays.csv

### Date column already in correct format, just want to change the name to use_date for consistency.
### rename() ignores missing labels, so this is a no-op when the file was already rewritten.
holidays = holidays.rename(columns={'Date': 'use_date'})

### Save the updated DataFrame back to the same CSV file, overwriting the original file
holidays.to_csv("holidays.csv", index=False)


## Next, weather.csv

### Same situation as holidays.csv: only the column name changes ('datetime' -> 'use_date')
weather = weather.rename(columns={'datetime': 'use_date'})

### Save the updated DataFrame back to the same CSV file, overwriting the original file
weather.to_csv("weather.csv", index=False)

In [4]:
## Now for Crimes.csv

### Parse the 'Date' column once and keep the parsed timestamps in the frame
crime_timestamps = pd.to_datetime(crimes['Date'])
crimes['Date'] = crime_timestamps

### Derive the text columns: 'use_date' holds the calendar date, 'use_time' the time of day
for new_column, pattern in (('use_date', '%Y-%m-%d'), ('use_time', '%H:%M:%S')):
    crimes[new_column] = crime_timestamps.dt.strftime(pattern)

### Write the result over Crimes.csv (the original file is replaced)
crimes.to_csv("Crimes.csv", index=False)

Indicating whether each day was a full moon or a holiday

In [11]:
## Flag full-moon days on the weather table (the original note says weather.csv has a row for each day)

# Both date columns need to be datetime objects so that isin() compares like with like
weather['use_date'] = weather['use_date'].pipe(pd.to_datetime)
full_moon['use_date'] = full_moon['use_date'].pipe(pd.to_datetime)

# 'full_moon' is 1 when the row's date appears among the full-moon dates, otherwise 0
is_full_moon = weather['use_date'].isin(full_moon['use_date'])
weather['full_moon'] = is_full_moon.astype('int64')

# Write the flagged table to full1.csv (this cell does not write weather.csv)
weather.to_csv("full1.csv", index=False)

In [12]:
## Flag holidays on the weather table

# Make sure both date columns are datetime objects before comparing them
weather['use_date'] = weather['use_date'].pipe(pd.to_datetime)
holidays['use_date'] = holidays['use_date'].pipe(pd.to_datetime)

# 'holiday' is 1 when the row's date appears in the holidays table, otherwise 0
is_holiday = weather['use_date'].isin(holidays['use_date'])
weather['holiday'] = is_holiday.astype('int64')

# Write the table, now carrying the 'holiday' column, to full1.csv (replacing any earlier full1.csv)
weather.to_csv("full1.csv", index=False)

Indicating if a crime happened, how many crimes, what types, etc.

In [None]:
# Reload the day-level table from full1.csv and parse its 'use_date' column
full1 = pd.read_csv("full1.csv").assign(
    use_date=lambda day_rows: pd.to_datetime(day_rows['use_date'])
)

# Convert the crimes 'use_date' column to datetime objects as well, so the two can be compared
crimes['use_date'] = crimes['use_date'].pipe(pd.to_datetime)

In [None]:
# Add binary column 'criminal_activity' to full1:
# 1 when at least one crime row has that 'use_date', otherwise 0
has_crime = full1['use_date'].isin(crimes['use_date'])
full1['criminal_activity'] = has_crime.astype('int64')

In [None]:
# Add a 'crime_count' column (number of crime rows per day); the result is weather_df.
# groupby(...).size() returns an unnamed Series, and pd.merge refuses to merge a
# Series without a name, so the Series is named 'crime_count' up front. That name
# becomes the merged column's label, so no rename is needed afterwards.
crime_count_series = crimes.groupby('use_date').size().rename('crime_count')
weather_df = pd.merge(full1, crime_count_series, left_on='use_date', right_index=True, how='left')

# Days with no crime rows come out of the left merge as NaN; record them as 0.
# Assign the result back instead of calling fillna(inplace=True) on a column
# selection, which newer pandas versions warn about and may not propagate to the frame.
weather_df['crime_count'] = weather_df['crime_count'].fillna(0)

In [None]:
# Add one column per 'Primary Type' holding the number of crimes of that type on each day
# (these are counts, not 0/1 flags). Type/day combinations with no crimes are 0.
crime_type_counts = (
    crimes
    .groupby(['use_date', 'Primary Type'])
    .size()
    .unstack(fill_value=0)
    .reset_index()
)

# Left merge keeps every day in weather_df; days absent from crimes get NaN here
weather_df = weather_df.merge(crime_type_counts, on='use_date', how='left')

In [None]:
# Replace NaN with 0 in the per-type count columns
# Every column of crime_type_counts except 'use_date' (its first column) is a crime type
crime_type_columns = crime_type_counts.columns.drop('use_date')
filled_type_counts = weather_df[crime_type_columns].fillna(0)
weather_df[crime_type_columns] = filled_type_counts

In [None]:
# Write weather_df to full1.csv without the index column, replacing the full1.csv
# that this notebook wrote and re-read earlier.
weather_df.to_csv("full1.csv", index=False)