### WEB CRUSADERS


In [2]:
import dask.dataframe as dd  # Importing Dask DataFrame for handling large datasets
import matplotlib.pyplot as plt  # Importing matplotlib for plotting
import seaborn as sns  # Importing seaborn for enhanced visualization
import os  # Importing os module for file system operations
from matplotlib.backends.backend_pdf import PdfPages  # Importing PdfPages for saving plots to a PDF file

# Read the CSV file into a Dask DataFrame
ddf = dd.read_csv('Tool3.csv')

# Manually convert the 'date_time' column to datetime format
ddf['date_time'] = dd.to_datetime(ddf['date_time'])

# Extract hour, day of week, month, and season from 'date_time'
ddf['hour'] = ddf['date_time'].dt.hour
ddf['day_of_week'] = ddf['date_time'].dt.dayofweek
ddf['month'] = ddf['date_time'].dt.month
ddf['season'] = ddf['date_time'].dt.month % 12 // 3 + 1

# Create a 'plots' directory if it doesn't exist
if not os.path.exists('plots'):
    os.makedirs('plots')

def plot_crime_occurrence(district_name):
    """
    Function to plot crime occurrence based on different time periods for a specified district.
    
    Parameters:
        district_name (str): The name of the district for which crime occurrence is to be plotted.
    """
    # Filter the Dask DataFrame based on the specified district name and compute it into a Pandas DataFrame
    filtered_ddf = ddf[ddf['District_Name'].str.lower() == district_name.lower()]
    filtered_df = filtered_ddf.compute()

    # Check if the filtered DataFrame is empty
    if filtered_df.empty:
        print("No data found for the selected district.")
        return

    # Get unique units (e.g., police units) within the filtered DataFrame
    units = filtered_df['UnitName'].unique()
    
    # Define time periods (e.g., hour, day of week, month, season) for plotting
    time_periods = ['hour', 'day_of_week', 'month', 'season']
    
    # Define the filename for the output PDF that will contain the plots
    pdf_filename = f"plots/{district_name}_crime_occurrence.pdf"

    # Create a PDF file to save multiple plots
    with PdfPages(pdf_filename) as pdf:
        for unit in units:
            unit_data = filtered_df[filtered_df['UnitName'] == unit]
            for time_period in time_periods:
                # Create a new figure for each time period
                plt.figure(figsize=(10, 6))
                
                # Plot crime occurrence count based on the selected time period using seaborn
                sns.countplot(x=time_period, data=unit_data, palette='viridis')
                
                # Set plot title and labels
                plt.title(f'Crime Occurrence by {time_period.capitalize()} in {district_name.title()}, Unit: {unit}')
                plt.xlabel(time_period.capitalize())
                plt.ylabel('Number of Crimes')
                
                # Save the current plot to the PDF file
                pdf.savefig()
                
                # Close the plot to release memory
                plt.close()

    # Print a message indicating that the PDF file has been generated
    print(f"PDF generated: {pdf_filename}")

if __name__ == '__main__':
    # Prompt the user to enter the district name for which crime occurrence plots will be generated
    district_name = input("Enter the district name: ")
    
    # Call the plot_crime_occurrence function with the specified district name
    plot_crime_occurrence(district_name)


Enter the district name:  Bidar



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=time_period, data=unit_data, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=time_period, data=unit_data, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=time_period, data=unit_data, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=time_period, data=unit_data, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be

PDF generated: plots/Bidar_crime_occurrence.pdf



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=time_period, data=unit_data, palette='viridis')

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=time_period, data=unit_data, palette='viridis')
