In [3]:
import dask.dataframe as dd

# Load the dataset with Dask
df = dd.read_csv('Tool6.csv')

# Group the data by 'Year' and 'Beat_Name' to get the count of crimes
df_analyse = df.groupby(['Year', 'District_Name', 'Beat_Name']).size().reset_index()
df_analyse.columns = ['Year', 'District_Name', 'Beat_Name', 'Number of crimes']

# Compute the Dask dataframe to get the actual result
df_analyse = df_analyse.compute()

# Get unique years and districts for filtering
unique_years = df_analyse['Year'].unique()
unique_districts = df_analyse['District_Name'].unique()

# Get user input for preferred year and district
preferred_year = int(input("Enter the preferred year: "))
preferred_district = input("Enter the preferred district: ")

# Check if the preferred year and district exist in the dataset
if (preferred_year not in unique_years) or (preferred_district not in unique_districts):
    print("Data for the preferred year or district not found.")
else:
    # Filter the data based on user preferences
    filtered_data = df_analyse[(df_analyse['Year'] == preferred_year) & (df_analyse['District_Name'] == preferred_district)]

    if filtered_data.empty:
        print("No data found for the selected year and district.")
    else:
        # Rank the beats based on the number of crimes reported
        ranked_beats = filtered_data.sort_values(by='Number of crimes', ascending=False)

        # Print the ranking list
        print(f"Ranking of beats with the highest number of crimes for Year {preferred_year} and District {preferred_district}:")
        for rank, (beat, num_crimes) in enumerate(zip(ranked_beats['Beat_Name'], ranked_beats['Number of crimes']), start=1):
            print(f"Rank {rank}: {beat} - Number of Crimes: {num_crimes}")

        # Save the ranking list to a text file
        with open(f'crime_ranking_{preferred_year}_{preferred_district}.txt', 'w') as f:
            f.write(f"Ranking of beats with the highest number of crimes for Year {preferred_year} and District {preferred_district}:\n")
            for rank, (beat, num_crimes) in enumerate(zip(ranked_beats['Beat_Name'], ranked_beats['Number of crimes']), start=1):
                f.write(f"Rank {rank}: {beat} - Number of Crimes: {num_crimes}\n")

        print(f"Ranking for Year {preferred_year} and District {preferred_district} has been saved to 'crime_ranking_{preferred_year}_{preferred_district}.txt'.")


Enter the preferred year:  2019
Enter the preferred district:  Bagalkot


Ranking of beats with the highest number of crimes for Year 2019 and District Bagalkot:
Rank 1: RURAL BEAT 4 - Number of Crimes: 108
Rank 2: TOWN BEAT 1 - Number of Crimes: 101
Rank 3: RURAL BEAT 3 - Number of Crimes: 70
Rank 4: Rural Beat No 1 - Number of Crimes: 57
Rank 5: BEAT NO 2 - Number of Crimes: 57
Rank 6: RURAL BEAT 2 - Number of Crimes: 53
Rank 7: Rural Beat No 2 - Number of Crimes: 52
Rank 8: TOWN BEAT 4 - Number of Crimes: 45
Rank 9: TOWN BEAT 2 - Number of Crimes: 42
Rank 10: BEAT NO 6 - Number of Crimes: 42
Rank 11: RURAL BEAT 1 - Number of Crimes: 41
Rank 12: BEAT 5 - Number of Crimes: 38
Rank 13: RURAL BEAT I - Number of Crimes: 38
Rank 14: TOWN BEAT 3 - Number of Crimes: 37
Rank 15: Mudhol Town Beat 7 - Number of Crimes: 35
Rank 16: BEAT 1 - Number of Crimes: 33
Rank 17: BEAT NO 4 - Number of Crimes: 33
Rank 18: RAMPUR OP BEAT NO 2 - Number of Crimes: 32
Rank 19: 05 - Number of Crimes: 31
Rank 20: RURALBEAT1 - Number of Crimes: 31
Rank 21: Rural Beat No 4 - Number of 