In [1]:
import pandas as pd
import matplotlib.pyplot as plt
# Read the district-wise cyber-crime CSV into the DataFrame that the rest of
# the notebook works from. The path is relative to the working directory.
DATASET_PATH = 'districtwise-cyber-crimes-2017-onwards.csv'
data = pd.read_csv(DATASET_PATH)


In [None]:
# Build per-year totals of every numeric column.
#
# `Year` ends up as a plain integer year (not a datetime): the `year` column is
# parsed with the '%Y' format, which raises on values that are not a valid year,
# and `.dt.year` then extracts the integer component.
data['Year'] = pd.to_datetime(data['year'], format='%Y').dt.year

# numeric_only=True restricts the aggregation to numeric columns. Since
# pandas 2.0 groupby().sum() no longer drops non-numeric columns on its own,
# so text columns (e.g. the state/district names) would otherwise be
# "summed" as well.
crime_trends = data.groupby('Year').sum(numeric_only=True)
print(crime_trends)


In [None]:
# Crime-category columns this analysis focuses on.
specific_crimes = [
    'tampering_computer_source_documents',
    'ransom_ware',
    'identity_theft',
    'cheating_by_personation_by_using_computer_resource',
    'violation_of_privacy',
    'cyber_terrorism',
    'data_theft',
    'credit_card_debit_card_fraud'
]

# Total cases per selected category over the whole dataset, largest first.
crime_totals = data[specific_crimes].sum().sort_values(ascending=False)

# The three categories with the highest overall totals. They are taken from
# `crime_totals` rather than by re-summing the same columns through
# `crime_trends`, so the totals computed above are actually used and this
# cell depends only on `data`.
top_three_crimes = crime_totals.nlargest(3).index.tolist()
print(top_three_crimes)

## Top three specific crimes

In [None]:

# Line chart of the yearly totals for the three leading crime categories.
# DataFrame.plot returns the Axes it drew on; the labels are set on that
# object directly instead of through the pyplot "current axes" state.
trend_ax = crime_trends[top_three_crimes].plot(marker='o', figsize=(10, 6))

trend_ax.set_title("Top Three Cybercrime Trends Over Years", fontsize=16)
trend_ax.set_xlabel("Year", fontsize=14)
trend_ax.set_ylabel("Number of Cases", fontsize=14)
trend_ax.legend(title="Crime Categories")
trend_ax.grid(True)
plt.show()

## Top 5 districts with the highest and lowest cybercrime totals

In [None]:
# Total cases per district, summed over every selected crime category and
# every row (year) of that district. These are raw counts, not per-capita rates.
#
# Districts are keyed by (state_name, district_name) instead of by name alone,
# so districts that share a name in different states are not merged into one
# total.
# NOTE(review): assumes district names can repeat across states in this
# dataset. If they are in fact unique, the totals are unchanged and the
# result only gains a state level in its index.
district_crime_totals = (
    data.groupby(['state_name', 'district_name'])[specific_crimes]
    .sum()
    .sum(axis=1)
)

# Five districts with the largest totals, in descending order.
top_5_districts = district_crime_totals.nlargest(5)
print("Top 5 districts with highest cybercrime rates:")
print(top_5_districts)

# Five districts with the smallest totals, in ascending order.
bottom_5_districts = district_crime_totals.nsmallest(5)
print("\nTop 5 districts with lowest cybercrime rates:")
print(bottom_5_districts)

In [None]:
# Bar chart of the five districts with the largest cybercrime totals.
district_ax = top_5_districts.plot(figsize=(10, 6), kind='bar')

district_ax.set_title("Top 5 Districts with Highest Cybercrime Rates", fontsize=16)
district_ax.set_xlabel("Districts", fontsize=14)
district_ax.set_ylabel("Total Cybercrimes", fontsize=14)
# Tilt the tick labels to 45 degrees.
plt.setp(district_ax.get_xticklabels(), rotation=45)
district_ax.grid(True)
plt.show()

## Top 5 states with the highest total crimes

In [None]:
# Per-state totals for each selected crime category
# (one row per state, one column per category).
state_crime_totals = data.groupby('state_name')[specific_crimes].sum()

# Collapse the categories into one overall figure per state and keep the
# five largest.
state_overall_totals = state_crime_totals.sum(axis=1)
top_5_states = state_overall_totals.nlargest(5)
print("Top 5 states with highest total crimes:")
print(top_5_states)

# nlargest returns its result in descending order, so the first index label
# is the state with the highest overall total.
top_state = top_5_states.index[0]
state_crime_distribution = state_crime_totals.loc[top_state]

# Bar chart of that state's totals, one bar per crime category.
state_ax = state_crime_distribution.plot(figsize=(10, 6), kind='bar')
state_ax.set_title(f"Crime Distribution in {top_state}", fontsize=16)
state_ax.set_xlabel("Crime Categories", fontsize=14)
state_ax.set_ylabel("Total Crimes", fontsize=14)
plt.setp(state_ax.get_xticklabels(), rotation=45)
state_ax.grid(True)
plt.show()