In [4]:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Read the cyber-crime CSV into a DataFrame (district-wise data, per the file name)
data = pd.read_csv(filepath_or_buffer='districtwise-cyber-crimes-2017-onwards.csv')

In [5]:
# Preprocess the data
# Sum total_offences_under_ip over all rows sharing a (state, year) pair, giving
# one total per state per year
state_year_data = data.groupby(['state_name', 'year'])['total_offences_under_ip'].sum().reset_index()

# Forecast horizon: the next eight years (2023 to 2030) as a single-feature
# column. It is identical for every state, so it is built once here rather
# than inside the loop.
future_years = np.arange(2023, 2031).reshape(-1, 1)

# Prepare the dataset for SVR
states = state_year_data['state_name'].unique()
predictions = []

for state in states:
    state_data = state_year_data[state_year_data['state_name'] == state]

    # Prepare the features (X) and the target variable (y)
    X = np.array(state_data['year']).reshape(-1, 1)
    y = np.array(state_data['total_offences_under_ip'])

    # Standardize the features and the target; each state gets its own scalers
    # because the scalers are fitted on that state's rows only
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).flatten()

    # Create and fit the SVR model
    # NOTE(review): an RBF kernel's response decays with distance from the
    # training points, so forecasts far outside the observed years drift toward
    # the model's intercept rather than continuing a trend -- confirm that this
    # is the intended behaviour for an eight-year extrapolation.
    model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
    model.fit(X_scaled, y_scaled)

    # Predict in standardized units, then map back to offence counts
    future_years_scaled = scaler_X.transform(future_years)
    predictions_scaled = model.predict(future_years_scaled)
    future_predictions = scaler_y.inverse_transform(predictions_scaled.reshape(-1, 1)).flatten()

    # Store predictions, one record per (state, forecast year)
    for year, prediction in zip(future_years.flatten(), future_predictions):
        predictions.append({'state_name': state, 'year': year, 'total_offences_under_ip': prediction})

# Create DataFrame for predictions
predictions_df = pd.DataFrame(predictions)



In [12]:
# Stack the observed per-state totals on top of the forecasts so Tableau can
# read both from one table; the index is rebuilt to run 0..n-1
combined_df = pd.concat(objs=(state_year_data, predictions_df), ignore_index=True)

# Write the stacked table to a new CSV file, leaving out the index column
combined_df.to_csv(path_or_buf='predicted_crime_data.csv', index=False)

In [13]:
# Preprocess the data
# One row per (state, year) holding the summed total_offences_under_ip
state_year_data = (
    data
    .groupby(['state_name', 'year'])['total_offences_under_ip']
    .sum()
    .reset_index()
)

# Function to get the top crime categories for each year of the rows it is given
def get_top_crimes(data, crime_columns=None, top_n=5):
    """Return the highest-count crime categories for every year in ``data``.

    The function splits by ``year`` only. To get a per-state breakdown the
    caller passes in the rows of a single state.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``year`` column plus every column in ``crime_columns``.
    crime_columns : sequence of str, optional
        Category columns to rank. Defaults to the built-in list below.
    top_n : int, default 5
        Maximum number of categories returned per year.

    Returns
    -------
    dict
        ``{year: [column, ...]}``. Columns are ordered from the largest yearly
        total downwards; ties keep their ``crime_columns`` order. Categories
        whose yearly total is not positive are left out, so a list may hold
        fewer than ``top_n`` names or be empty. Years appear in order of first
        appearance in ``data``; rows with a missing year are ignored.

    Raises
    ------
    KeyError
        If ``year`` or any requested crime column is missing from ``data``.
    """
    if crime_columns is None:
        crime_columns = [
            'tampering_computer_source_documents', 'ransom_ware', 'offences_other_than_ransom_ware',
            'dishonestly_recv_stolen_cmp_resrc_or_comm_device', 'identity_theft',
            'cheating_by_personation_by_using_computer_resource', 'violation_of_privacy',
            'cyber_terrorism', 'pub_or_trans_obscene_material_in_electronic_form',
            'pub_or_trans_of_mtrl_cont_sxly_explct_act_in_elect_form',
            'pub_or_trans_matrl_dpctng_chldrn_sxly_explct_elect_form',
            'presrv_and_retention_of_info_by_intermediaries', 'other_sections_it_act',
            'interception_or_monitoring_or_decryption_of_info',
            'un_athryz_access_atmpt_access_prct_comp_sys',
            'abetment_to_commit_offences', 'attempt_to_commit_offences',
            'other_sections_of_it_act', 'abetment_of_suicide_online',
            'cyber_stalking_bullying_of_women_children', 'data_theft',
            'credit_card_debit_card_fraud', 'atms_fraud',
            'online_banking_fraud', 'otp_frauds', 'other_frauds',
            'cheating', 'forgery', 'defamation_morphing',
            'fake_profile', 'currency_counterfeiting',
            'stamps_counterfeiting', 'cyber_blackmailing_threatening',
            'fake_news_on_social_media', 'other_offences'
        ]
    else:
        # A list (not a tuple) makes the groupby selection below return a frame
        crime_columns = list(crime_columns)

    # Aggregate once instead of re-filtering the whole frame for every year.
    # sort=False keeps the years in order of first appearance.
    yearly_totals = data.groupby('year', sort=False)[crime_columns].sum()

    top_crimes = {}
    for year, totals in yearly_totals.iterrows():
        # A category with no recorded cases is not a "top" crime; without this
        # filter a year with few active categories would be padded with
        # zero-count columns in arbitrary column order.
        reported = totals[totals > 0]
        top_crimes[year] = reported.nlargest(top_n).index.tolist()

    return top_crimes

# Get top 5 crimes for each state and year: get_top_crimes splits by year, so it
# is handed one state's raw rows at a time
top_crimes_by_state = {
    state: get_top_crimes(data[data['state_name'] == state])
    for state in state_year_data['state_name'].unique()
}

# Forecast horizon: 2023 through 2030 inclusive, as a single-feature column.
# Built once because it is the same for every state.
future_years = np.arange(2023, 2031).reshape(-1, 1)

# Prepare predictions and growth calculations
predictions = []
growth_data = []

# sort=False keeps states in order of first appearance in state_year_data
for state, state_data in state_year_data.groupby('state_name', sort=False):
    X = state_data['year'].values.reshape(-1, 1)  # Features (years)
    y = state_data['total_offences_under_ip'].values  # Target (total offences)

    # Standardize both axes before fitting. SVR is not scale invariant: on raw
    # years (~2020) and raw offence counts the default C and epsilon let the
    # regularisation dominate and flatten steep trends. The RBF model in the
    # earlier forecasting cell scales its inputs the same way.
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).ravel()

    # Fit SVR model
    svr_model = SVR(kernel='linear')
    svr_model.fit(X_scaled, y_scaled)

    # Predict in standardized units, then map back to offence counts
    scaled_forecast = svr_model.predict(scaler_X.transform(future_years))
    future_predictions = scaler_y.inverse_transform(scaled_forecast.reshape(-1, 1)).ravel()

    # Store predictions, one record per (state, forecast year)
    for year, prediction in zip(future_years.ravel(), future_predictions):
        predictions.append({'state_name': state, 'year': year, 'total_offences_under_ip': prediction})

    # Year-over-year growth: pair every forecast with the one before it, so the
    # first forecast year has no growth entry. Pairing by position avoids index
    # arithmetic tied to a particular start year.
    for year, previous_year_prediction, prediction in zip(
        future_years.ravel()[1:], future_predictions[:-1], future_predictions[1:]
    ):
        if previous_year_prediction > 0:
            growth_rate = ((prediction - previous_year_prediction) / previous_year_prediction) * 100
        else:
            # A percentage change from a non-positive base is not meaningful
            growth_rate = None
        growth_data.append({'state_name': state, 'year': year, 'growth_rate': growth_rate})

# Create DataFrames for predictions and growth data
predictions_df = pd.DataFrame(predictions)
growth_df = pd.DataFrame(growth_data)

# Combine historical and predicted data for visualization in Tableau
combined_df = pd.concat([state_year_data, predictions_df], ignore_index=True)

# Save the combined history + forecast table to a new CSV file (the top-crime
# lists are not part of this file; they are written separately below)
combined_df.to_csv('predicted_crime_with_top_crimes.csv', index=False)

# Save top crimes information to a separate CSV file for Tableau visualization:
# one row per (state, year) with the category names joined into one string
top_crimes_rows = [
    (state, year, ', '.join(crimes))
    for state, crimes_by_year in top_crimes_by_state.items()
    for year, crimes in crimes_by_year.items()
]
top_crimes_df = pd.DataFrame(top_crimes_rows, columns=['state_name', 'year', 'top_5_crimes'])
top_crimes_df.to_csv('top_5_crimes_by_state.csv', index=False)


In [None]:
import matplotlib.pyplot as plt

# State-wise growth: one line per state on a shared axis
fig_states, ax_states = plt.subplots(figsize=(15, 10))
for state, state_growth in growth_df.groupby('state_name', sort=False):
    ax_states.plot(state_growth['year'], state_growth['growth_rate'], label=state)

ax_states.set_xlabel('Year')
ax_states.set_ylabel('Growth Rate (%)')
ax_states.set_title('State-wise Growth Rate of Cyber Crimes')
# Anchor the legend past the right edge of the axes
ax_states.legend(loc='upper right', bbox_to_anchor=(1.2, 1))
ax_states.grid(True)
plt.show()

# Overall year-by-year growth: mean of the per-state growth rates for each year
overall_growth = (
    growth_df
    .groupby('year')['growth_rate']
    .mean()
    .reset_index()
)

fig_overall, ax_overall = plt.subplots(figsize=(10, 6))
ax_overall.plot(overall_growth['year'], overall_growth['growth_rate'], marker='o')

ax_overall.set_xlabel('Year')
ax_overall.set_ylabel('Average Growth Rate (%)')
ax_overall.set_title('Overall Year by Year Growth Rate of Cyber Crimes')
ax_overall.grid(True)
plt.show()

In [14]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR

# Read the cyber-crime CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer='districtwise-cyber-crimes-2017-onwards.csv')

# Preprocess the data
# One row per (state, year) holding the summed total_offences_under_ip
state_year_data = (
    data
    .groupby(['state_name', 'year'])['total_offences_under_ip']
    .sum()
    .reset_index()
)

# Function to get the top crime categories for each year of the rows it is given
def get_top_crimes(data, crime_columns=None, top_n=5):
    """Return the highest-count crime categories for every year in ``data``.

    The function splits by ``year`` only. To get a per-state breakdown the
    caller passes in the rows of a single state.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``year`` column plus every column in ``crime_columns``.
    crime_columns : sequence of str, optional
        Category columns to rank. Defaults to the built-in list below.
    top_n : int, default 5
        Maximum number of categories returned per year.

    Returns
    -------
    dict
        ``{year: [column, ...]}``. Columns are ordered from the largest yearly
        total downwards; ties keep their ``crime_columns`` order. Categories
        whose yearly total is not positive are left out, so a list may hold
        fewer than ``top_n`` names or be empty. Years appear in order of first
        appearance in ``data``; rows with a missing year are ignored.

    Raises
    ------
    KeyError
        If ``year`` or any requested crime column is missing from ``data``.
    """
    if crime_columns is None:
        crime_columns = [
            'tampering_computer_source_documents', 'ransom_ware', 'offences_other_than_ransom_ware',
            'dishonestly_recv_stolen_cmp_resrc_or_comm_device', 'identity_theft',
            'cheating_by_personation_by_using_computer_resource', 'violation_of_privacy',
            'cyber_terrorism', 'pub_or_trans_obscene_material_in_electronic_form',
            'pub_or_trans_of_mtrl_cont_sxly_explct_act_in_elect_form',
            'pub_or_trans_matrl_dpctng_chldrn_sxly_explct_elect_form',
            'presrv_and_retention_of_info_by_intermediaries', 'other_sections_it_act',
            'interception_or_monitoring_or_decryption_of_info',
            'un_athryz_access_atmpt_access_prct_comp_sys',
            'abetment_to_commit_offences', 'attempt_to_commit_offences',
            'other_sections_of_it_act', 'abetment_of_suicide_online',
            'cyber_stalking_bullying_of_women_children', 'data_theft',
            'credit_card_debit_card_fraud', 'atms_fraud',
            'online_banking_fraud', 'otp_frauds', 'other_frauds',
            'cheating', 'forgery', 'defamation_morphing',
            'fake_profile', 'currency_counterfeiting',
            'stamps_counterfeiting', 'cyber_blackmailing_threatening',
            'fake_news_on_social_media', 'other_offences'
        ]
    else:
        # A list (not a tuple) makes the groupby selection below return a frame
        crime_columns = list(crime_columns)

    # Aggregate once instead of re-filtering the whole frame for every year.
    # sort=False keeps the years in order of first appearance.
    yearly_totals = data.groupby('year', sort=False)[crime_columns].sum()

    top_crimes = {}
    for year, totals in yearly_totals.iterrows():
        # A category with no recorded cases is not a "top" crime; without this
        # filter a year with few active categories would be padded with
        # zero-count columns in arbitrary column order.
        reported = totals[totals > 0]
        top_crimes[year] = reported.nlargest(top_n).index.tolist()

    return top_crimes

# Get top 5 crimes for each state and year: get_top_crimes splits by year, so it
# is handed one state's raw rows at a time
top_crimes_by_state = {
    state: get_top_crimes(data[data['state_name'] == state])
    for state in state_year_data['state_name'].unique()
}

# Forecast horizon: 2023 through 2030 inclusive, as a single-feature column.
# Built once because it is the same for every state.
future_years = np.arange(2023, 2031).reshape(-1, 1)

# Prepare predictions and growth calculations
predictions = []
growth_data = []

# sort=False keeps states in order of first appearance in state_year_data
for state, state_data in state_year_data.groupby('state_name', sort=False):
    X = state_data['year'].values.reshape(-1, 1)  # Features (years)
    y = state_data['total_offences_under_ip'].values  # Target (total offences)

    # Standardize both axes before fitting. SVR is not scale invariant: on raw
    # years (~2020) and raw offence counts the default C and epsilon let the
    # regularisation dominate and flatten steep trends. The RBF model in the
    # earlier forecasting cell scales its inputs the same way.
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_scaled = scaler_X.fit_transform(X)
    y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).ravel()

    # Fit SVR model
    svr_model = SVR(kernel='linear')
    svr_model.fit(X_scaled, y_scaled)

    # Predict in standardized units, then map back to offence counts
    scaled_forecast = svr_model.predict(scaler_X.transform(future_years))
    future_predictions = scaler_y.inverse_transform(scaled_forecast.reshape(-1, 1)).ravel()

    # Store predictions, one record per (state, forecast year)
    for year, prediction in zip(future_years.ravel(), future_predictions):
        predictions.append({'state_name': state, 'year': year, 'total_offences_under_ip': prediction})

    # Year-over-year growth: pair every forecast with the one before it, so the
    # first forecast year has no growth entry. Pairing by position avoids index
    # arithmetic tied to a particular start year.
    for year, previous_year_prediction, prediction in zip(
        future_years.ravel()[1:], future_predictions[:-1], future_predictions[1:]
    ):
        if previous_year_prediction > 0:
            growth_rate = ((prediction - previous_year_prediction) / previous_year_prediction) * 100
        else:
            # A percentage change from a non-positive base is not meaningful
            growth_rate = None
        growth_data.append({'state_name': state, 'year': year, 'growth_rate': growth_rate})

# Create DataFrames for predictions and growth data
predictions_df = pd.DataFrame(predictions)
growth_df = pd.DataFrame(growth_data)

# Combine historical and predicted data for visualization in Tableau
combined_df = pd.concat([state_year_data, predictions_df], ignore_index=True)

# Save the combined history + forecast table to a new CSV file (the top-crime
# lists are not part of this file; they are written separately below)
combined_df.to_csv('predicted_crime_with_top_crimes.csv', index=False)

# Save top crimes information to a separate CSV file for Tableau visualization:
# one row per (state, year) with the category names joined into one string
top_crimes_rows = [
    (state, year, ', '.join(crimes))
    for state, crimes_by_year in top_crimes_by_state.items()
    for year, crimes in crimes_by_year.items()
]
top_crimes_df = pd.DataFrame(top_crimes_rows, columns=['state_name', 'year', 'top_5_crimes'])
top_crimes_df.to_csv('top_5_crimes_by_state.csv', index=False)