In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler

# Load the district-level dataset
data = pd.read_csv('districtwise-cyber-crimes-2017-onwards.csv')

# Drop identifier/administrative columns that are not used below, then
# replace every missing value with 0
irrelevant_columns = ['id', 'state_code', 'district_name', 'district_code', 'registration_circles']
data = data.drop(columns=irrelevant_columns).fillna(0)

# Collapse to one row per (state, year) by summing the remaining columns
data_state_year = data.groupby(['state_name', 'year'], as_index=False).sum()

# Columns treated as crime counts by the SVR step.
# NOTE(review): this is a positional slice of `data` (not `data_state_year`);
# confirm that index 3 really is the first crime column in the CSV layout.
crime_columns = data.columns[3:]

def predict_crimes(state_data, state_name, scaler_X, scaler_y, future_years=None):
    """Fit one SVR per crime column for a single state and forecast future years.

    Args:
        state_data: DataFrame holding a 'year' column plus every column named
            in the module-level ``crime_columns``.
        state_name: Value written to the 'state_name' column of the result.
        scaler_X: Scaler for the year feature; it is refit by this call.
        scaler_y: Scaler for the target; it is refit once per crime column.
        future_years: Optional sequence of years to forecast. Defaults to
            2023 through 2030 inclusive.

    Returns:
        DataFrame with one row per forecast year and the columns 'year',
        each entry of ``crime_columns``, and 'state_name'.
    """
    if future_years is None:
        future_years = [2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030]
    future_years = np.asarray(future_years).reshape(-1, 1)

    # The year feature is the same for every crime column, so build and scale
    # it once instead of refitting scaler_X on each loop iteration.
    X = np.array(state_data['year']).reshape(-1, 1)
    X_scaled = scaler_X.fit_transform(X)
    future_X_scaled = scaler_X.transform(future_years)

    # NOTE(review): the forecast years lie outside the fitted range of the RBF
    # model, and nothing here clamps predictions to be non-negative.
    predictions = []
    for crime in crime_columns:
        # Scale this crime's target series
        y = np.array(state_data[crime]).reshape(-1, 1)
        y_scaled = scaler_y.fit_transform(y).flatten()

        # Train SVR
        model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
        model.fit(X_scaled, y_scaled)

        # Predict in scaled space, then map back to the original units
        predicted_scaled = model.predict(future_X_scaled).reshape(-1, 1)
        predictions.append(scaler_y.inverse_transform(predicted_scaled).flatten())

    # Rows are forecast years, columns are crimes; 'year' goes first and
    # 'state_name' last, matching the layout callers concatenate.
    future_predictions = pd.DataFrame(np.array(predictions).T, columns=crime_columns)
    future_predictions.insert(0, 'year', future_years.flatten())
    future_predictions['state_name'] = state_name

    return future_predictions

# Forecast every state; predict_crimes is handed the same scaler pair each time
scaler_X, scaler_y = StandardScaler(), StandardScaler()
predictions_all_states = []

# sort=False keeps states in order of first appearance in data_state_year
for state_name, state_data in data_state_year.groupby('state_name', sort=False):
    state_predictions = predict_crimes(state_data, state_name, scaler_X, scaler_y)
    predictions_all_states.append(state_predictions)

# Stack the per-state forecasts into a single frame
future_data = pd.concat(predictions_all_states, ignore_index=True)

# Adding top 5 crimes for each year and state
def get_top_crimes(row):
    """Return the names of the five largest crime columns in *row*, joined by ', '.

    Only the columns listed in the module-level ``crime_columns`` are ranked.
    """
    crime_values = row[crime_columns]
    ranked_names = crime_values.sort_values(ascending=False).index
    return ', '.join(ranked_names[:5])

future_data = future_data.assign(
    top_5_crimes=future_data.apply(get_top_crimes, axis=1)
)

# Append the forecast rows to the historical state/year rows
final_data = pd.concat(objs=[data_state_year, future_data], ignore_index=True)

# Sum each crime column per year, then add the row-wise grand total
yearly_totals = final_data.groupby('year')[crime_columns].sum()
yearly_totals = yearly_totals.assign(total_crimes=yearly_totals.sum(axis=1))

# Calculate top crimes for each year
def get_yearly_top_crimes(row):
    """Return the labels of the five largest entries of *row*, joined by ', '.

    Fewer than five labels are returned when *row* has fewer than five entries.
    """
    top_five = row.sort_values(ascending=False).head(5)
    return ', '.join(top_five.index)

# Rank only the per-crime columns. yearly_totals also carries the derived
# 'total_crimes' column (the row-wise sum), which must not be ranked as if it
# were a crime category.
yearly_totals['top_crimes'] = yearly_totals[crime_columns].apply(get_yearly_top_crimes, axis=1)

# Calculate top states for each year
# Per-(year, state) sums of 'total_offences_under_ip', used to rank states
yearly_state_totals = final_data.groupby(['year', 'state_name'])['total_offences_under_ip'].sum().reset_index()
def get_top_states(year):
    """Return up to five state names ranked by 'total_offences_under_ip' for *year*.

    Reads the module-level ``yearly_state_totals`` frame. Names are joined by
    ', ' in descending order of the total.
    """
    rows_for_year = yearly_state_totals.loc[yearly_state_totals['year'] == year]
    ranked = rows_for_year.sort_values(by='total_offences_under_ip', ascending=False)
    return ', '.join(ranked['state_name'].iloc[:5])

# yearly_totals is indexed by year, so look up the top states per index value
yearly_totals['top_states'] = [get_top_states(year) for year in yearly_totals.index]

# Save the aggregated yearly data for Tableau (the year index is written too)
output_path = 'yearly_crime_analysis.csv'
yearly_totals.to_csv(output_path)

print(f"Yearly crime analysis saved to {output_path}")
