In [1]:
import pandas as pd

# Read the district-wise cyber-crime CSV into a DataFrame.
# The path is relative to the notebook's working directory.
DATA_PATH = 'districtwise-cyber-crimes-2017-onwards.csv'
data = pd.read_csv(DATA_PATH)


In [None]:
# Display basic information (column names, dtypes, non-null counts).
# DataFrame.info() writes its report to stdout itself and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
data.info()

# Display summary statistics for the numeric columns
print(data.describe())


In [None]:
# Count missing values per column before any cleaning
print(data.isnull().sum())

# Replace every missing value with 0.
# Rebinding the name (instead of inplace=True) avoids mutating the loaded
# frame behind the back of earlier cells and keeps this cell idempotent.
# NOTE(review): the fill applies to ALL columns, so a missing value in a text
# column (e.g. a state or district name) would also become the integer 0 --
# confirm that only numeric columns actually contain missing values.
data = data.fillna(0)


In [None]:
# List the distinct values of the two geographic columns, states first
for geo_column in ('state_name', 'district_name'):
    print(data[geo_column].unique())


In [None]:
import matplotlib.pyplot as plt

# Collapse the district-level rows into one offence total per year
offences_by_year = data.groupby('year')['total_offences_under_ip']
yearly_data = offences_by_year.sum()

# Bar chart of the yearly totals, drawn on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 5))
yearly_data.plot.bar(ax=ax)
ax.set_title('Total Cyber Crimes Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Total Offences')
plt.show()


In [None]:
# Keep only the rows reported for 2019
is_2019 = data['year'] == 2019
data_2019 = data.loc[is_2019]

# Total offences per state within that year
state_totals_2019 = data_2019.groupby('state_name')['total_offences_under_ip'].sum()

fig, ax = plt.subplots(figsize=(12, 6))
state_totals_2019.plot.bar(ax=ax)
ax.set_title('Cyber Crimes by State in 2019')
ax.set_xlabel('State')
ax.set_ylabel('Total Offences')
# Tilt the state names so neighbouring labels do not overlap
plt.xticks(rotation=45)
plt.show()


In [None]:
# Turn the year-indexed totals into a two-column frame for modelling:
# the former index becomes 'Year', the summed values become 'Total_Offences'.
time_series_data = (
    yearly_data
    .reset_index()
    .set_axis(['Year', 'Total_Offences'], axis=1)
)
print(time_series_data.head())

Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error,r2_score
import numpy as np

# Target: yearly offence totals. Feature matrix: the year as a single column
# (scikit-learn expects a 2-D array, hence the reshape).
y = time_series_data['Total_Offences']
X = time_series_data['Year'].to_numpy().reshape(-1, 1)

# Ordinary least-squares trend line through the yearly totals
model = LinearRegression().fit(X, y)

# Forecast horizon: 2023 through 2030 inclusive (8 years), as a column vector
future_years = np.arange(2023, 2031).reshape(-1, 1)
predictions = model.predict(future_years)

# In-sample fit quality; the fitted values are computed once and reused
fitted_values = model.predict(X)
mse = mean_squared_error(y, fitted_values)
print(mse)
r2 = r2_score(y, fitted_values)
print(r2)

for year, prediction in zip(future_years.ravel(), predictions):
    print(f'Predicted total offences in {year}: {prediction:.2f}')


In [None]:
import statsmodels.api as sm
# Prepend an intercept column; statsmodels OLS does not add one by itself
X = sm.add_constant(X)
Y = y

# Show the slope(s) and intercept of the scikit-learn model.
# A bare expression in the middle of a cell is silently discarded (only the
# last expression of a cell is displayed), so the values are printed explicitly.
print(model.coef_, model.intercept_)

# Refit the same regression with statsmodels to obtain the adjusted R-squared
rsquare_model = sm.OLS(Y, X).fit()
rsquare_model.rsquared_adj

In [None]:
# Observed offence totals per year, with 'year' as an ordinary column
observed_totals = data.groupby('year')['total_offences_under_ip'].sum().reset_index()

# Forecast rows in the same two-column layout as the observed totals
future_data = pd.DataFrame({
    'year': future_years.ravel(),
    'total_offences_under_ip': predictions,
})

# Stack observed and forecast rows under a fresh 0..n-1 index.
# From here on, yearly_data holds BOTH observed and predicted years.
yearly_data = pd.concat([observed_totals, future_data], ignore_index=True)
print(yearly_data)


In [None]:
# Observed yearly totals, recomputed from the raw data.
# yearly_data cannot be used for this: by this point it already has the
# forecast rows appended, so concatenating it with future_data again would
# duplicate every predicted year and the "Historical Data" line would run
# through predicted values.
historical_data = data.groupby('year')['total_offences_under_ip'].sum().reset_index()

# Combine historical and predicted data (one row per year, fresh index);
# combined_data is reused by the bar chart further down.
combined_data = pd.concat([historical_data, future_data], ignore_index=True)

plt.figure(figsize=(12, 6))
# Solid blue: observed totals only. Dashed orange: regression forecast.
plt.plot(historical_data['year'], historical_data['total_offences_under_ip'], marker='o', label='Historical Data', color='blue')
plt.plot(future_data['year'], future_data['total_offences_under_ip'], marker='o', linestyle='--', label='Predicted Data', color='orange')

plt.title('Total Cyber Crimes Over Years with Predictions')
plt.xlabel('Year')
plt.ylabel('Total Offences')
# One tick per year across the observed and forecast range
plt.xticks(np.arange(2017, 2031, 1))
plt.legend()
plt.grid()
plt.show()


In [None]:
# Year-over-year change of the totals, expressed in percent
# (the first row has no predecessor, so its growth is NaN)
growth_pct = yearly_data['total_offences_under_ip'].pct_change().mul(100)
yearly_data = yearly_data.assign(growth=growth_pct)

# Show the totals next to their growth rate
print(yearly_data.loc[:, ['year', 'total_offences_under_ip', 'growth']])

In [None]:
# Guard against repeated years: combined_data can contain the forecast years
# more than once when it was built from a frame that already included them.
# Keep the first row per year and order by year, so that pct_change compares
# each year with the one before it and no bar is drawn twice.
combined_data = (
    combined_data
    .drop_duplicates(subset='year', keep='first')
    .sort_values('year')
    .reset_index(drop=True)
)

# Calculate percentage change relative to the previous year
combined_data['pct_change'] = combined_data['total_offences_under_ip'].pct_change() * 100

# Assign colors based on percentage change: red = increase, green = no
# increase. The first year has no predecessor (NaN -> 0) and is drawn green.
colors = ['red' if pct > 0 else 'green' for pct in combined_data['pct_change'].fillna(0)]

plt.figure(figsize=(12, 6))
plt.bar(combined_data['year'], combined_data['total_offences_under_ip'], color=colors)
plt.title('Total Cyber Crimes Over Years with Predictions')
plt.xlabel('Year')
plt.ylabel('Total Offences')
plt.xticks(np.arange(2017, 2031, 1), rotation=45)
plt.grid(axis='y')
plt.show()