In [114]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split

In [115]:
crime = pd.read_csv("/content/001.csv")

In [116]:
crime.head()

Unnamed: 0,STATE/UT,DISTRICT,YEAR,MURDER,ATTEMPT TO MURDER,CULPABLE HOMICIDE NOT AMOUNTING TO MURDER,RAPE,CUSTODIAL RAPE,OTHER RAPE,KIDNAPPING & ABDUCTION,...,ARSON,HURT/GREVIOUS HURT,DOWRY DEATHS,ASSAULT ON WOMEN WITH INTENT TO OUTRAGE HER MODESTY,INSULT TO MODESTY OF WOMEN,CRUELTY BY HUSBAND OR HIS RELATIVES,IMPORTATION OF GIRLS FROM FOREIGN COUNTRIES,CAUSING DEATH BY NEGLIGENCE,OTHER IPC CRIMES,TOTAL IPC CRIMES
0,ANDHRA PRADESH,ADILABAD,2011,101,60,17,50,0,50,46,...,30,1131,16,149,34,175,0,181,1518,4154
1,ANDHRA PRADESH,ANANTAPUR,2011,151,125,1,23,0,23,53,...,69,1543,7,118,24,154,0,270,754,4125
2,ANDHRA PRADESH,CHITTOOR,2011,101,57,2,27,0,27,59,...,38,2088,14,112,83,186,0,404,1262,5818
3,ANDHRA PRADESH,CUDDAPAH,2011,80,53,1,20,0,20,25,...,23,795,17,126,38,57,0,233,1181,3140
4,ANDHRA PRADESH,EAST GODAVARI,2011,82,67,1,23,0,23,49,...,41,1244,12,109,58,247,0,431,2313,6507


In [117]:
def state(name):
    state = crime.groupby(['STATE/UT','DISTRICT','YEAR']).sum()['TOTAL IPC CRIMES']
    return state[name]

In [118]:
min_val = crime['TOTAL IPC CRIMES'].min()
max_val = crime['TOTAL IPC CRIMES'].max()
range_val = (max_val - min_val) / 4
low = min_val + range_val
medium = low + range_val
high = medium + range_val

In [119]:
def get_crime_level(crime_count):
    if crime_count <= low:
        return 1
    elif crime_count <= medium:
        return 2
    elif crime_count <= high:
        return 3
    else:
        return 4

crime['CRIME_LEVEL'] = crime['TOTAL IPC CRIMES'].apply(get_crime_level)

In [120]:
crime_level_count = crime['CRIME_LEVEL'].value_counts()
crime_level_count

1    8859
3      55
4      48
2      44
Name: CRIME_LEVEL, dtype: int64

In [121]:
crime= crime[crime['DISTRICT'] != 'TOTAL']

In [122]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

crime["STATE/UT_encoded"] = le.fit_transform(crime["STATE/UT"])

crime["DISTRICT_encoded"] = le.fit_transform(crime["DISTRICT"])

In [123]:

grouped_state = crime[["STATE/UT", "STATE/UT_encoded"]].groupby("STATE/UT").first()
grouped_state

Unnamed: 0_level_0,STATE/UT_encoded
STATE/UT,Unnamed: 1_level_1
A & N ISLANDS,0
ANDHRA PRADESH,1
ARUNACHAL PRADESH,2
ASSAM,3
BIHAR,4
CHANDIGARH,5
CHHATTISGARH,6
D & N HAVELI,7
DAMAN & DIU,8
DELHI UT,9


In [124]:
grouped_district = crime[["DISTRICT", "DISTRICT_encoded"]].groupby("DISTRICT").first()
grouped_district

Unnamed: 0_level_0,DISTRICT_encoded
DISTRICT,Unnamed: 1_level_1
24 PARGANAS NORTH,0
24 PARGANAS SOUTH,1
A and N ISLANDS,2
ADILABAD,3
AGRA,4
...,...
WOKHA,802
YADGIRI,803
YAMUNANAGAR,804
YAVATMAL,805


In [125]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [126]:
features = crime[['STATE/UT', 'DISTRICT', 'YEAR']]

target = crime['TOTAL IPC CRIMES']

x_test=features
y_test=target

In [127]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)


preprocessor = ColumnTransformer(
    transformers=[
        ('encoder', OneHotEncoder(handle_unknown='ignore'), ['STATE/UT', 'DISTRICT',"YEAR"])
    ],
    remainder='passthrough'
)


model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])


model.fit(X_train, y_train)


predict = model.predict(X_test)


model_score = model.score(X_test, y_test)
print('Linear Regression score : ',model_score)

Linear Regression score :  0.9316055340407917


In [128]:
crime.head()

Unnamed: 0,STATE/UT,DISTRICT,YEAR,MURDER,ATTEMPT TO MURDER,CULPABLE HOMICIDE NOT AMOUNTING TO MURDER,RAPE,CUSTODIAL RAPE,OTHER RAPE,KIDNAPPING & ABDUCTION,...,ASSAULT ON WOMEN WITH INTENT TO OUTRAGE HER MODESTY,INSULT TO MODESTY OF WOMEN,CRUELTY BY HUSBAND OR HIS RELATIVES,IMPORTATION OF GIRLS FROM FOREIGN COUNTRIES,CAUSING DEATH BY NEGLIGENCE,OTHER IPC CRIMES,TOTAL IPC CRIMES,CRIME_LEVEL,STATE/UT_encoded,DISTRICT_encoded
0,ANDHRA PRADESH,ADILABAD,2011,101,60,17,50,0,50,46,...,149,34,175,0,181,1518,4154,1,1,3
1,ANDHRA PRADESH,ANANTAPUR,2011,151,125,1,23,0,23,53,...,118,24,154,0,270,754,4125,1,1,28
2,ANDHRA PRADESH,CHITTOOR,2011,101,57,2,27,0,27,59,...,112,83,186,0,404,1262,5818,1,1,154
3,ANDHRA PRADESH,CUDDAPAH,2011,80,53,1,20,0,20,25,...,126,38,57,0,233,1181,3140,1,1,172
4,ANDHRA PRADESH,EAST GODAVARI,2011,82,67,1,23,0,23,49,...,109,58,247,0,431,2313,6507,1,1,220


In [129]:
import pandas as pd

data = [{'STATE/UT': 'ANDHRA PRADESH', 'DISTRICT': 'WEST GODAVARI', 'YEAR': 2011, 'TOTAL IPC CRIMES': 1062},
        {'STATE/UT': 'ANDHRA PRADESH', 'DISTRICT': 'MAHABOOBNAGAR', 'YEAR': 2011, 'TOTAL IPC CRIMES': 864},
        {'STATE/UT': 'ANDHRA PRADESH', 'DISTRICT': 'KARIMNAGAR', 'YEAR': 2011, 'TOTAL IPC CRIMES': 853},
        {'STATE/UT': 'ANDHRA PRADESH', 'DISTRICT': 'KHAMMAM', 'YEAR': 2011, 'TOTAL IPC CRIMES': 721},
        {'STATE/UT': 'ANDHRA PRADESH', 'DISTRICT': 'HYDERABAD CITY', 'YEAR': 2011, 'TOTAL IPC CRIMES': 712}]

df = pd.DataFrame(data)

print(df)


         STATE/UT        DISTRICT  YEAR  TOTAL IPC CRIMES
0  ANDHRA PRADESH   WEST GODAVARI  2011              1062
1  ANDHRA PRADESH   MAHABOOBNAGAR  2011               864
2  ANDHRA PRADESH      KARIMNAGAR  2011               853
3  ANDHRA PRADESH         KHAMMAM  2011               721
4  ANDHRA PRADESH  HYDERABAD CITY  2011               712


In [130]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df['STATE/UT'] = le.fit_transform(df['STATE/UT'])

df["DISTRICT"] = le.fit_transform(df["DISTRICT"])

In [131]:
features = df[['STATE/UT', 'DISTRICT', 'YEAR']]

target = df['TOTAL IPC CRIMES']

In [132]:
x=features
y=target

In [133]:
predictions = model.predict(X_test)

top_3_indices = predictions.argsort()[-3:][::-1]

top_3_predictions = predictions[top_3_indices]
top_3_states = X_test.iloc[top_3_indices]['STATE/UT']
top_3_districts = X_test.iloc[top_3_indices]['DISTRICT']

print('Top 3 Predictions:', top_3_predictions)
print('Corresponding State:', top_3_states)
print('Corresponding District:', top_3_districts)


Top 3 Predictions: [31481.90927752 31358.44689729 29449.73097363]
Corresponding State: 7818    MAHARASHTRA
7033    MAHARASHTRA
6908      KARNATAKA
Name: STATE/UT, dtype: object
Corresponding District: 7818       MUMBAI COMMR.
7033       MUMBAI COMMR.
6908    BANGALORE COMMR.
Name: DISTRICT, dtype: object
