In [1]:
# Put the Kaggle API token in /root/.kaggle, next to where it is chmod-ed below.
# -p keeps the cell re-runnable: plain mkdir errors once the directory exists.
! mkdir -p /root/.kaggle
! cp /content/kaggle.json /root/.kaggle
# Restrict the token file to owner read/write only.
! chmod 600 /root/.kaggle/kaggle.json

In [2]:
# Download the rajanand/crime-in-india dataset archive with the Kaggle CLI.
! kaggle datasets download -d rajanand/crime-in-india

Downloading crime-in-india.zip to /content
  0% 0.00/4.39M [00:00<?, ?B/s]
100% 4.39M/4.39M [00:00<00:00, 182MB/s]


In [3]:
# Target folder for the extracted CSVs; -p makes a re-run of this cell a no-op
# instead of an error when the folder already exists.
! mkdir -p crime-dataset

In [4]:
# Extract the archive. -o overwrites existing files without asking, so a
# re-run does not stop at unzip's interactive "replace?" prompt.
! unzip -o crime-in-india.zip -d /content/crime-dataset

Archive:  crime-in-india.zip
  inflating: /content/crime-dataset/10_Property_stolen_and_recovered.csv  
  inflating: /content/crime-dataset/20_Victims_of_rape.csv  
  inflating: /content/crime-dataset/25_Complaints_against_police.csv  
  inflating: /content/crime-dataset/28_Trial_of_violent_crimes_by_courts.csv  
  inflating: /content/crime-dataset/29_Period_of_trials_by_courts.csv  
  inflating: /content/crime-dataset/30_Auto_theft.csv  
  inflating: /content/crime-dataset/31_Serious_fraud.csv  
  inflating: /content/crime-dataset/32_Murder_victim_age_sex.csv  
  inflating: /content/crime-dataset/33_CH_not_murder_victim_age_sex.csv  
  inflating: /content/crime-dataset/35_Human_rights_violation_by_police.csv  
  inflating: /content/crime-dataset/36_Police_housing.csv  
  inflating: /content/crime-dataset/39_Specific_purpose_of_kidnapping_and_abduction.csv  
  inflating: /content/crime-dataset/40_01_Custodial_death_person_remanded.csv  
  inflating: /content/crime-dataset/40_02_Custodi

In [5]:
import pandas as pd
import numpy as np

In [6]:
# Load the property stolen/recovered table. Despite the variable name, the
# frame is not limited to 2001: the displayed tail contains Year 2010 rows.
crime2001dataset = pd.read_csv('/content/crime-dataset/10_Property_stolen_and_recovered.csv')
crime2001dataset

Unnamed: 0,Area_Name,Year,Group_Name,Sub_Group_Name,Cases_Property_Recovered,Cases_Property_Stolen,Value_of_Property_Recovered,Value_of_Property_Stolen
0,Andaman & Nicobar Islands,2001,Burglary - Property,3. Burglary,27,64,755858,1321961
1,Andhra Pradesh,2001,Burglary - Property,3. Burglary,3321,7134,51483437,147019348
2,Arunachal Pradesh,2001,Burglary - Property,3. Burglary,66,248,825115,4931904
3,Assam,2001,Burglary - Property,3. Burglary,539,2423,3722850,21466955
4,Bihar,2001,Burglary - Property,3. Burglary,367,3231,2327135,17023937
...,...,...,...,...,...,...,...,...
2444,Tamil Nadu,2010,Total Property,7. Total Property Stolen & Recovered,16125,21509,660311804,1317919190
2445,Tripura,2010,Total Property,7. Total Property Stolen & Recovered,192,879,5666102,33032746
2446,Uttar Pradesh,2010,Total Property,7. Total Property Stolen & Recovered,9130,35068,577591772,1442670414
2447,Uttarakhand,2010,Total Property,7. Total Property Stolen & Recovered,964,2234,47135685,123398840


In [7]:
# How many distinct areas appear anywhere in the table.
print(len(crime2001dataset['Area_Name'].unique()))

# 'Total Property' rows for 2001, selected with a single combined mask;
# the row count is printed to compare against the number of distinct areas.
ds = crime2001dataset.loc[
    (crime2001dataset['Group_Name'] == 'Total Property')
    & (crime2001dataset['Year'] == 2001)
]
print(len(ds['Area_Name']))
ds

35
35


Unnamed: 0,Area_Name,Year,Group_Name,Sub_Group_Name,Cases_Property_Recovered,Cases_Property_Stolen,Value_of_Property_Recovered,Value_of_Property_Stolen
2100,Andaman & Nicobar Islands,2001,Total Property,7. Total Property Stolen & Recovered,54,143,1192179,3184477
2101,Andhra Pradesh,2001,Total Property,7. Total Property Stolen & Recovered,13418,25070,186103403,476038316
2102,Arunachal Pradesh,2001,Total Property,7. Total Property Stolen & Recovered,300,858,9652850,58483056
2103,Assam,2001,Total Property,7. Total Property Stolen & Recovered,2149,9778,24989343,121602215
2104,Bihar,2001,Total Property,7. Total Property Stolen & Recovered,3357,18503,47713186,422706220
2105,Chandigarh,2001,Total Property,7. Total Property Stolen & Recovered,714,1948,21114612,49527109
2106,Chhattisgarh,2001,Total Property,7. Total Property Stolen & Recovered,3298,9894,37331973,112242456
2107,Dadra & Nagar Haveli,2001,Total Property,7. Total Property Stolen & Recovered,43,106,5314436,11604547
2108,Daman & Diu,2001,Total Property,7. Total Property Stolen & Recovered,21,94,2323494,14151158
2109,Delhi,2001,Total Property,7. Total Property Stolen & Recovered,5893,25170,218254594,2127553393


In [8]:
# Rebind `ds` to a column-less frame whose index is the distinct area names.
ds = pd.DataFrame(index=crime2001dataset['Area_Name'].unique())
ds

Andaman & Nicobar Islands
Andhra Pradesh
Arunachal Pradesh
Assam
Bihar
Chandigarh
Chhattisgarh
Dadra & Nagar Haveli
Daman & Diu
Delhi
Goa


In [9]:
# Human-rights-violation table: keep the 'HR_Total Violations by Police' rows
# for 2001, report how many rows matched, then index the result by area name.
ds = pd.read_csv('/content/crime-dataset/35_Human_rights_violation_by_police.csv')

ds = ds.loc[
    (ds['Group_Name'] == 'HR_Total Violations by Police')
    & (ds['Year'] == 2001)
]
print(len(ds['Area_Name']))

ds = ds.set_index('Area_Name')

23


In [10]:
def add_columns_to_dataset(dataset, ds):
  """Copy every column of `ds` into `dataset` and return `dataset`.

  `dataset` is modified in place (the returned object is the same frame).
  Each column is assigned as a Series, so pandas aligns it on the index
  labels: rows of `dataset` with no matching label in `ds` receive NaN, and
  a column that already exists in `dataset` is overwritten.
  """
  for column_name in ds.columns:
    dataset[column_name] = ds[column_name]
  return dataset

def group_dataset(ds, group_name, total_label, year_col, year_label):
  """Return the rows of `ds` that match both a group label and a year.

  Args:
    ds: frame to filter; it is not modified.
    group_name: name of the column compared against `total_label`.
    total_label: value that `group_name` must equal.
    year_col: name of the column compared against `year_label`.
    year_label: value that `year_col` must equal.

  Returns:
    The matching rows, with their original index labels and order.
  """
  in_group = ds[group_name] == total_label
  in_year = ds[year_col] == year_label
  return ds[in_group & in_year]

def create_cols_list(ds, forbidden_list):
  """List the column names of `ds` that are not in `forbidden_list`.

  Column order is preserved; names in `forbidden_list` that `ds` does not
  have are simply ignored.
  """
  return [name for name in ds.columns if name not in forbidden_list]

def process_dataset(data_path, dataset, area_name, group_name, total_label, year_col, year_label, forbidden_column_list):
  """Load one CSV, keep its matching rows, and merge its columns into `dataset`.

  Args:
    data_path: path of the CSV file to read.
    dataset: combined frame to extend; its index is expected to hold
      lower-cased area names. It is modified in place and also returned.
    area_name: column of the CSV holding the area name; it is lower-cased and
      becomes the index used to align rows with `dataset`.
    group_name: column compared against `total_label` when filtering rows.
    total_label: value `group_name` must equal for a row to be kept.
    year_col: column compared against `year_label` when filtering rows.
    year_label: value `year_col` must equal for a row to be kept.
    forbidden_column_list: column names that are never copied into `dataset`.

  Returns:
    `dataset`, with one added (or overwritten) column per remaining CSV column.
    Areas of `dataset` that have no row with the same lower-cased name in the
    filtered CSV get NaN in those columns.
  """
  ds = pd.read_csv(data_path)
  # The filter result is a slice of the loaded frame; copy it so that the
  # column assignment below does not raise SettingWithCopyWarning.
  ds = group_dataset(ds, group_name, total_label, year_col, year_label).copy()
  # Vectorised lower-casing; unlike calling x.lower() per element, this leaves
  # missing area names as NaN instead of raising AttributeError.
  ds[area_name] = ds[area_name].str.lower()
  ds = ds.set_index(area_name)
  cols_list = create_cols_list(ds, forbidden_column_list)
  cols = ds[cols_list]
  dataset = add_columns_to_dataset(dataset, cols)

  return dataset

## 2001

In [11]:
# Seed the combined table: one row per area and no columns yet. Area names are
# lower-cased because process_dataset() lower-cases each source's area column
# before aligning on the index. Lower-casing before unique() ensures the index
# has no duplicate labels that differ only by case.
dataset = pd.DataFrame(index=crime2001dataset['Area_Name'].str.lower().unique())
data_path = '/content/crime-dataset/10_Property_stolen_and_recovered.csv'
data_path_2 = '/content/crime-dataset/35_Human_rights_violation_by_police.csv'
# Identifier / filter columns that must not be copied into the combined table.
forbidden_list = ['Area_Name', 'Year', 'Group_Name', 'Sub_Group_Name', 'STATE/UT', 'DISTRICT', 'YEAR']
dataset = process_dataset(data_path, dataset, 'Area_Name', 'Group_Name', 'Total Property', 'Year', 2001, forbidden_list)
dataset = process_dataset(data_path_2, dataset, 'Area_Name', 'Group_Name', 'HR_Total Violations by Police', 'Year', 2001, forbidden_list)

In [12]:
# District-wise IPC crimes: merge the rows whose DISTRICT is 'TOTAL' for
# YEAR 2001, aligned on the lower-cased STATE/UT name.
data_path_3 = '/content/crime-dataset/crime/01_District_wise_crimes_committed_IPC_2001_2012.csv'
dataset = process_dataset(data_path_3, dataset, 'STATE/UT', 'DISTRICT' ,'TOTAL', 'YEAR', 2001, forbidden_list)

In [13]:
# District-wise crimes against SC: merge the rows whose DISTRICT is 'TOTAL'
# for 2001, aligned on the lower-cased STATE/UT name.
# NOTE(review): the year column is passed as 'Year' here but as 'YEAR' for the
# IPC file — confirm this matches the CSV header of this file.
data_path_4 = '/content/crime-dataset/crime/02_01_District_wise_crimes_committed_against_SC_2001_2012.csv'
dataset = process_dataset(data_path_4, dataset, 'STATE/UT', 'DISTRICT' ,'TOTAL', 'Year', 2001, forbidden_list)

In [14]:
# Police killed or injured on duty: merge the 'Police - Total' rows for 2001,
# aligned on the lower-cased Area_Name.
data_path_5 = '/content/crime-dataset/crime/13_Police_killed_or_injured_on_duty.csv'
dataset = process_dataset(data_path_5, dataset, 'Area_Name', 'Group_Name', 'Police - Total', 'Year', 2001, forbidden_list)

In [15]:
# Show the combined table; NaN marks areas with no matching row in a source.
dataset

Unnamed: 0,Cases_Property_Recovered,Cases_Property_Stolen,Value_of_Property_Recovered,Value_of_Property_Stolen,Cases_Registered_under_Human_Rights_Violations,Policemen_Chargesheeted,Policemen_Convicted,MURDER,ATTEMPT TO MURDER,CULPABLE HOMICIDE NOT AMOUNTING TO MURDER,...,Police_Injured_In_TerroristsExtremists_Operations,Police_Injured_On_Border_Duties,Police_Injured_Total_Policemen,Police_Killed_By_Criminals,Police_Killed_By_Riotous_Mobs,Police_Killed_In_Accidents,Police_Killed_In_Dacoity_OperationsOther_raids,Police_Killed_In_TerroristsExtremists_Operations,Police_Killed_On_Border_Duties,Police_Killed_Total_Policemen
andaman & nicobar islands,54,143,1192179,3184477,,,,,,,...,0,0,0,0,0,0,0,0,0,0
andhra pradesh,13418,25070,186103403,476038316,45.0,29.0,2.0,2602.0,1555.0,136.0,...,45,0,286,0,1,34,4,31,0,70
arunachal pradesh,300,858,9652850,58483056,11.0,6.0,0.0,83.0,53.0,3.0,...,0,0,7,0,0,1,0,0,0,1
assam,2149,9778,24989343,121602215,35.0,0.0,0.0,1356.0,481.0,40.0,...,41,0,54,4,0,5,12,13,0,34
bihar,3357,18503,47713186,422706220,0.0,0.0,0.0,3643.0,3419.0,250.0,...,18,0,40,2,1,0,1,10,0,14
chandigarh,714,1948,21114612,49527109,1.0,0.0,0.0,15.0,10.0,6.0,...,0,0,17,0,0,3,0,0,0,3
chhattisgarh,3298,9894,37331973,112242456,0.0,0.0,0.0,880.0,529.0,45.0,...,15,0,19,0,0,11,0,8,0,19
dadra & nagar haveli,43,106,5314436,11604547,,,,,,,...,0,0,0,0,0,0,0,0,0,0
daman & diu,21,94,2323494,14151158,,,,7.0,5.0,0.0,...,0,0,0,0,0,0,0,0,0,0
delhi,5893,25170,218254594,2127553393,6.0,4.0,,,,,...,13,1,240,3,0,45,0,6,0,54


In [18]:
# List every column merged into the combined table.
dataset.columns

Index(['Cases_Property_Recovered', 'Cases_Property_Stolen',
       'Value_of_Property_Recovered', 'Value_of_Property_Stolen',
       'Cases_Registered_under_Human_Rights_Violations',
       'Policemen_Chargesheeted', 'Policemen_Convicted', 'MURDER',
       'ATTEMPT TO MURDER', 'CULPABLE HOMICIDE NOT AMOUNTING TO MURDER',
       'RAPE', 'CUSTODIAL RAPE', 'OTHER RAPE', 'KIDNAPPING & ABDUCTION',
       'KIDNAPPING AND ABDUCTION OF WOMEN AND GIRLS',
       'KIDNAPPING AND ABDUCTION OF OTHERS', 'DACOITY',
       'PREPARATION AND ASSEMBLY FOR DACOITY', 'ROBBERY', 'BURGLARY', 'THEFT',
       'AUTO THEFT', 'OTHER THEFT', 'RIOTS', 'CRIMINAL BREACH OF TRUST',
       'CHEATING', 'COUNTERFIETING', 'ARSON', 'HURT/GREVIOUS HURT',
       'DOWRY DEATHS', 'ASSAULT ON WOMEN WITH INTENT TO OUTRAGE HER MODESTY',
       'INSULT TO MODESTY OF WOMEN', 'CRUELTY BY HUSBAND OR HIS RELATIVES',
       'IMPORTATION OF GIRLS FROM FOREIGN COUNTRIES',
       'CAUSING DEATH BY NEGLIGENCE', 'OTHER IPC CRIMES', 'TOTAL I

In [16]:
# Pairwise correlation between the merged columns. The all-NaN
# Police_Killed_On_Border_Duties row in the output is presumably because that
# column is constant — TODO confirm.
dataset.corr()

Unnamed: 0,Cases_Property_Recovered,Cases_Property_Stolen,Value_of_Property_Recovered,Value_of_Property_Stolen,Cases_Registered_under_Human_Rights_Violations,Policemen_Chargesheeted,Policemen_Convicted,MURDER,ATTEMPT TO MURDER,CULPABLE HOMICIDE NOT AMOUNTING TO MURDER,...,Police_Injured_In_TerroristsExtremists_Operations,Police_Injured_On_Border_Duties,Police_Injured_Total_Policemen,Police_Killed_By_Criminals,Police_Killed_By_Riotous_Mobs,Police_Killed_In_Accidents,Police_Killed_In_Dacoity_OperationsOther_raids,Police_Killed_In_TerroristsExtremists_Operations,Police_Killed_On_Border_Duties,Police_Killed_Total_Policemen
Cases_Property_Recovered,1.000000,0.919303,0.800981,0.751374,0.438883,0.459641,0.423870,0.586685,0.509641,0.272169,...,-0.073217,0.125195,0.721819,0.145286,0.556032,0.628475,0.037501,-0.052034,,0.377165
Cases_Property_Stolen,0.919303,1.000000,0.865841,0.850998,0.304753,0.450837,0.208413,0.730048,0.632723,0.455973,...,-0.074692,0.182110,0.750940,0.290759,0.484965,0.611681,0.090216,-0.066211,,0.378650
Value_of_Property_Recovered,0.800981,0.865841,1.000000,0.835464,0.251388,0.665279,0.075203,0.755167,0.703483,0.596826,...,-0.106947,0.104238,0.651640,0.453272,0.302308,0.689863,0.114907,-0.106478,,0.418982
Value_of_Property_Stolen,0.751374,0.850998,0.835464,1.000000,0.175103,0.303679,-0.005075,0.538853,0.406813,0.321696,...,-0.085983,0.138711,0.720051,0.280487,0.350651,0.687564,-0.016123,-0.084849,,0.401129
Cases_Registered_under_Human_Rights_Violations,0.438883,0.304753,0.251388,0.175103,1.000000,0.493239,0.717750,0.296279,0.150121,0.091860,...,-0.004367,0.138551,0.205471,0.023202,0.230971,0.307960,0.576830,0.019304,,0.258406
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Police_Killed_In_Accidents,0.628475,0.611681,0.689863,0.687564,0.307960,0.327514,0.332121,0.394700,0.324300,0.247366,...,-0.007074,0.068824,0.485067,0.381773,0.241386,1.000000,-0.020913,0.002891,,0.663533
Police_Killed_In_Dacoity_OperationsOther_raids,0.037501,0.090216,0.114907,-0.016123,0.576830,0.300865,0.189490,0.349790,0.264473,0.285637,...,0.067228,-0.026772,-0.003498,0.407545,0.028621,-0.020913,1.000000,0.050334,,0.150787
Police_Killed_In_TerroristsExtremists_Operations,-0.052034,-0.066211,-0.106478,-0.084849,0.019304,-0.058610,0.148627,0.056294,0.072381,-0.056396,...,0.992708,-0.065113,0.290262,0.325276,0.051801,0.002891,0.050334,1.000000,,0.732536
Police_Killed_On_Border_Duties,,,,,,,,,,,...,,,,,,,,,,




In [None]:
# def find_args(dataset):
#   area_name = None
#   if 'STATE/UT' in dataset.keys():
#     area_name = 'STATE/UT'
#   elif 'Area_Name' in dataset.keys():
#     area_name = 'Area_Name'

#   group_name = None
#   if 'DISTRICT' in dataset.keys():
#     group_name = 'DISTRICT'
#   elif 'Group_Name' in dataset.keys():
#     group_name = 'Group_Name'

#   total_label = None
#   # if 'TOTAL'