In [9]:
# Suppress Warnings
import warnings
warnings.filterwarnings('ignore')

# Data Science
import pandas as pd

# Machine Learning
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

In [10]:
# Load the merged crime-rate CSV and immediately discard the columns that are
# not wanted downstream.
# NOTE(review): if the CSV carries a saved index column (e.g. 'Unnamed: 0'),
# it is not removed here and would stay in the frame — confirm.
df = pd.read_csv('2011-2022_mergedcrimerate.csv').drop(
    columns=[
        'Month',
        'LSOA code',
        'Count',
        'AS Score',
        'Inflation_rate',
        'Indoors Score',
    ]
)

Unnamed: 0,Month,LSOA code,Count,Population,Pop_density,IMD Score,Income Score,IDACI Score,IDAOPI Score,Employment Score,...,Crime Score,BHS Score,GB Score,WB Score,LE Score,Indoors Score,Outdoors Score,Inflation_rate,unemployment_rate,Crime Rate
0,2011-01,E01007599,1,1760.0,15.6031,11.2,0.07,0.07,0.13,0.06,...,0.01,30.71,55.81,5.61,6.45,6.13,7.09,3.4,7.8,0.9
1,2011-01,E01007609,2,2302.0,17.9576,18.22,0.13,0.13,0.22,0.1,...,0.65,15.76,21.48,10.04,9.33,9.86,8.27,3.4,7.8,1.3
2,2011-01,E01013810,6,1657.0,0.1345,39.59,0.29,0.42,0.44,0.13,...,1.23,17.42,1.25,33.59,34.51,26.9,49.74,3.4,7.8,5.4
3,2011-01,E01013811,26,1446.0,0.128,45.17,0.27,0.41,0.58,0.16,...,1.99,17.77,0.22,35.31,36.36,33.08,42.91,3.4,7.8,27.0
4,2011-01,E01013812,19,1617.0,0.1209,47.45,0.36,0.55,0.45,0.16,...,1.78,18.06,0.17,35.94,31.94,18.63,58.57,3.4,7.8,17.6


In [12]:
# Target is the 'Crime Rate' column; every other remaining column is a feature.
# Both are taken as plain arrays.
y = df['Crime Rate'].values
X = df.drop(columns=['Crime Rate']).values

# Hold out 30% of the rows for testing. The seed value itself is arbitrary;
# fixing it keeps the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=17,
)

In [13]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both the training and the test split.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [14]:
# Extra keyword arguments forwarded to the XGBoost regressor; 'gpu_hist'
# requests XGBoost's GPU histogram tree method.
gpu_dict = dict(tree_method='gpu_hist')

regressor = xgb.XGBRegressor(**gpu_dict)

# Hyper-parameter grid to be searched exhaustively:
# 5 depths x 3 gammas x 3 child weights x 1 eta = 45 candidates.
grid_params = [
    {
        'max_depth': [8, 10, 12, 14, 16],
        'gamma': [0, 4, 8],
        'min_child_weight': [0, 5, 10],
        'eta': [0.1],
    }
]

# Grid search wrapping the XGBoost regressor:
# - param_grid: the list of parameter combinations defined above
# - cv=5: 5-fold cross-validation for every candidate
# - scoring='r2': candidates are scored with R2
# - verbose=3: one log line per fit, including its score and time
# - n_jobs is not passed, so the scikit-learn default applies
reg = GridSearchCV(
    estimator=regressor,
    param_grid=grid_params,
    scoring='r2',
    cv=5,
    verbose=3,
)

# Run the search on the training split.
reg.fit(X_train, y_train)

Fitting 5 folds for each of 45 candidates, totalling 225 fits
[CV 1/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=0;, score=0.687 total time=   2.5s
[CV 2/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=0;, score=0.691 total time=   2.2s
[CV 3/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=0;, score=0.697 total time=   2.3s
[CV 4/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=0;, score=0.680 total time=   2.2s
[CV 5/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=0;, score=0.692 total time=   2.4s
[CV 1/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=5;, score=0.686 total time=   2.2s
[CV 2/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=5;, score=0.689 total time=   2.2s
[CV 3/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=5;, score=0.696 total time=   2.3s
[CV 4/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=5;, score=0.682 total time=   2.2s
[CV 5/5] END eta=0.1, gamma=0, max_depth=8, min_child_weight=5;, score=0.6

In [None]:
# Tabulate the grid search's cross-validation results and write them to CSV
# without the row index. (The 'rf' prefix is kept as-is; the results come from
# the XGBoost search held in `reg`.)
rfgrid_results = pd.DataFrame(data=reg.cv_results_)
rfgrid_results.to_csv(path_or_buf='xgboostgrid_results_v3.csv', index=False)

In [None]:
# Shell out to nvidia-smi to show the current GPU status (driver/CUDA version,
# temperature, memory usage and utilisation).
!nvidia-smi

Mon May  1 21:00:12 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 531.68                 Driver Version: 531.68       CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3070 T...  WDDM | 00000000:01:00.0 Off |                  N/A |
| N/A   65C    P8               11W /  N/A|    596MiB /  8192MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    