In [20]:
import pandas as pandas
import numpy as numpy
import warnings
warnings.filterwarnings("ignore")
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [7]:
# посилання на датасет: https://www.kaggle.com/datasets/omkargowda/suicide-rates-overview-1985-to-2021
suicide = pandas.read_csv("/home/anatoliy/Downloads/master.csv")
suicide = suicide.drop(labels=range(10000, 31756), axis=0)
print(suicide.columns)
print("Raws,Columns:")
print(suicide.shape)
suicide = suicide.dropna() 
suicide.isnull().sum()

Index(['country', 'year', 'sex', 'age', 'suicides_no', 'population',
       'suicides/100k pop', 'country-year', 'HDI for year',
       ' gdp_for_year ($) ', 'gdp_per_capita ($)', 'generation'],
      dtype='object')
Raws,Columns:
(10000, 12)


country               0
year                  0
sex                   0
age                   0
suicides_no           0
population            0
suicides/100k pop     0
country-year          0
HDI for year          0
 gdp_for_year ($)     0
gdp_per_capita ($)    0
generation            0
dtype: int64

In [8]:
#нормалізація
suicide_only_numbers  = suicide.drop(columns=['sex', 'age', 'country-year', 'generation', ' gdp_for_year ($) '], axis=1).set_index('country') #getting numbers
X = suicide_only_numbers.drop(['year'], axis=1)
Y = suicide_only_numbers['year']
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30)
print(X_train.shape)
print(Y_train.shape)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

(2160, 5)
(2160,)


In [13]:
#SVM
svm = SVC(C=1, gamma = 1, kernel = 'rbf')
svm.fit(X_train,Y_train)
svm_prediction = svm.predict(X_test)
print(classification_report(Y_test,svm_prediction))
print(confusion_matrix(Y_test,svm_prediction))
print("Accuracy score:", accuracy_score(Y_test,svm_prediction))

              precision    recall  f1-score   support

        1985       0.83      0.11      0.20        44
        1990       0.57      0.37      0.45        67
        1995       0.39      0.34      0.36        99
        2000       0.32      0.49      0.39        99
        2005       0.53      0.42      0.47       108
        2010       0.12      0.18      0.15        94
        2011       0.11      0.19      0.14       103
        2012       0.13      0.04      0.06       105
        2013       0.13      0.19      0.15       108
        2014       0.08      0.04      0.05        99

    accuracy                           0.24       926
   macro avg       0.32      0.24      0.24       926
weighted avg       0.28      0.24      0.24       926

[[ 5  5  6  9  3  6  7  0  1  2]
 [ 1 25 13 12  3  1  3  1  7  1]
 [ 0  9 34 32  8  2  8  1  3  2]
 [ 0  2 15 49  4  4  2  5 15  3]
 [ 0  2  8 18 45 11  9  0 12  3]
 [ 0  1  0  8 14 17 25  3 16 10]
 [ 0  0  5  5  3 25 20 10 27  8]
 [ 0  0  3

In [14]:
#Oптимальне значення SVM
param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [1,0.1,0.01,0.001], 'kernel': ['rbf']} 
grid_search = GridSearchCV(SVC(), param_grid, refit=True)
grid_search.fit(X_train,Y_train)
grid_search.best_params_

{'C': 100, 'gamma': 1, 'kernel': 'rbf'}

In [15]:
#Підставляємо оптимальне значення
svm = SVC(C=100, gamma = 1, kernel = 'rbf')
svm.fit(X_train,Y_train)
svm_prediction = svm.predict(X_test)
print(classification_report(Y_test,svm_prediction))
print(confusion_matrix(Y_test,svm_prediction))
print("Accuracy score:", accuracy_score(Y_test,svm_prediction))

              precision    recall  f1-score   support

        1985       0.48      0.52      0.50        44
        1990       0.52      0.55      0.54        67
        1995       0.51      0.44      0.47        99
        2000       0.42      0.60      0.50        99
        2005       0.59      0.47      0.52       108
        2010       0.24      0.43      0.31        94
        2011       0.20      0.24      0.22       103
        2012       0.18      0.10      0.13       105
        2013       0.15      0.12      0.13       108
        2014       0.15      0.08      0.10        99

    accuracy                           0.34       926
   macro avg       0.34      0.36      0.34       926
weighted avg       0.33      0.34      0.32       926

[[23  2  4  7  1  1  1  0  1  4]
 [ 8 37 10  5  1  1  2  2  0  1]
 [12 10 44 21  1  8  0  0  2  1]
 [ 2  7  8 59  6  3  7  1  6  0]
 [ 2  7  4 24 51  7  4  1  3  5]
 [ 1  3  3  7 14 40  9  5  5  7]
 [ 0  3  3  3  5 36 25  6 15  7]
 [ 0  0  3

In [22]:
#Tree з використанням критерію'entropy'
tree = DecisionTreeClassifier(criterion='entropy')
tree.fit(X_train, Y_train)
tree_prediction = tree.predict(X_test)
print(classification_report(Y_test,tree_prediction))
print(confusion_matrix(Y_test,tree_prediction))
print("Accuracy score:", accuracy_score(Y_test,tree_prediction))
#Tree criterion='gini'
tree = DecisionTreeClassifier(criterion='gini')
tree.fit(x_train, y_train)
tree_prediction = tree.predict(X_test)
print(classification_report(Y_test,tree_prediction))
print(confusion_matrix(Y_test,tree_prediction))
print("Accuracy score:", accuracy_score(Y_test,tree_prediction))

              precision    recall  f1-score   support

        1985       0.20      0.23      0.22        44
        1990       0.41      0.61      0.49        67
        1995       0.48      0.48      0.48        99
        2000       0.51      0.49      0.50        99
        2005       0.49      0.50      0.50       108
        2010       0.27      0.44      0.34        94
        2011       0.22      0.30      0.25       103
        2012       0.11      0.08      0.09       105
        2013       0.07      0.04      0.05       108
        2014       0.33      0.14      0.20        99

    accuracy                           0.32       926
   macro avg       0.31      0.33      0.31       926
weighted avg       0.31      0.32      0.31       926

[[10 10  0  9  6  9  0  0  0  0]
 [ 3 41  0  5  8  0  6  4  0  0]
 [ 8  5 48 18  4  5  3  1  3  4]
 [ 1 15  6 49 14  1  5  6  0  2]
 [ 7 10  6  5 54  4  5  5  6  6]
 [ 0  3  5  6  7 41 22  6  4  0]
 [ 9  1 11  3  0 36 31  0  7  5]
 [ 7  2 13

In [24]:
#RFS
rf = RandomForestClassifier(n_estimators=300, random_state=42)
rf.fit(X_train, Y_train)
rf_prediction = rf.predict(X_test)
print(classification_report(Y_test,rf_prediction))
print(confusion_matrix(Y_test,rf_prediction))
print("Accuracy score:", accuracy_score(Y_test,rf_prediction))

              precision    recall  f1-score   support

        1985       0.50      0.48      0.49        44
        1990       0.55      0.61      0.58        67
        1995       0.61      0.58      0.59        99
        2000       0.55      0.70      0.62        99
        2005       0.74      0.70      0.72       108
        2010       0.26      0.54      0.36        94
        2011       0.23      0.24      0.24       103
        2012       0.20      0.17      0.19       105
        2013       0.13      0.06      0.09       108
        2014       0.30      0.14      0.19        99

    accuracy                           0.41       926
   macro avg       0.41      0.42      0.41       926
weighted avg       0.40      0.41      0.39       926

[[21  6  1  7  0  0  6  1  0  2]
 [ 3 41  9  4  4  1  1  1  0  3]
 [10  8 57 10  2  5  0  2  1  4]
 [ 6  6  5 69  4  1  4  0  1  3]
 [ 2  4  1 11 76  4  4  2  1  3]
 [ 0  8  1 10  7 51  9  4  2  2]
 [ 0  0  6  5  1 47 25  8  3  8]
 [ 0  2  7

In [25]:
#AdaBoost
svc1=SVC(probability=True, kernel='linear')
abc =AdaBoostClassifier(n_estimators=10, estimator=svc1,learning_rate=1)
model = abc.fit(X_train, Y_train)
ada_prediction = model.predict(X_test)
print(classification_report(Y_test,ada_prediction))
print(confusion_matrix(Y_test,ada_prediction))
print("Accuracy score:", accuracy_score(Y_test,ada_prediction))

              precision    recall  f1-score   support

        1985       0.00      0.00      0.00        44
        1990       0.00      0.00      0.00        67
        1995       0.25      0.01      0.02        99
        2000       0.12      0.65      0.20        99
        2005       0.00      0.00      0.00       108
        2010       0.13      0.50      0.20        94
        2011       0.00      0.00      0.00       103
        2012       0.00      0.00      0.00       105
        2013       0.00      0.00      0.00       108
        2014       0.00      0.00      0.00        99

    accuracy                           0.12       926
   macro avg       0.05      0.12      0.04       926
weighted avg       0.05      0.12      0.04       926

[[ 0  0  0 41  0  3  0  0  0  0]
 [ 0  0  0 38  0 24  5  0  0  0]
 [ 0  0  1 69  0 26  3  0  0  0]
 [ 0  0  2 64  0 33  0  0  0  0]
 [ 0  0  0 65  0 42  1  0  0  0]
 [ 0  0  0 46  0 47  1  0  0  0]
 [ 0  0  0 54  0 49  0  0  0  0]
 [ 0  0  0