In [1]:
%pip install scikit-learn

Collecting scikit-learnNote: you may need to restart the kernel to use updated packages.

  Downloading scikit_learn-1.6.1-cp311-cp311-win_amd64.whl.metadata (15 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Downloading scipy-1.15.1-cp311-cp311-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 0.0/60.8 kB ? eta -:--:--
     ------ --------------------------------- 10.2/60.8 kB ? eta -:--:--
     ------ --------------------------------- 10.2/60.8 kB ? eta -:--:--
     ------------------- ------------------ 30.7/60.8 kB 262.6 kB/s eta 0:00:01
     -------------------------------- ----- 51.2/60.8 kB 260.9 kB/s eta 0:00:01
     -------------------------------------- 60.8/60.8 kB 268.7 kB/s eta 0:00:00
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn)
  Downloading threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.6.1-cp31


[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Users\TZ\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [1]:
# Імпортуємо необхідні бібліотеки
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Build a synthetic, military-themed sample data set.
random.seed(42)  # fixed seed so the generated values are reproducible

# Number of records to generate
n = 100

# Allowed values for the categorical 'rank' column
ranks = ['Private', 'Sergeant', 'Lieutenant', 'Captain', 'Major', 'Colonel']


def _random_ints(low, high):
    """Return a list of `n` integers, each drawn with random.randint(low, high)."""
    return [random.randint(low, high) for _ in range(n)]


# Every draw advances the shared random generator, so the order of these
# statements determines which values end up in which column.
missions_completed = _random_ints(0, 30)
years_of_service = _random_ints(1, 25)
age = _random_ints(20, 55)
salary = _random_ints(25000, 80000)
rank = [random.choice(ranks) for _ in range(n)]

# Assemble the columns into a DataFrame
military_data = pd.DataFrame(dict(
    age=age,
    years_of_service=years_of_service,
    missions_completed=missions_completed,
    salary=salary,
    rank=rank,
))

# Show the initial data set
print("Початковий тестовий набір даних на військову тематику:")
display(military_data)

# 1. Data transformation: handling missing values
# For demonstration purposes, blank out five randomly chosen entries.
# 'missions_completed' is generated as integers and NaN is a float value, so the
# column is cast to float explicitly first; recent pandas versions warn when an
# assignment would otherwise force an implicit dtype change.
military_data['missions_completed'] = military_data['missions_completed'].astype(float)
military_data.loc[random.sample(range(n), 5), 'missions_completed'] = np.nan

# Fill the gaps with the column mean. Passing a {column: value} mapping limits the
# fill to 'missions_completed'; a bare scalar would be written into missing cells
# of every column, including the text column 'rank'.
missions_mean = military_data['missions_completed'].mean()
military_data_filled = military_data.fillna({'missions_completed': missions_mean})

print("\nНабір даних після заповнення пропусків середніми значеннями:")
display(military_data_filled)

# 2. Normalisation: add z-score versions of 'salary' and 'missions_completed'.
# Each new column is (value - column mean) / column standard deviation and is
# stored next to the original under the name '<column>_norm'.
for source_column in ('salary', 'missions_completed'):
    values = military_data_filled[source_column]
    centered = values - values.mean()
    military_data_filled[source_column + '_norm'] = centered / values.std()

print("\nНабір даних після нормалізації:")
display(military_data_filled)

# 3. Categorical encoding: replace 'rank' with one indicator column per rank value
categorical_columns = ['rank']
military_data_transformed = pd.get_dummies(
    military_data_filled,
    columns=categorical_columns,
)

print("\nНабір даних після перетворення категоріальних змінних (One-Hot Encoding):")
display(military_data_transformed)

# 4. Min-max scaling of 'years_of_service' and 'salary'
# NOTE(review): the scaler is fitted on the whole data set, before the train/test
# split performed further down. If the scaled columns are ever fed to a model,
# fit the scaler on the training rows only so test statistics do not leak in.
scaler = MinMaxScaler()

# fit_transform returns one output column per input column, in the same order.
scaled_values = scaler.fit_transform(
    military_data_transformed[['years_of_service', 'salary']]
)
military_data_transformed['years_of_service_scaled'] = scaled_values[:, 0]
military_data_transformed['salary_scaled'] = scaled_values[:, 1]

print("\nНабір даних після масштабування:")
display(military_data_transformed)

# 5. Data set decomposition: hold out 20% of the rows as a test set.
# random_state is fixed so the same rows are selected on every run.
train_set, test_set = train_test_split(
    military_data_transformed,
    test_size=0.2,
    random_state=42,
)

# Show both parts, training set first
for caption, subset in (
    ("\nНавчальна вибірка:", train_set),
    ("\nТестова вибірка:", test_set),
):
    print(caption)
    display(subset)

# 6. Feature correlation analysis: pairwise Pearson correlation of all columns
correlation_matrix = military_data_transformed.corr(method='pearson')

print("\nМатриця кореляції:")
display(correlation_matrix)

# 7. Aggregation by years of service: mean salary and record count for each value
rows_by_service = military_data_transformed.groupby('years_of_service')
salary_aggregations = {'salary': ['mean', 'count']}
grouped_data = rows_by_service.agg(salary_aggregations)

print("\nАгрегація даних за кількістю років служби:")
display(grouped_data)


Початковий тестовий набір даних на військову тематику:


Unnamed: 0,age,years_of_service,missions_completed,salary,rank
0,31,7,20,28315,Sergeant
1,52,22,3,67738,Lieutenant
2,26,9,0,60427,Private
3,39,23,23,79789,Sergeant
4,52,22,8,25967,Lieutenant
...,...,...,...,...,...
95,35,24,26,29801,Captain
96,24,24,11,60234,Colonel
97,48,9,5,38969,Colonel
98,55,17,11,58153,Sergeant



Набір даних після заповнення пропусків середніми значеннями:


Unnamed: 0,age,years_of_service,missions_completed,salary,rank
0,31,7,20.000000,28315,Sergeant
1,52,22,3.000000,67738,Lieutenant
2,26,9,0.000000,60427,Private
3,39,23,23.000000,79789,Sergeant
4,52,22,13.189474,25967,Lieutenant
...,...,...,...,...,...
95,35,24,26.000000,29801,Captain
96,24,24,11.000000,60234,Colonel
97,48,9,5.000000,38969,Colonel
98,55,17,11.000000,58153,Sergeant



Набір даних після нормалізації:


Unnamed: 0,age,years_of_service,missions_completed,salary,rank,salary_norm,missions_completed_norm
0,31,7,20.000000,28315,Sergeant,-1.411568,7.991274e-01
1,52,22,3.000000,67738,Lieutenant,1.107741,-1.195603e+00
2,26,9,0.000000,60427,Private,0.640534,-1.547615e+00
3,39,23,23.000000,79789,Sergeant,1.877854,1.151139e+00
4,52,22,13.189474,25967,Lieutenant,-1.561616,2.084326e-16
...,...,...,...,...,...,...,...
95,35,24,26.000000,29801,Captain,-1.316606,1.503150e+00
96,24,24,11.000000,60234,Colonel,0.628201,-2.569065e-01
97,48,9,5.000000,38969,Colonel,-0.730729,-9.609291e-01
98,55,17,11.000000,58153,Sergeant,0.495216,-2.569065e-01



Набір даних після перетворення категоріальних змінних (One-Hot Encoding):


Unnamed: 0,age,years_of_service,missions_completed,salary,salary_norm,missions_completed_norm,rank_Captain,rank_Colonel,rank_Lieutenant,rank_Major,rank_Private,rank_Sergeant
0,31,7,20.000000,28315,-1.411568,7.991274e-01,False,False,False,False,False,True
1,52,22,3.000000,67738,1.107741,-1.195603e+00,False,False,True,False,False,False
2,26,9,0.000000,60427,0.640534,-1.547615e+00,False,False,False,False,True,False
3,39,23,23.000000,79789,1.877854,1.151139e+00,False,False,False,False,False,True
4,52,22,13.189474,25967,-1.561616,2.084326e-16,False,False,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
95,35,24,26.000000,29801,-1.316606,1.503150e+00,True,False,False,False,False,False
96,24,24,11.000000,60234,0.628201,-2.569065e-01,False,True,False,False,False,False
97,48,9,5.000000,38969,-0.730729,-9.609291e-01,False,True,False,False,False,False
98,55,17,11.000000,58153,0.495216,-2.569065e-01,False,False,False,False,False,True



Набір даних після масштабування:


Unnamed: 0,age,years_of_service,missions_completed,salary,salary_norm,missions_completed_norm,rank_Captain,rank_Colonel,rank_Lieutenant,rank_Major,rank_Private,rank_Sergeant,years_of_service_scaled,salary_scaled
0,31,7,20.000000,28315,-1.411568,7.991274e-01,False,False,False,False,False,True,0.250000,0.058081
1,52,22,3.000000,67738,1.107741,-1.195603e+00,False,False,True,False,False,False,0.875000,0.779480
2,26,9,0.000000,60427,0.640534,-1.547615e+00,False,False,False,False,True,False,0.333333,0.645696
3,39,23,23.000000,79789,1.877854,1.151139e+00,False,False,False,False,False,True,0.916667,1.000000
4,52,22,13.189474,25967,-1.561616,2.084326e-16,False,False,True,False,False,False,0.875000,0.015115
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,35,24,26.000000,29801,-1.316606,1.503150e+00,True,False,False,False,False,False,0.958333,0.085273
96,24,24,11.000000,60234,0.628201,-2.569065e-01,False,True,False,False,False,False,0.958333,0.642164
97,48,9,5.000000,38969,-0.730729,-9.609291e-01,False,True,False,False,False,False,0.333333,0.253038
98,55,17,11.000000,58153,0.495216,-2.569065e-01,False,False,False,False,False,True,0.666667,0.604084



Навчальна вибірка:


Unnamed: 0,age,years_of_service,missions_completed,salary,salary_norm,missions_completed_norm,rank_Captain,rank_Colonel,rank_Lieutenant,rank_Major,rank_Private,rank_Sergeant,years_of_service_scaled,salary_scaled
55,34,12,11.0,63996,0.868610,-0.256906,False,True,False,False,False,False,0.458333,0.711005
88,54,11,7.0,74274,1.525421,-0.726255,False,False,False,False,False,True,0.416667,0.899081
26,28,8,6.0,72874,1.435954,-0.843592,False,False,False,False,False,True,0.291667,0.873463
42,48,5,22.0,45552,-0.310046,1.033802,False,True,False,False,False,False,0.166667,0.373499
69,50,20,17.0,68081,1.129660,0.447116,True,False,False,False,False,False,0.791667,0.785756
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,52,3,25.0,32856,-1.121378,1.385813,False,False,False,False,True,False,0.083333,0.141176
71,32,13,26.0,62042,0.743740,1.503150,False,False,False,False,True,False,0.500000,0.675249
14,21,15,2.0,28842,-1.377891,-1.312940,False,False,False,False,True,False,0.583333,0.067724
92,31,6,8.0,65708,0.978014,-0.608918,False,False,False,False,False,True,0.208333,0.742333



Тестова вибірка:


Unnamed: 0,age,years_of_service,missions_completed,salary,salary_norm,missions_completed_norm,rank_Captain,rank_Colonel,rank_Lieutenant,rank_Major,rank_Private,rank_Sergeant,years_of_service_scaled,salary_scaled
83,41,18,7.0,69019,1.189602,-0.7262549,True,False,False,False,False,False,0.708333,0.802921
53,24,19,12.0,37178,-0.845182,-0.1395694,True,False,False,False,False,False,0.75,0.220264
70,46,3,9.0,63251,0.821001,-0.4915807,False,False,False,False,False,True,0.083333,0.697372
45,34,8,8.0,63284,0.82311,-0.6089178,False,False,False,False,True,False,0.291667,0.697976
44,35,5,10.0,28286,-1.413422,-0.3742436,False,False,True,False,False,False,0.166667,0.05755
39,43,13,24.0,60533,0.647308,1.268476,False,False,True,False,False,False,0.5,0.647636
22,25,11,16.0,54819,0.282158,0.329779,False,False,False,False,True,False,0.416667,0.543076
80,26,24,2.0,42407,-0.511025,-1.31294,False,False,False,False,True,False,0.958333,0.315949
10,53,18,21.0,56826,0.410414,0.9164645,False,False,True,False,False,False,0.708333,0.579802
0,31,7,20.0,28315,-1.411568,0.7991274,False,False,False,False,False,True,0.25,0.058081



Матриця кореляції:


Unnamed: 0,age,years_of_service,missions_completed,salary,salary_norm,missions_completed_norm,rank_Captain,rank_Colonel,rank_Lieutenant,rank_Major,rank_Private,rank_Sergeant,years_of_service_scaled,salary_scaled
age,1.0,-0.158676,0.109302,0.009517,0.009517,0.109302,-0.064462,-0.007916,0.06526,0.034201,-0.072535,0.025879,-0.158676,0.009517
years_of_service,-0.158676,1.0,-0.087853,-0.087003,-0.087003,-0.087853,0.083945,0.0246,0.063687,-0.014597,0.051565,-0.184797,1.0,-0.087003
missions_completed,0.109302,-0.087853,1.0,-0.057651,-0.057651,1.0,-0.039993,-0.070943,0.00061,0.229533,-0.019531,-0.083201,-0.087853,-0.057651
salary,0.009517,-0.087003,-0.057651,1.0,1.0,-0.057651,-0.116785,-0.051635,-0.161075,0.089621,-0.023754,0.242778,-0.087003,1.0
salary_norm,0.009517,-0.087003,-0.057651,1.0,1.0,-0.057651,-0.116785,-0.051635,-0.161075,0.089621,-0.023754,0.242778,-0.087003,1.0
missions_completed_norm,0.109302,-0.087853,1.0,-0.057651,-0.057651,1.0,-0.039993,-0.070943,0.00061,0.229533,-0.019531,-0.083201,-0.087853,-0.057651
rank_Captain,-0.064462,0.083945,-0.039993,-0.116785,-0.116785,-0.039993,1.0,-0.152312,-0.162142,-0.126886,-0.142327,-0.157243,0.083945,-0.116785
rank_Colonel,-0.007916,0.0246,-0.070943,-0.051635,-0.051635,-0.070943,-0.152312,1.0,-0.249707,-0.195411,-0.219189,-0.242161,0.0246,-0.051635
rank_Lieutenant,0.06526,0.063687,0.00061,-0.161075,-0.161075,0.00061,-0.162142,-0.249707,1.0,-0.208023,-0.233336,-0.25779,0.063687,-0.161075
rank_Major,0.034201,-0.014597,0.229533,0.089621,0.089621,0.229533,-0.126886,-0.195411,-0.208023,1.0,-0.1826,-0.201737,-0.014597,0.089621



Агрегація даних за кількістю років служби:


Unnamed: 0_level_0,salary,salary
Unnamed: 0_level_1,mean,count
years_of_service,Unnamed: 1_level_2,Unnamed: 2_level_2
1,38571.0,2
2,62708.0,3
3,56846.5,4
4,47877.333333,3
5,51762.5,4
6,58553.75,4
7,30763.666667,3
8,56200.6,5
9,49865.875,8
10,25610.0,1
