In [141]:
import json
import os
import pandas as pd
from datetime import datetime
from keras.models import load_model
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import joblib

In [142]:
# Path to the persisted model file.
# NOTE(review): the model is written with joblib rather than Keras/HDF5,
# so the ".h5" suffix does not describe the actual file format.
model_path = './predict_device_input.h5'

# Directory that holds the JSON device-log files.
directory_path = './device_input_logs'

In [143]:
# Read every device log file and collect all entries into one flat list.
# os.listdir() returns names in arbitrary order, so iterate in sorted order:
# the resulting row order feeds the seeded train/test split and must be
# reproducible from one run (and one machine) to the next.
all_logs = []
for filename in sorted(os.listdir(directory_path)):
    if not filename.endswith('.json'):
        continue
    file_path = os.path.join(directory_path, filename)
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # Each file is expected to carry its entries under the 'deviceLogs' key;
    # a file without it raises KeyError instead of being skipped silently.
    all_logs.extend(data['deviceLogs'])

In [144]:
# Data preparation: build a frame from the collected log entries.
df = pd.DataFrame(all_logs)

# Convert the timestamp to whole seconds since the Unix epoch.
# Subtracting the epoch and floor-dividing by one second gives the right
# answer whatever the column's internal datetime resolution is; casting to
# int64 and dividing by 10**9 is only correct for nanosecond-resolution data.
# utc=True localizes naive timestamps as UTC (the same instant the int64 cast
# assumed) and converts timezone-aware ones to UTC.
df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)
df['timestamp_seconds'] = (
    (df['timestamp'] - pd.Timestamp('1970-01-01', tz='UTC')) // pd.Timedelta(seconds=1)
)

# Keep only the columns used for modelling.
df = df[['timestamp_seconds', 'buttonKey', 'isWorkingMode']]
prepared_data = df

In [145]:
# Reuse a previously saved model when one exists; otherwise train a new one,
# save it, and print its evaluation on a held-out split.
# NOTE(review): joblib.load unpickles the file and can therefore execute
# arbitrary code -- model_path must only ever point at a trusted file.
model = None
try:
    model = joblib.load(model_path)
except OSError:
    print("Saved model not found")

# Compare against None explicitly: truth-testing the object would call its
# __len__ (scikit-learn ensembles define one) rather than check whether a
# model was actually loaded.
if model is None:
    # Split the data into features and target.
    X = prepared_data[['timestamp_seconds', 'buttonKey']]
    y = prepared_data['isWorkingMode']

    # Hold out 20% of the rows for evaluation.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Seed the forest as well, so retraining on the same data reproduces the
    # same model and the same report.
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)
    joblib.dump(model, model_path)

    # Evaluate on the held-out rows.
    predictions = model.predict(X_test)
    print(classification_report(y_test, predictions))
    
    

Saved model not found
              precision    recall  f1-score   support

       False       0.87      0.80      0.83     49881
        True       0.82      0.88      0.85     50119

    accuracy                           0.84    100000
   macro avg       0.84      0.84      0.84    100000
weighted avg       0.84      0.84      0.84    100000

