In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Загрузка данных
df = pd.read_csv('D:/Valerian/Documents/OneDrive/Python/ДопОбр Анализ данных/Практика Нетодология/sales_prediction/Команда_11/train.csv', parse_dates=['Date'], low_memory=False)

# Преобразование даты в datetime
df['Date'] = pd.to_datetime(df['Date'])

# Обработка пропусков и ненужных значений
df = df.fillna(0)  # Или другой метод обработки пропусков

# Добавление дополнительных признаков (например, день недели, месяц)
df['DayOfWeek'] = df['Date'].dt.dayofweek
df['Month'] = df['Date'].dt.month

# Преобразование категориальных данных в числовые
df = pd.get_dummies(df, columns=['StateHoliday'])

# Сортировка данных по дате
df = df.sort_values(by='Date')

# Удаление ненужных столбцов
df = df[['Store', 'Sales', 'Customers', 'Open', 'Promo', 'StateHoliday_a', 'StateHoliday_b', 'StateHoliday_c', 'SchoolHoliday', 'DayOfWeek', 'Month']]

# Нормализация признаков
scaler = StandardScaler()
df[['Sales', 'Customers', 'Open', 'Promo', 'StateHoliday_a', 'StateHoliday_b', 'StateHoliday_c', 'SchoolHoliday', 'DayOfWeek', 'Month']] = scaler.fit_transform(df[['Sales', 'Customers', 'Open', 'Promo', 'StateHoliday_a', 'StateHoliday_b', 'StateHoliday_c', 'SchoolHoliday', 'DayOfWeek', 'Month']])

# Разделение данных на обучающую и тестовую выборки
train_size = int(len(df) * 0.8)
train_df = df[:train_size]
test_df = df[train_size:]

def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length):
        seq = data[i:i + seq_length]
        sequences.append(seq)
    return np.array(sequences)

seq_length = 30  # Длина окна

# Преобразование данных в массивы для CNN
train_sequences = create_sequences(train_df.values, seq_length)
test_sequences = create_sequences(test_df.values, seq_length)

X_train = train_sequences[:, :-1]
y_train = train_sequences[:, -1, 1]  # Sales is the second column
X_test = test_sequences[:, :-1]
y_test = test_sequences[:, -1, 1]
print(X_train)
print("-------------------------")
print(y_train)
# Преобразование типов
X_train = X_train.astype('float32')
y_train = y_train.astype('float32')
X_test = X_test.astype('float32')
y_test = y_test.astype('float32')

# Создание модели
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dense(1))  # Предсказание объема продаж

model.compile(optimizer='adam', loss='mean_squared_error')

# Обучение модели
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_test, y_test))

# Прогнозирование на тестовых данных
y_pred = model.predict(X_test)

# Вычисление метрик
mae_cnn = mean_absolute_error(y_test, y_pred)
mse_cnn = mean_squared_error(y_test, y_pred)
rmse_cnn = np.sqrt(mse_cnn)
r2_cnn = r2_score(y_test, y_pred)

# Вывод значений метрик
print(f"Среднее абсолютное отклонение (MAE) на тестовых данных (CNN): {mae_cnn}")
print(f"Корень средней квадратичной ошибки (RMSE) на тестовых данных (CNN): {rmse_cnn}")
print(f"Среднеквадратичная ошибка (MSE) на тестовых данных (CNN): {mse_cnn}")
print(f"Коэффициент детерминации (R2) на тестовых данных (CNN): {r2_cnn}")


[[[ 1.11500000e+03 -1.49972273e+00 -1.36332959e+00 ...  2.14421115e+00
   -1.00047591e+00 -1.45719295e+00]
  [ 3.79000000e+02 -1.49972273e+00 -1.36332959e+00 ...  2.14421115e+00
   -1.00047591e+00 -1.45719295e+00]
  [ 3.78000000e+02 -1.49972273e+00 -1.36332959e+00 ...  2.14421115e+00
   -1.00047591e+00 -1.45719295e+00]
  ...
  [ 3.54000000e+02 -1.49972273e+00 -1.36332959e+00 ...  2.14421115e+00
   -1.00047591e+00 -1.45719295e+00]
  [ 3.53000000e+02 -6.84382024e-01  4.02345875e-01 ...  2.14421115e+00
   -1.00047591e+00 -1.45719295e+00]
  [ 3.52000000e+02 -1.49972273e+00 -1.36332959e+00 ...  2.14421115e+00
   -1.00047591e+00 -1.45719295e+00]]

 [[ 3.79000000e+02 -1.49972273e+00 -1.36332959e+00 ...  2.14421115e+00
   -1.00047591e+00 -1.45719295e+00]
  [ 3.78000000e+02 -1.49972273e+00 -1.36332959e+00 ...  2.14421115e+00
   -1.00047591e+00 -1.45719295e+00]
  [ 3.77000000e+02 -1.49972273e+00 -1.36332959e+00 ...  2.14421115e+00
   -1.00047591e+00 -1.45719295e+00]
  ...
  [ 3.53000000e+02 -6.8

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 3ms/step - loss: 18.5086 - val_loss: 0.4536
Epoch 2/20
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 3ms/step - loss: 0.4613 - val_loss: 0.4465
Epoch 3/20
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 3ms/step - loss: 0.4364 - val_loss: 0.4164
Epoch 4/20
[1m21328/25430[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m9s[0m 2ms/step - loss: 0.4175 

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

# График потерь
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()


plt.plot(history.history['mse'], label='Training MSE')
plt.plot(history.history['val_mae'], label='Validation MSE')
plt.title('Training and Validation MSE')
plt.xlabel('Epochs')
plt.ylabel('MSE')
plt.legend()
plt.show()


Обучение модели на данных без столбца Customers

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Загрузка данных
df = pd.read_csv('D:/Valerian/Documents/OneDrive/Python/ДопОбр Анализ данных/Практика Нетодология/sales_prediction/Команда_11/train.csv', parse_dates=['Date'], low_memory=False)

# Удаление столбца 'Customers'
df = df.drop(columns=['Customers'])

# Преобразование даты в datetime
df['Date'] = pd.to_datetime(df['Date'])

# Обработка пропусков и ненужных значений
df = df.fillna(0)  # Или другой метод обработки пропусков

# Добавление дополнительных признаков (например, день недели, месяц)
df['DayOfWeek'] = df['Date'].dt.dayofweek
df['Month'] = df['Date'].dt.month


# Преобразование категориальных данных в числовые
df = pd.get_dummies(df, columns=['StateHoliday'])

# Сортировка данных по дате
df = df.sort_values(by='Date')

# Удаление ненужных столбцов
df = df[['Store', 'Sales' , 'Open', 'Promo', 'StateHoliday_a', 'StateHoliday_b', 'StateHoliday_c', 'SchoolHoliday', 'DayOfWeek', 'Month']]

# Нормализация признаков
scaler = StandardScaler()
df[['Sales' , 'Open', 'Promo', 'StateHoliday_a', 'StateHoliday_b', 'StateHoliday_c', 'SchoolHoliday', 'DayOfWeek', 'Month']] = scaler.fit_transform(df[['Sales', 'Open', 'Promo', 'StateHoliday_a', 'StateHoliday_b', 'StateHoliday_c', 'SchoolHoliday', 'DayOfWeek', 'Month']])

# Разделение данных на обучающую и тестовую выборки
train_size = int(len(df) * 0.8)
train_df = df[:train_size]
test_df = df[train_size:]

def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length):
        seq = data[i:i + seq_length]
        sequences.append(seq)
    return np.array(sequences)

seq_length = 30  # Длина окна

# Преобразование данных в массивы для CNN
train_sequences = create_sequences(train_df.values, seq_length)
test_sequences = create_sequences(test_df.values, seq_length)

X_train = train_sequences[:, :-1]
y_train = train_sequences[:, -1, 1]  # Sales is the second column
X_test = test_sequences[:, :-1]
y_test = test_sequences[:, -1, 1]

# Преобразование типов
X_train = X_train.astype('float32')
y_train = y_train.astype('float32')
X_test = X_test.astype('float32')
y_test = y_test.astype('float32')
print(X_train)
print("-------------------------")
print(y_train)
# Создание модели
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dense(1))  # Предсказание объема продаж

model.compile(optimizer='adam', loss='mean_squared_error')

# Обучение модели
history = model.fit(X_train, y_train, epochs=7, validation_data=(X_test, y_test))

# Прогнозирование на тестовых данных
y_pred = model.predict(X_test)

# Вычисление метрик

mae_cnn = mean_absolute_error(y_test, y_pred)
mse_cnn = mean_squared_error(y_test, y_pred)
rmse_cnn = np.sqrt(mse_cnn)
r2_cnn = r2_score(y_test, y_pred)

# Вывод значений метрик
print(f"Среднее абсолютное отклонение (MAE) на тестовых данных (CNN): {mae_cnn}")
print(f"Корень средней квадратичной ошибки (RMSE) на тестовых данных (CNN): {rmse_cnn}")
print(f"Среднеквадратичная ошибка (MSE) на тестовых данных (CNN): {mse_cnn}")
print(f"Коэффициент детерминации (R2) на тестовых данных (CNN): {r2_cnn}")


[[[ 1.1150000e+03 -1.4997227e+00 -2.2104404e+00 ...  2.1442111e+00
   -1.0004759e+00 -1.4571929e+00]
  [ 3.7900000e+02 -1.4997227e+00 -2.2104404e+00 ...  2.1442111e+00
   -1.0004759e+00 -1.4571929e+00]
  [ 3.7800000e+02 -1.4997227e+00 -2.2104404e+00 ...  2.1442111e+00
   -1.0004759e+00 -1.4571929e+00]
  ...
  [ 3.5400000e+02 -1.4997227e+00 -2.2104404e+00 ...  2.1442111e+00
   -1.0004759e+00 -1.4571929e+00]
  [ 3.5300000e+02 -6.8438202e-01  4.5239851e-01 ...  2.1442111e+00
   -1.0004759e+00 -1.4571929e+00]
  [ 3.5200000e+02 -1.4997227e+00 -2.2104404e+00 ...  2.1442111e+00
   -1.0004759e+00 -1.4571929e+00]]

 [[ 3.7900000e+02 -1.4997227e+00 -2.2104404e+00 ...  2.1442111e+00
   -1.0004759e+00 -1.4571929e+00]
  [ 3.7800000e+02 -1.4997227e+00 -2.2104404e+00 ...  2.1442111e+00
   -1.0004759e+00 -1.4571929e+00]
  [ 3.7700000e+02 -1.4997227e+00 -2.2104404e+00 ...  2.1442111e+00
   -1.0004759e+00 -1.4571929e+00]
  ...
  [ 3.5300000e+02 -6.8438202e-01  4.5239851e-01 ...  2.1442111e+00
   -1.0004

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/7
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 3ms/step - loss: 26.7976 - val_loss: 0.4862
Epoch 2/7
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 3ms/step - loss: 0.4587 - val_loss: 0.4515
Epoch 3/7
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m80s[0m 3ms/step - loss: 0.4411 - val_loss: 0.4165
Epoch 4/7
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 3ms/step - loss: 0.4265 - val_loss: 0.4073
Epoch 5/7
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 3ms/step - loss: 0.4142 - val_loss: 0.3964
Epoch 6/7
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 3ms/step - loss: 0.4075 - val_loss: 0.3888
Epoch 7/7
[1m25430/25430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 3ms/step - loss: 0.3996 - val_loss: 0.3830
[1m6357/6357[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 2ms/step
Среднее абсолютное отклонение (MAE) на тестовых данны

In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import backend as K

# Определение пользовательской метрики для RMSE
def rmse(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))

# Создание модели
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(50, activation='relu'))
model.add(Dense(1))  # Предсказание объема продаж

# Компиляция модели с использованием RMSE в качестве метрики
model.compile(optimizer='adam', loss='mean_squared_error', metrics=[rmse])

# Обучение модели
history = model.fit(X_train, y_train, epochs=7, validation_data=(X_test, y_test))

# Визуализация потерь
plt.figure(figsize=(14, 6))

# Потери на обучении и валидации
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# RMSE на обучении и валидации
plt.subplot(1, 2, 2)
plt.plot(history.history['rmse'], label='Training RMSE')
plt.plot(history.history['val_rmse'], label='Validation RMSE')
plt.title('Training and Validation RMSE')
plt.xlabel('Epochs')
plt.ylabel('RMSE')
plt.legend()

plt.tight_layout()
plt.show()
