In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import OneHotEncoder


# Load the raw game data
data = pd.read_csv('basketball.csv')


# Regression target: total points in a game, i.e. all four quarters of both teams
data['totalScore'] = (
    data['firstQuarterAwayScore'] + data['secondQuarterAwayScore'] +
    data['thirdQuarterAwayScore'] + data['fourthQuarterAwayScore'] +
    data['firstQuarterHomeScore'] + data['secondQuarterHomeScore'] +
    data['thirdQuarterHomeScore'] + data['fourthQuarterHomeScore']
)

# Mean away score per away team and mean home score per home team.
# NOTE(review): these means are computed over the full dataset, i.e. before the
# train/test split below, so test rows contribute to the features. Consider
# deriving them from the training rows only.
average_score_away = data.groupby('awayTeam')['awayScore'].mean()
average_score_home = data.groupby('homeTeam')['homeScore'].mean()

data['averageScoreAway'] = data['awayTeam'].map(average_score_away)
data['averageScoreHome'] = data['homeTeam'].map(average_score_home)

# One-hot encode the team names.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so try the new spelling first and fall back for older
# installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
team_names = data[['awayTeam', 'homeTeam']]
encoded_teams = encoder.fit_transform(team_names)

# Attach the encoded columns to the original frame (indexes aligned via reset_index)
encoded_team_names = pd.DataFrame(encoded_teams, columns=encoder.get_feature_names_out(team_names.columns))
data_encoded = pd.concat([data.reset_index(drop=True), encoded_team_names], axis=1)

# Model inputs: one-hot team columns plus the two per-team averages
features = data_encoded[encoded_team_names.columns.tolist() + ['averageScoreAway', 'averageScoreHome']]
target = data_encoded['totalScore']

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Standardise features; the scaler is fitted on the training part only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fully connected network: 64 -> 32 -> 1
model_nn = Sequential()
model_nn.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'))
model_nn.add(Dense(32, activation='relu'))
model_nn.add(Dense(1))  # single linear output unit for regression

# Compile with Adam and a mean-squared-error loss
model_nn.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Train
model_nn.fit(X_train_scaled, y_train, epochs=100, batch_size=10, verbose=1)

# Evaluate on the held-out test set.
# The network in this cell is `model_nn`; the original code called `model`,
# which is not defined here and fails on a fresh kernel.
loss = model_nn.evaluate(X_test_scaled, y_test)
print(f"Loss (Mean Squared Error): {loss}")

# Predictions for the test set
y_pred = model_nn.predict(X_test_scaled)


In [None]:
# Display the test-set predictions stored in `y_pred`.
y_pred

In [None]:
# Matchup to predict (a single away/home pair)
teams_for_prediction = pd.DataFrame({
    'awayTeam': ['Phoenix Suns (Hayley)'],
    'homeTeam': ['Dallas Mavericks (Maisie)'],
})

# One-hot encode the matchup with the already-fitted encoder
encoded_teams_for_prediction = encoder.transform(
    teams_for_prediction.loc[:, ['awayTeam', 'homeTeam']]
)

# Wrap the encoded row in a DataFrame labelled with the encoder's column names
encoded_teams_for_prediction_df = pd.DataFrame(
    data=encoded_teams_for_prediction,
    columns=encoder.get_feature_names_out(),
)

# Look up the stored mean score of each side (KeyError for a team without one)
average_away = average_score_away.loc[teams_for_prediction.at[0, 'awayTeam']]
average_home = average_score_home.loc[teams_for_prediction.at[0, 'homeTeam']]

# Append the two averages, then put the columns in the same order as `features`
final_prediction_data = encoded_teams_for_prediction_df.assign(
    averageScoreAway=average_away,
    averageScoreHome=average_home,
)[features.columns]

# Scale with the fitted scaler and run the network
final_prediction_data_scaled = scaler.transform(final_prediction_data)
predicted_score = model_nn.predict(final_prediction_data_scaled)
print(f"Прогнозируемый общий счет игры: {predicted_score[0]}")


In [None]:
# Display the feature row assembled for the prediction (columns ordered as in `features`).
final_prediction_data

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Load the raw game data
data = pd.read_csv('basketball.csv')

# Regression target: total points in a game, i.e. all four quarters of both teams
data['totalScore'] = (
    data['firstQuarterAwayScore'] + data['secondQuarterAwayScore'] +
    data['thirdQuarterAwayScore'] + data['fourthQuarterAwayScore'] +
    data['firstQuarterHomeScore'] + data['secondQuarterHomeScore'] +
    data['thirdQuarterHomeScore'] + data['fourthQuarterHomeScore']
)

# Mean away score per away team and mean home score per home team.
# NOTE(review): these means are computed over the full dataset, i.e. before the
# train/test split below, so test rows contribute to the features. Consider
# deriving them from the training rows only.
average_score_away = data.groupby('awayTeam')['awayScore'].mean()
average_score_home = data.groupby('homeTeam')['homeScore'].mean()

data['averageScoreAway'] = data['awayTeam'].map(average_score_away)
data['averageScoreHome'] = data['homeTeam'].map(average_score_home)

# One-hot encode the team names.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so try the new spelling first and fall back for older
# installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
team_names = data[['awayTeam', 'homeTeam']]
encoded_teams = encoder.fit_transform(team_names)

# Attach the encoded columns to the original frame (indexes aligned via reset_index)
encoded_team_names = pd.DataFrame(encoded_teams, columns=encoder.get_feature_names_out(team_names.columns))
data_encoded = pd.concat([data.reset_index(drop=True), encoded_team_names], axis=1)

# Model inputs: one-hot team columns plus the two per-team averages
features = data_encoded[encoded_team_names.columns.tolist() + ['averageScoreAway', 'averageScoreHome']]
target = data_encoded['totalScore']

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Standardise features; the scaler is fitted on the training part only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to float32 PyTorch tensors
X_train_torch = torch.tensor(X_train_scaled.astype(np.float32))
y_train_torch = torch.tensor(y_train.values.astype(np.float32))
X_test_torch = torch.tensor(X_test_scaled.astype(np.float32))
y_test_torch = torch.tensor(y_test.values.astype(np.float32))

# Определение модели
class RegressionModel(nn.Module):
    """Three-layer fully connected regressor: input_size -> 64 -> 32 -> 1."""

    def __init__(self, input_size):
        """Create the three linear layers; `input_size` is the number of input features."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Apply ReLU after each of the first two layers; the last layer is left linear."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

# Функции для обучения и оценки модели
def train_model(model, criterion, optimizer, X_train, y_train, epochs=1000):
    """Train `model` in place for `epochs` full-batch optimisation steps.

    Each step runs the model on the whole of `X_train`, compares the result
    with `y_train` using `criterion`, back-propagates and calls
    `optimizer.step()`. The model is left in training mode; nothing is returned.

    Args:
        model: module called as `model(X_train)`.
        criterion: loss callable taking (predictions, targets).
        optimizer: optimizer bound to the model's parameters.
        X_train: input tensor passed to the model.
        y_train: target tensor passed to the criterion.
        epochs: number of optimisation steps.
    """
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        # Drop only the trailing output axis. A bare squeeze() would also
        # remove the batch axis of a one-sample batch, leaving a 0-d
        # prediction that no longer matches the shape of `y_train`.
        loss = criterion(outputs.squeeze(-1), y_train)
        loss.backward()
        optimizer.step()

def evaluate_model(model, criterion, X_test, y_test):
    """Return the loss of `model` on (`X_test`, `y_test`) as a Python float.

    Switches the model to evaluation mode and computes the loss with gradient
    tracking disabled.

    Args:
        model: module called as `model(X_test)`.
        criterion: loss callable taking (predictions, targets).
        X_test: input tensor passed to the model.
        y_test: target tensor passed to the criterion.

    Returns:
        The value of `criterion` obtained via `.item()`.
    """
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test)
        # Drop only the trailing output axis so a one-sample batch keeps its
        # batch axis and still matches the shape of `y_test`.
        loss = criterion(y_pred.squeeze(-1), y_test)
    return loss.item()

# Cross-validation
# Seed PyTorch as well: the split and KFold are seeded, but without this the
# random weight initialisation makes every reported loss change between runs.
torch.manual_seed(42)

# One loss object for the folds and the final model. Defining it here means
# the final training no longer depends on a variable left over from the loop.
criterion = nn.MSELoss()

kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_results = []

for train_index, test_index in kf.split(X_train_scaled):
    # Split the (already scaled) training data into this fold's train / held-out parts
    X_train_fold = torch.tensor(X_train_scaled[train_index].astype(np.float32))
    y_train_fold = torch.tensor(y_train.values[train_index].astype(np.float32))
    X_test_fold = torch.tensor(X_train_scaled[test_index].astype(np.float32))
    y_test_fold = torch.tensor(y_train.values[test_index].astype(np.float32))

    # Fresh model and optimizer for every fold
    model = RegressionModel(X_train_fold.shape[1])
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train on the fold's training part
    train_model(model, criterion, optimizer, X_train_fold, y_train_fold, epochs=1000)

    # Score on the fold's held-out part
    loss = evaluate_model(model, criterion, X_test_fold, y_test_fold)
    fold_results.append(loss)

# Report mean and standard deviation of the per-fold losses
print(f"Средняя ошибка (Mean Squared Error) на кросс-валидации: {np.mean(fold_results)}, Стандартное отклонение: {np.std(fold_results)}")

# Train the final model on the whole training set
model_final = RegressionModel(X_train_torch.shape[1])
optimizer_final = optim.Adam(model_final.parameters(), lr=0.001)
train_model(model_final, criterion, optimizer_final, X_train_torch, y_train_torch, epochs=1000)

# Evaluate the final model on the test set
final_loss = evaluate_model(model_final, criterion, X_test_torch, y_test_torch)
print(f"Финальная ошибка (Mean Squared Error) на тестовом наборе: {final_loss}")


In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Load and preprocess the data (same procedure as in the earlier cells)
data = pd.read_csv('basketball.csv')

# Regression target: total points in a game, i.e. all four quarters of both teams
data['totalScore'] = (
    data['firstQuarterAwayScore'] + data['secondQuarterAwayScore'] +
    data['thirdQuarterAwayScore'] + data['fourthQuarterAwayScore'] +
    data['firstQuarterHomeScore'] + data['secondQuarterHomeScore'] +
    data['thirdQuarterHomeScore'] + data['fourthQuarterHomeScore']
)

# Mean away score per away team and mean home score per home team.
# NOTE(review): these means are computed over the full dataset, i.e. before the
# train/test split below, so test rows contribute to the features. Consider
# deriving them from the training rows only.
average_score_away = data.groupby('awayTeam')['awayScore'].mean()
average_score_home = data.groupby('homeTeam')['homeScore'].mean()

data['averageScoreAway'] = data['awayTeam'].map(average_score_away)
data['averageScoreHome'] = data['homeTeam'].map(average_score_home)

# One-hot encode the team names.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so try the new spelling first and fall back for older
# installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
team_names = data[['awayTeam', 'homeTeam']]
encoded_teams = encoder.fit_transform(team_names)

# Attach the encoded columns to the original frame (indexes aligned via reset_index)
encoded_team_names = pd.DataFrame(encoded_teams, columns=encoder.get_feature_names_out(team_names.columns))
data_encoded = pd.concat([data.reset_index(drop=True), encoded_team_names], axis=1)

# Model inputs: one-hot team columns plus the two per-team averages
features = data_encoded[encoded_team_names.columns.tolist() + ['averageScoreAway', 'averageScoreHome']]
target = data_encoded['totalScore']

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Standardise features; the scaler is fitted on the training part only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to float32 PyTorch tensors
X_train_torch = torch.tensor(X_train_scaled.astype(np.float32))
y_train_torch = torch.tensor(y_train.values.astype(np.float32))
X_test_torch = torch.tensor(X_test_scaled.astype(np.float32))
y_test_torch = torch.tensor(y_test.values.astype(np.float32))

# Определение модели
class RegressionModel(nn.Module):
    """Feed-forward regressor with two hidden layers (64 and 32 units) and one output."""

    def __init__(self, input_size):
        """Build the linear layers for inputs with `input_size` features."""
        super().__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        """Return fc3(relu(fc2(relu(fc1(x))))); no activation on the output."""
        first_hidden = torch.relu(self.fc1(x))
        second_hidden = torch.relu(self.fc2(first_hidden))
        return self.fc3(second_hidden)

# Build the network, sized to the number of feature columns
model = RegressionModel(input_size=X_train_torch.shape[1])

# Mean-squared-error objective, optimised with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Full-batch training: each epoch is one gradient step over the whole training set
for epoch in range(3000):
    optimizer.zero_grad()
    outputs = model(X_train_torch)
    loss = criterion(outputs.squeeze(), y_train_torch)
    loss.backward()
    optimizer.step()

    # Report progress on every tenth epoch (the message is 1-based)
    if not epoch % 10:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}')

# Score the trained network on the test set, without gradient tracking
model.eval()
with torch.no_grad():
    y_pred = model(X_test_torch)
    test_loss = criterion(y_pred.squeeze(), y_test_torch)
print(f"Test Loss (Mean Squared Error): {test_loss.item()}")



Epoch 1, Loss: 32188.388671875
Epoch 11, Loss: 32069.58984375
Epoch 21, Loss: 31887.34765625
Epoch 31, Loss: 31527.5078125
Epoch 41, Loss: 30848.169921875
Epoch 51, Loss: 29674.916015625
Epoch 61, Loss: 27834.939453125
Epoch 71, Loss: 25206.744140625
Epoch 81, Loss: 21775.1015625
Epoch 91, Loss: 17656.408203125
Epoch 101, Loss: 13110.72265625
Epoch 111, Loss: 8563.310546875
Epoch 121, Loss: 4638.0458984375
Epoch 131, Loss: 2004.0654296875
Epoch 141, Loss: 860.38916015625
Epoch 151, Loss: 628.3214111328125
Epoch 161, Loss: 587.9473876953125
Epoch 171, Loss: 534.0349731445312
Epoch 181, Loss: 499.8909912109375
Epoch 191, Loss: 483.87530517578125
Epoch 201, Loss: 471.9736022949219
Epoch 211, Loss: 461.5197448730469
Epoch 221, Loss: 452.7330017089844
Epoch 231, Loss: 445.09136962890625
Epoch 241, Loss: 438.1982116699219
Epoch 251, Loss: 431.9093017578125
Epoch 261, Loss: 426.14202880859375
Epoch 271, Loss: 420.8433532714844
Epoch 281, Loss: 415.92755126953125
Epoch 291, Loss: 411.348236083

Epoch 2321, Loss: 303.2418518066406
Epoch 2331, Loss: 303.1927795410156
Epoch 2341, Loss: 303.144287109375
Epoch 2351, Loss: 303.09478759765625
Epoch 2361, Loss: 303.0454406738281
Epoch 2371, Loss: 302.996826171875
Epoch 2381, Loss: 302.9488830566406
Epoch 2391, Loss: 302.90185546875
Epoch 2401, Loss: 302.85516357421875
Epoch 2411, Loss: 302.8089599609375
Epoch 2421, Loss: 302.76409912109375
Epoch 2431, Loss: 302.7196044921875
Epoch 2441, Loss: 302.6757507324219
Epoch 2451, Loss: 302.6328430175781
Epoch 2461, Loss: 302.5906066894531
Epoch 2471, Loss: 302.54888916015625
Epoch 2481, Loss: 302.5074462890625
Epoch 2491, Loss: 302.4664001464844
Epoch 2501, Loss: 302.4254455566406
Epoch 2511, Loss: 302.38482666015625
Epoch 2521, Loss: 302.3443908691406
Epoch 2531, Loss: 302.3042907714844
Epoch 2541, Loss: 302.2644958496094
Epoch 2551, Loss: 302.2244567871094
Epoch 2561, Loss: 302.18450927734375
Epoch 2571, Loss: 302.1452331542969
Epoch 2581, Loss: 302.1070251464844
Epoch 2591, Loss: 302.0702

In [3]:
# Matchup to predict (a single away/home pair)
teams_for_prediction = pd.DataFrame({
    'awayTeam': ['Cleveland Cavaliers (Maisie)'],
    'homeTeam': ['Los Angeles Lakers (Lucy)'],
})

# One-hot encode the matchup with the already-fitted encoder
encoded_teams_for_prediction = encoder.transform(
    teams_for_prediction.loc[:, ['awayTeam', 'homeTeam']]
)

# Wrap the encoded row in a DataFrame labelled with the encoder's column names
encoded_teams_for_prediction_df = pd.DataFrame(
    data=encoded_teams_for_prediction,
    columns=encoder.get_feature_names_out(),
)

# Look up the stored mean score of each side (KeyError for a team without one)
average_away = average_score_away.loc[teams_for_prediction.at[0, 'awayTeam']]
average_home = average_score_home.loc[teams_for_prediction.at[0, 'homeTeam']]

# Append the two averages, then put the columns in the same order as `features`
final_prediction_data = encoded_teams_for_prediction_df.assign(
    averageScoreAway=average_away,
    averageScoreHome=average_home,
)[features.columns]

# Apply the scaler fitted on the training data
final_prediction_data_scaled = scaler.transform(final_prediction_data)

# Convert to a float32 tensor
final_prediction_data_torch = torch.tensor(final_prediction_data_scaled, dtype=torch.float32)

# Run the PyTorch model in evaluation mode without gradient tracking
model.eval()
with torch.no_grad():
    predicted_score_torch = model(final_prediction_data_torch)
predicted_score = predicted_score_torch.item()

print(f"Прогнозируемый общий счет игры: {predicted_score}")


Прогнозируемый общий счет игры: 186.00289916992188
