In [None]:
!pip install "pymongo[srv]"

In [None]:
!pip install --upgrade pip

In [None]:
!pip install beautifulsoup4 lxml


In [2]:
from pymongo import MongoClient
import requests

# Получение данных
url = "https://betwinner-232507.top/service-api/LiveFeed/Get1x2_VZip?sports=3&champs=2626462&count=20&gr=495&mode=4"
response = requests.get(url)
data = response.json()

# Подключение к MongoDB
uri = "mongodb+srv://dyominov:1212dema@cluster0.v37qbx3.mongodb.net/?retryWrites=true&w=majority"
client = MongoClient(uri)
db = client['sports_database']  # Название базы данных
collection = db['basketball']  # Название коллекции

for item in data.get("Value", []):
    match_id = item.get("I")
    home_team = item.get("O1")
    away_team = item.get("O2")
    start_time = item.get("S")
    
    # Текущий счет и время получаем из SC
    current_score = item.get("SC", {}).get("FS", {})
    minute = item.get("SC", {}).get("TS")

    # Формируем запись для добавления в историю счета
    score_update = {
        "current_minute": minute,
        "score": {
            "team1": current_score.get("S1"),
            "team2": current_score.get("S2")
        }
    }

    # Обновление документа в MongoDB
    collection.update_one(
        {"match_id": match_id},
        {
            "$set": {"home_team": home_team, "away_team": away_team, "start_time": start_time},
            "$push": {"score_timeline": score_update}  # Добавление новой записи в массив score_timeline
        },
        upsert=True
    )


In [82]:
import pandas as pd

# Извлечение данных из MongoDB
matches = list(collection.find({}))

# Подготовка данных для DataFrame
data_for_df = []
for match in matches:
    for timeline in match["score_timeline"]:
        data_for_df.append({
            "Match ID": match["match_id"],
            "Team 1": match["teams"]["team1"],
            "Team 2": match["teams"]["team2"],
            "Minute": timeline["minute"],
            "Score Team 1": timeline["score"]["team1"],
            "Score Team 2": timeline["score"]["team2"]
        })

# Создание DataFrame
df = pd.DataFrame(data_for_df)

# Экспорт DataFrame в CSV файл
csv_file_path = 'matches.csv'
df.to_csv(csv_file_path, index=False)


KeyError: 'score_timeline'

In [None]:
df

In [3]:
import requests
import json


groups = {17, 15, 62}
# Функция для извлечения и формирования данных в формате JSON
def parse_data(data):
    events_data = []  # Список для хранения данных каждого события

    for event in data['Value']:
        event_info = {}  # Словарь для хранения данных текущего события
        event_info['homeTeam'] = event.get("O1", "Недоступно")
        event_info['awayTeam'] = event.get("O2", "Недоступно")

        # Извлекаем тоталы из списка "E" и добавляем их в список в словаре event_info
        totals_info = []
        for total in event.get("E", []):
            group = total.get("G")
            if group in groups:
                if group == 17:
                        event_info['total'] = total.get("P", "Не указан")
                elif group == 15:
                        event_info['home'] = total.get("P", "Не указан")
                else:
                    event_info['away'] = total.get("P", "Не указан")

        events_data.append(event_info)

    return json.dumps(events_data, ensure_ascii=False)  # Преобразование списка событий в JSON

# URL для получения данных
url = "https://betwinner-232507.top/service-api/LiveFeed/Get1x2_VZip?sports=3&champs=2626462&count=20&gr=495&mode=4"

# Отправка запроса и получение данных
response = requests.get(url)
data = response.json()

# Вызов функции parse_data с полученными данными и вывод результата
json_result = parse_data(data)
print(json_result)


[{"homeTeam": "Golden State Warriors (cyber)", "awayTeam": "Phoenix Suns (cyber)", "total": 201.5, "home": 103.5, "away": 98.5}, {"homeTeam": "Philadelphia 76ers (cyber)", "awayTeam": "Memphis Grizzlies (cyber)", "total": 189.5, "home": 96.5, "away": 95.5}, {"homeTeam": "Sacramento Kings (cyber)", "awayTeam": "Boston Celtics (cyber)"}]


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder

# Загрузка данных
data = pd.read_csv('basketball2.csv')

# Применение one-hot encoding к домашним и гостевым командам
encoder = OneHotEncoder(sparse=False)
all_teams = pd.concat([data['homeTeam'], data['awayTeam']])
encoder.fit(all_teams.to_numpy().reshape(-1, 1))

home_teams_encoded = encoder.transform(data['homeTeam'].to_numpy().reshape(-1, 1))
away_teams_encoded = encoder.transform(data['awayTeam'].to_numpy().reshape(-1, 1))

home_teams_df = pd.DataFrame(home_teams_encoded, columns=encoder.get_feature_names_out(['home']))
away_teams_df = pd.DataFrame(away_teams_encoded, columns=encoder.get_feature_names_out(['away']))

combined_teams_df = pd.concat([home_teams_df, away_teams_df], axis=1)

# Определение базовых моделей и мета-модели
base_models = [
    ('XGBRegressor', XGBRegressor(objective='reg:squarederror')),
    ('CatBoostRegressor', CatBoostRegressor(verbose=0)),
    ('RandomForestRegressor', RandomForestRegressor()),
    ('ridge', Ridge()),
    ('gradient_boosting', GradientBoostingRegressor()),
    ('MLPRegressor', MLPRegressor(max_iter=10000))
]
meta_model = LinearRegression()

# Обучение модели стекинга
stacked_model = StackingRegressor(estimators=base_models, final_estimator=meta_model, n_jobs=-1)
stacked_model.fit(combined_teams_df, data['totalScores'])

# Прогнозирование на всём наборе данных
y_pred = stacked_model.predict(combined_teams_df)

# Оценка модели
mse = mean_squared_error(data['totalScores'], y_pred)
r2 = r2_score(data['totalScores'], y_pred)
rmse = mse ** 0.5
print(f'MSE: {mse}, R2: {r2}, RMSE: {rmse}')

# Подготовка данных с предсказаниями
data_with_predictions = data.copy()
data_with_predictions['PredictedTotalScores'] = y_pred
data_with_predictions['AbsoluteError'] = abs(data_with_predictions['PredictedTotalScores'] - data_with_predictions['totalScores'])

# Функция для анализа предсказаний конкретных пар команд
def analyze_team_pair_predictions(test_data, team1, team2):
    specific_pair = test_data[(test_data['homeTeam'] == team1) & (test_data['awayTeam'] == team2)]
    mean_absolute_error = specific_pair['AbsoluteError'].mean()
    return specific_pair[['homeTeam', 'awayTeam', 'PredictedTotalScores', 'totalScores', 'AbsoluteError']], mean_absolute_error



MSE: 132.76074805566793, R2: 0.2556205239503212, RMSE: 11.522185038249816


In [69]:
# Пример использования функции
results, mean_error = analyze_team_pair_predictions(data_with_predictions, 'Boston Celtics (cyber)', 'Cleveland Cavaliers (cyber)')
print(results)
print(f"Средняя абсолютная ошибка для пары команд: {mean_error}")

#Boston Celtics (cyber) - Cleveland Cavaliers (cyber)

                    homeTeam                     awayTeam  \
36    Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
94    Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
121   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
204   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
208   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
292   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
339   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
381   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
401   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
414   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
524   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
931   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
978   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
984   Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
1169  Boston Celtics (cyber)  Cleveland Cavaliers (cyber)   
1395  Boston Celtics (cy

In [9]:
# Создание столбца с парами команд
data_with_predictions['TeamPair'] = data_with_predictions.apply(lambda row: f"{row['homeTeam']} - {row['awayTeam']}", axis=1)

# Функция для анализа предсказаний для группы матчей
def analyze_group(group):
    mean_actual = group['totalScores'].mean()
    mean_predicted = group['PredictedTotalScores'].mean()
    mean_absolute_error = group['AbsoluteError'].mean()
    return {
        'MeanActualScores': mean_actual,
        'MeanPredictedScores': mean_predicted,
        'MeanAbsoluteError': mean_absolute_error,
        'MatchCount': len(group)
    }

# Группировка по парам команд и применение функции анализа
analysis_results = data_with_predictions.groupby('TeamPair').apply(analyze_group).apply(pd.Series)

# Вывод результатов
print(analysis_results.sort_values(by='MeanAbsoluteError'))




                                                    MeanActualScores  \
TeamPair                                                               
Sacramento Kings (cyber) - Cleveland Cavaliers ...        211.285714   
Miami Heat (cyber) - Cleveland Cavaliers (cyber)          202.774194   
Phoenix Suns (cyber) - Los Angeles Clippers (cy...        207.363636   
Miami Heat (cyber) - Brooklyn Nets (cyber)                192.057143   
Brooklyn Nets (cyber) - Boston Celtics (cyber)            194.685714   
...                                                              ...   
Golden State Warriors (cyber) - Denver Nuggets ...        202.833333   
Philadelphia 76ers (cyber) - Phoenix Suns (cyber)         207.625000   
Denver Nuggets (cyber) - Cleveland Cavaliers (c...        206.250000   
Boston Celtics (cyber) - Philadelphia 76ers (cy...        199.117647   
Los Angeles Lakers (cyber) - Phoenix Suns (cyber)         208.222222   

                                                    MeanPredict

In [10]:
# Сохранение результатов анализа в файл CSV
analysis_results.to_csv('analysis_results_home.csv', index=True)


In [68]:
import requests


# Функция для отправки запроса
def send_request(data):
    response = requests.post(url, json=data, headers=headers)
    return response



# URL для отправки запроса
url = 'https://betwinner-232507.top/web-api/datalinelive/putbetscommon'

# Заголовки запроса
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
    'content-type': 'application/json',
    'cookie': 'SESSION=e3f33713aa692ce05f63352fd60bc9d4',  # Обрезано для упрощения
    'dnt': '1',
    'is-srv': 'false',
    'origin': 'https://betwinner-232507.top',
    'referer': 'https://betwinner-232507.top/ru/live/basketball/2626462-nba-2k24-cyber-league/519263061-philadelphia-76ers-cyber-new-york-knicks-cyber',
    'sec-ch-ua': '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
    'sec-ch-ua-mobile': '?1',
    'sec-ch-ua-platform': '"Android"',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-origin',
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Mobile Safari/537.36',
    'x-requested-with': 'XMLHttpRequest'
}

# Тело запроса
data = {
    "coupon": {
        "UserId": 795238725,
        "Events": [
            {
                "GameId": 519263061,
                "Type": 8,
                "Coef": 2.315,
                "Param": 7.5,
                "PV": None,
                "PlayerId": 0,
                "Kind": 1,
                "InstrumentId": 0,
                "Seconds": 0,
                "Price": 0,
                "Expired": 0,
                "PlayersDuel": []
            }
        ],
        "Vid": 0,
        "partner": 152,
        "CfView": 0,
        "Live": True,
        "CheckCf": 2,
        "Lng": "ru",
        "hash": "2d257c7e9e7926e7926598a4fa5728e4",
        "ApprovedBet": False,
        "notWait": True,
        "IsPowerBet": False,
        "Summ": 5,
        "isAutoBet": True,
        "autoBetCf": 0,
        "DropOnScoreChange": False,
        "TransformEventKind": True,
        "autoBetCfView": 0
    }
}

# Отправка первого запроса
first_response = send_request(data)

# Парсинг ответа и извлечение betGUID
first_response_data = first_response.json()

print(first_response_data)

# Проверяем, что запрос был успешным и в ответе есть словарь 'Value'
if first_response_data.get('Success') and 'Value' in first_response_data:
    new_betGUID = first_response_data['Value'].get('betGUID', '')
else:
    new_betGUID = ''
    print("Не удалось извлечь betGUID из ответа сервера")

print(new_betGUID)


# Обновление данных для второго запроса
data['coupon']['betGUID'] = new_betGUID

# Отправка измененного запроса
second_response = send_request(data)

# Вывод ответа от сервера на второй запрос
print(second_response.text)

{'Error': '', 'ErrorCode': 0, 'Guid': '00d17880-f401-4338-b16f-69a985162a11', 'Id': 0, 'Success': True, 'Value': {'Balance': 0, 'Coupon': None, 'Dt': '/Date(1711226636755)/', 'FailInfo': None, 'Id': 0, 'SummPrep': 0, 'betGUID': '65ff3f0c077caa60f262f246', 'lnC': False, 'lvC': False, 'waitTime': 6000}}
65ff3f0c077caa60f262f246
{"Error":"","ErrorCode":0,"Guid":"979a9b94-8df7-4dde-aa5e-cd2de05ee548","Id":0,"Success":true,"Value":{"Balance":0,"Coupon":null,"Dt":"\/Date(1711226637248)\/","FailInfo":null,"Id":0,"SummPrep":0,"betGUID":"65ff3f0c077caa60f262f246","lnC":false,"lvC":false,"waitTime":4506}}


In [None]:
import requests
import pymongo

from pymongo import MongoClient

# Подключение к MongoDB
uri = "mongodb+srv://dyominov:1212dema@cluster0.v37qbx3.mongodb.net/?retryWrites=true&w=majority"
client = MongoClient(uri)
db = client['basket']  # Замените на имя вашей базы данных
collection = db['basket']  # Замените на имя вашей коллекции

startDate = '2024-02-11T22:00:00.000'
endDate = '2024-03-06T22:00:00.000'

# URL, с которого нужно получить JSON
url = ('https://stats-widget-api.feedconstruct.com/api/en/900/93f428d0-6591-48da-859d-b6c326db2448/Match/' 
       'GetCalendarWidgetMatches?sportId=73&competitionId=null&' 
       f'startDate={startDate}&endDate={endDate}&liveStatus=3')

# Отправка запроса и получение ответа
response = requests.get(url)

# Проверка статуса ответа
if response.status_code == 200:
    # Преобразование ответа из JSON в список словарей Python
    data = response.json()

    transformed_data = []
    for match in data:
        if match['Results'] is not None:
            # Создание словаря для хранения результатов каждого квартала
            quarter_scores = {f'quarter{i}Score': {'home': 0, 'away': 0} for i in range(1, 5)}

            # Обработка данных по каждому кварталу
            for result in match['Results']:
                if 'Quarter' in result['TypeName']:
                    quarter_number = int(result['TypeAbbreviation'])
                    quarter_scores[f'quarter{quarter_number}Score']['home'] = result['HomeScore']
                    quarter_scores[f'quarter{quarter_number}Score']['away'] = result['AwayScore']

            # Сборка окончательного объекта для матча
            new_match = {
                '_id': match['Id'],
                'awayScore': match['AwayScore'],
                'homeScore': match['HomeScore'],
                'awayTeam': match['AwayTeam']['Name'],
                'homeTeam': match['HomeTeam']['Name'],
                'competition': match['Competition']['Name'],
                'date': match['Date'],
                'region': match['Region']['Name'],
                'firstQuarterAwayScore': quarter_scores['quarter1Score']['away'],
                'firstQuarterHomeScore': quarter_scores['quarter1Score']['home'],
                'secondQuarterAwayScore': quarter_scores['quarter2Score']['away'],
                'secondQuarterHomeScore': quarter_scores['quarter2Score']['home'],
                'thirdQuarterAwayScore': quarter_scores['quarter3Score']['away'],
                'thirdQuarterHomeScore': quarter_scores['quarter3Score']['home'],
                'fourthQuarterAwayScore': quarter_scores['quarter4Score']['away'],
                'fourthQuarterHomeScore': quarter_scores['quarter4Score']['home']
            }

            transformed_data.append(new_match)

    # Сохранение данных в MongoDB
    if transformed_data:  # Проверка на наличие преобразованных данных
        collection.insert_many(transformed_data)  # Используйте insert_many для добавления нескольких документов
        print("Данные успешно сохранены в MongoDB")
    else:
        print("Нет данных для сохранения")
else:
    print("Ошибка при получении данных: статус", response.status_code)


In [1]:
import requests
import pymongo
from datetime import datetime, timezone


from pymongo import MongoClient

# Подключение к MongoDB
uri = "mongodb+srv://dyominov:1212dema@cluster0.v37qbx3.mongodb.net/?retryWrites=true&w=majority"
client = MongoClient(uri)
db = client['basket']  # Замените на имя вашей базы данных
collection = db['basket2']  # Замените на имя вашей коллекции

def generate_url_with_unix_timestamp(date_from_iso, date_to_iso):
    # Конвертация дат из ISO 8601 в Unix timestamp
    date_from_unix = int(datetime.fromisoformat(date_from_iso).replace(tzinfo=timezone.utc).timestamp())
    date_to_unix = int(datetime.fromisoformat(date_to_iso).replace(tzinfo=timezone.utc).timestamp())

    # Формирование URL с Unix timestamp вместо ISO 8601
    url = (f'https://betwinner-232507.top/service-api/result/web/api/v1/games?champId=2626462'
           f'&dateFrom={date_from_unix}&dateTo={date_to_unix}&lng=ru&ref=152&gr=495&country=2')
    print(url)
    return url

def transform_data(data):
    transformed_data = []
    for match in data['items']:
        # Разбиение строки с результатами на четверти
        quarters = match['score'].split(' ')[1].split(',')
        if (len(quarters) > 4):
            continue
        # Инициализация счетов по четвертям
        quarter_scores = {'firstQuarter': {}, 'secondQuarter': {}, 'thirdQuarter': {}, 'fourthQuarter': {}}
        home_total = 0
        away_total = 0

        for i, quarter in enumerate(quarters):
            # Удаление скобок из строки с результатами четверти
            quarter = quarter.replace('(', '').replace(')', '')

            home_score, away_score = map(int, quarter.split(':'))
            quarter_name = list(quarter_scores.keys())[i]
            quarter_scores[quarter_name]['home'] = home_score
            quarter_scores[quarter_name]['away'] = away_score
            home_total += home_score
            away_total += away_score

        # Формирование итогового объекта для каждого матча
        new_match = {
            '_id': match['id'],
            'awayTeam': match['opp2'],
            'homeTeam': match['opp1'],
            'competition': match['champName'],
            'date': match['dateStart'],
            'totalScores': (home_total + away_total),
            'home': home_total,
            'away': away_total,
            'firstQuarterAwayScore': quarter_scores['firstQuarter']['away'],
            'firstQuarterHomeScore': quarter_scores['firstQuarter']['home'],
            'secondQuarterAwayScore': quarter_scores['secondQuarter']['away'],
            'secondQuarterHomeScore': quarter_scores['secondQuarter']['home'],
            'thirdQuarterAwayScore': quarter_scores['thirdQuarter']['away'],
            'thirdQuarterHomeScore': quarter_scores['thirdQuarter']['home'],
            'fourthQuarterAwayScore': quarter_scores['fourthQuarter']['away'],
            'fourthQuarterHomeScore': quarter_scores['fourthQuarter']['home']
        }
        transformed_data.append(new_match)
    return transformed_data



# Исходные даты
date_from_iso = '2024-03-24T00:00:00.000'
date_to_iso = '2024-03-25T00:00:00.000'

# Генерация URL
url = generate_url_with_unix_timestamp(date_from_iso, date_to_iso)

# Отправка запроса и получение ответа
response = requests.get(url)

if response.status_code == 200:
    data = response.json()
    transformed_data = transform_data(data)

    # Здесь вы можете сохранить преобразованные данные в MongoDB
    collection.insert_many(transformed_data)
    print("Данные успешно сохранены в MongoDB")

else:
    print(f"Ошибка при получении данных: статус {response.status_code}")

https://betwinner-232507.top/service-api/result/web/api/v1/games?champId=2626462&dateFrom=1711238400&dateTo=1711324800&lng=ru&ref=152&gr=495&country=2
Данные успешно сохранены в MongoDB


In [2]:
import pandas as pd
df = pd.DataFrame(list(collection.find()))

df

Unnamed: 0,_id,awayTeam,homeTeam,competition,date,totalScores,home,away,firstQuarterAwayScore,firstQuarterHomeScore,secondQuarterAwayScore,secondQuarterHomeScore,thirdQuarterAwayScore,thirdQuarterHomeScore,fourthQuarterAwayScore,fourthQuarterHomeScore
0,489401947,Brooklyn Nets (cyber),Boston Celtics (cyber),NBA 2K24. Cyber League,1699717200,175,103,72,14,27,22,27,16,22,20,27
1,490672305,Miami Heat (cyber),Cleveland Cavaliers (cyber),NBA 2K24. Cyber League,1700218800,207,102,105,21,30,26,22,26,25,32,25
2,490474362,Miami Heat (cyber),Philadelphia 76ers (cyber),NBA 2K24. Cyber League,1700140800,187,114,73,17,30,18,33,29,28,9,23
3,490455096,Atlanta Hawks (cyber),Brooklyn Nets (cyber),NBA 2K24. Cyber League,1700134800,213,103,110,35,16,26,27,23,28,26,32
4,490496168,Miami Heat (cyber),Brooklyn Nets (cyber),NBA 2K24. Cyber League,1700148600,204,110,94,19,27,26,23,27,30,22,30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4245,519324465,Denver Nuggets (cyber),Minnesota Timberwolves (cyber),NBA 2K24. Cyber League,1711249200,203,106,97,22,20,27,28,23,32,25,26
4246,519319461,Los Angeles Lakers (cyber),Los Angeles Clippers (cyber),NBA 2K24. Cyber League,1711246800,202,95,107,24,19,24,19,32,28,27,29
4247,519314539,Milwaukee Bucks (cyber),Miami Heat (cyber),NBA 2K24. Cyber League,1711244400,192,87,105,31,20,23,22,17,24,34,21
4248,519309387,Sacramento Kings (cyber),Phoenix Suns (cyber),NBA 2K24. Cyber League,1711242000,222,115,107,26,33,29,25,26,30,26,27


In [3]:
df.to_csv('basketball2.csv', index=False)

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import OneHotEncoder
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import Lasso, ElasticNet, BayesianRidge, TheilSenRegressor, RANSACRegressor

# Загрузка данных
file_path = 'basketball2.csv'
data = pd.read_csv(file_path)

# Кодирование категориальных признаков
categorical_features = ['awayTeam', 'homeTeam']
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
encoded_features = one_hot_encoder.fit_transform(data[categorical_features]).toarray()
feature_names = one_hot_encoder.get_feature_names_out(categorical_features)
encoded_df = pd.DataFrame(encoded_features, columns=feature_names)

# Вычисление среднего счета для каждой команды и добавление в датафрейм
#data['averageScoreAway'] = data['awayTeam'].map(data.groupby('awayTeam')['away'].mean())
#data['averageScoreHome'] = data['homeTeam'].map(data.groupby('homeTeam')['home'].mean())

# Объединение закодированных признаков и средних счетов с исходными данными
data_final = pd.concat([data.drop(categorical_features + ['away', 'home', 'totalScores'], axis=1), encoded_df], axis=1)

# Подготовка данных
X = data_final.drop(['_id','competition','date', 'firstQuarterAwayScore', 'firstQuarterHomeScore', 'secondQuarterAwayScore', 'secondQuarterHomeScore', 'thirdQuarterAwayScore', 'thirdQuarterHomeScore','fourthQuarterAwayScore', 'fourthQuarterHomeScore'], axis=1)
targets = ['totalScores', 'home', 'away', 'firstQuarterAwayScore', 'firstQuarterHomeScore', 'secondQuarterAwayScore', 'secondQuarterHomeScore', 'thirdQuarterAwayScore', 'thirdQuarterHomeScore', 'fourthQuarterAwayScore', 'fourthQuarterHomeScore']

# Обучение и оценка моделей для каждой целевой переменной
results = {}
for target in targets:
    y = data[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Модели
    models = {
        'LinearRegression': LinearRegression(),
        'Ridge': Ridge(),
        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'SVR': SVR(),
        'DecisionTreeRegressor': DecisionTreeRegressor(),
        'RandomForestRegressor': RandomForestRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),
        'XGBRegressor': XGBRegressor(objective='reg:squarederror'),
        'CatBoostRegressor': CatBoostRegressor(verbose=0),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'KNeighborsRegressor': KNeighborsRegressor(),
        'MLPRegressor': MLPRegressor(max_iter=5000),
        'GaussianProcessRegressor': GaussianProcessRegressor(),
        'BayesianRidge': BayesianRidge(),
        'TheilSenRegressor': TheilSenRegressor(),
        'RANSACRegressor': RANSACRegressor(),
    }
    
    results[target] = {}
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        results[target][model_name] = {'MSE': mse, 'MAE': mae}

# Вывод результатов
for target, models in results.items():
    print(f"Target: {target}")
    for model_name, metrics in models.items():
        print(f"  Model: {model_name} - MSE: {metrics['MSE']:.2f}, MAE: {metrics['MAE']:.2f}")
    print("-" * 30)


Target: totalScores
  Model: LinearRegression - MSE: 132.72, MAE: 9.08
  Model: Ridge - MSE: 132.57, MAE: 9.08
  Model: Lasso - MSE: 179.72, MAE: 10.55
  Model: ElasticNet - MSE: 177.26, MAE: 10.47
  Model: SVR - MSE: 135.92, MAE: 9.12
  Model: DecisionTreeRegressor - MSE: 135.68, MAE: 9.12
  Model: RandomForestRegressor - MSE: 136.13, MAE: 9.14
  Model: GradientBoostingRegressor - MSE: 134.69, MAE: 9.12
  Model: XGBRegressor - MSE: 135.67, MAE: 9.12
  Model: CatBoostRegressor - MSE: 135.38, MAE: 9.11
  Model: AdaBoostRegressor - MSE: 153.69, MAE: 9.77
  Model: KNeighborsRegressor - MSE: 155.04, MAE: 9.79
  Model: MLPRegressor - MSE: 132.54, MAE: 9.08
  Model: GaussianProcessRegressor - MSE: 135.68, MAE: 9.12
  Model: BayesianRidge - MSE: 132.80, MAE: 9.08
  Model: TheilSenRegressor - MSE: 146.49, MAE: 9.62
  Model: RANSACRegressor - MSE: 179.64, MAE: 10.67
------------------------------
Target: home
  Model: LinearRegression - MSE: 71.56, MAE: 6.68
  Model: Ridge - MSE: 71.69, MAE: 6.

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, Ridge, BayesianRidge
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor, StackingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_squared_log_error, explained_variance_score, median_absolute_error
from sklearn.preprocessing import OneHotEncoder
from joblib import dump
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


# Загрузка данных
file_path = 'basketball2.csv'
data = pd.read_csv(file_path)


# Кодирование категориальных признаков
categorical_features = ['awayTeam', 'homeTeam']
one_hot_encoder = OneHotEncoder(handle_unknown='ignore')
encoded_features = one_hot_encoder.fit_transform(data[categorical_features]).toarray()
feature_names = one_hot_encoder.get_feature_names_out(categorical_features)
encoded_df = pd.DataFrame(encoded_features, columns=feature_names)


# Исправлено: использование data_filtered вместо data
X = pd.concat([encoded_df], axis=1)
#X = pd.concat([X, data[['firstQuarterAwayScore', 'firstQuarterHomeScore', 'secondQuarterAwayScore', 'secondQuarterHomeScore', 
#                                         'thirdQuarterAwayScore', 'thirdQuarterHomeScore']]], axis=1)


# Обновление y с использованием отфильтрованных данных для каждой целевой переменной
y_updated = {
    'total': data['totalScores'],
    'home': data['home'],
    'away': data['away'],
}



# Определение базовых моделей
base_models = [
    ('XGBRegressor', XGBRegressor(objective='reg:squarederror')),
    ('CatBoostRegressor', CatBoostRegressor(verbose=0)),
    ('RandomForestRegressor', RandomForestRegressor()),
    ('ridge', Ridge(alpha=1, max_iter=None, solver='lsqr')),  # Обновленные параметры для Ridge
    ('gradient_boosting', GradientBoostingRegressor(learning_rate=0.1, max_depth=3, n_estimators=100)),  # Обновленные параметры для GradientBoostingRegressor
    ('MLPRegressor', MLPRegressor(activation='relu', hidden_layer_sizes=(50,), max_iter=10000))  # Обновленные параметры для MLPRegressor
]


# Определение мета-модели
meta_model = LinearRegression()


# Кросс-валидация для базовых моделей по каждой целевой переменной
for target, y_target in y_updated.items():
    print(f"Processing {target}...")
    for name, model in base_models:
        scores = cross_val_score(model, X, y_target, cv=5, scoring='neg_mean_squared_error')
        print(f"Cross-validated scores for {name} on {target}: {scores.mean():.2f}")


# Словарь для хранения моделей
stacked_models = {}

# Обучение и сохранение моделей для каждой целевой переменной
for target, y_target in y_updated.items():
    print(f"Processing {target}...")
    X_train, X_test, y_train, y_test = train_test_split(X, y_target, test_size=0.2, random_state=42)

    # Создание и обучение стекинговой модели
    stacked_regressor = StackingRegressor(estimators=base_models, final_estimator=meta_model, cv=5)
    stacked_regressor.fit(X_train, y_train)
    
    # Сохранение обученной стекинговой модели
    model_filename = f'model_{target}.joblib'
    dump(stacked_regressor, model_filename)
    print(f"Saved {target} model as {model_filename}")

    # Оценка модели
    y_pred = stacked_regressor.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    # Добавление дополнительных метрик
    rmse_val = np.sqrt(mse)
    r2_val = r2_score(y_test, y_pred)
    explained_variance = explained_variance_score(y_test, y_pred)
    median_ae = median_absolute_error(y_test, y_pred)
    try:
        msle_val = mean_squared_log_error(y_test, y_pred)
    except ValueError:
        msle_val = 'не вычисляется из-за отрицательных предсказаний'

    # Вывод всех метрик
    print(f"MSE: {mse:.2f}, MAE: {mae:.2f}, RMSE: {rmse_val:.2f}, MSLE: {msle_val}, R2: {r2_val:.2f}, Explained Variance: {explained_variance:.2f}, Median AE: {median_ae:.2f}")
    
    # Сохранение модели для дальнейшего использования
    stacked_models[target] = stacked_regressor

# Сохранение OneHotEncoder
dump(one_hot_encoder, 'one_hot_encoder.joblib')
print("Saved OneHotEncoder as one_hot_encoder.joblib")


Processing total...
Cross-validated scores for XGBRegressor on total: -141.23
Cross-validated scores for CatBoostRegressor on total: -140.74
Cross-validated scores for RandomForestRegressor on total: -141.31
Cross-validated scores for ridge on total: -136.68
Cross-validated scores for gradient_boosting on total: -137.18
Cross-validated scores for MLPRegressor on total: -136.70
Processing home...
Cross-validated scores for XGBRegressor on home: -75.78
Cross-validated scores for CatBoostRegressor on home: -75.60
Cross-validated scores for RandomForestRegressor on home: -75.90
Cross-validated scores for ridge on home: -73.86
Cross-validated scores for gradient_boosting on home: -74.40
Cross-validated scores for MLPRegressor on home: -73.90
Processing away...
Cross-validated scores for XGBRegressor on away: -77.17
Cross-validated scores for CatBoostRegressor on away: -76.89
Cross-validated scores for RandomForestRegressor on away: -77.13
Cross-validated scores for ridge on away: -74.05
Cro

In [5]:
import pandas as pd

# Загрузка данных
df = pd.read_csv('basketball2.csv')  # Убедитесь, что здесь указан правильный путь к вашему файлу
# Инициализация счетчика ничьих исходов
total_ties = 0

for _, match in df.iterrows():
    # Суммирование очков за четыре четверти для каждой команды
    home_score = match['firstQuarterHomeScore'] + match['secondQuarterHomeScore'] + \
                 match['thirdQuarterHomeScore'] + match['fourthQuarterHomeScore']
    away_score = match['firstQuarterAwayScore'] + match['secondQuarterAwayScore'] + \
                 match['thirdQuarterAwayScore'] + match['fourthQuarterAwayScore']
    
    # Проверка на ничью
    if home_score == away_score:
        total_ties += 1

print(f"Total ties (without overtime): {total_ties}")


Total ties (without overtime): 0


In [42]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
import torch
import torch.nn as nn
import torch.optim as optim

# Загрузка и предварительная обработка данных
data = pd.read_csv('basketball2.csv')

# Кодирование названий команд
encoder = OneHotEncoder(sparse=False)
team_names = data[['awayTeam', 'homeTeam']]
encoded_teams = encoder.fit_transform(team_names)
encoded_team_names = pd.DataFrame(encoded_teams, columns=encoder.get_feature_names_out(team_names.columns))
data_encoded = pd.concat([data.reset_index(drop=True), encoded_team_names], axis=1)

# Подготовка данных для обучения
features = data_encoded[encoded_team_names.columns.tolist()]

# Разделение на обучающий и тестовый наборы
X_train, X_test = train_test_split(features, test_size=0.2, random_state=42)

# Масштабирование признаков
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Преобразование данных в тензоры PyTorch
X_train_torch = torch.tensor(X_train_scaled.astype(np.float32))
X_test_torch = torch.tensor(X_test_scaled.astype(np.float32))

# Определение целевых переменных
targets = {
    'total': data['totalScores'],
}

# Определение модели
class RegressionModel(nn.Module):
    def __init__(self, input_size):
        super(RegressionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 64)
        self.fc5 = nn.Linear(64, 32)
        self.fc6 = nn.Linear(32, 1)


    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = torch.relu(self.fc5(x))
        x = self.fc6(x)
        return x


# Функция для вычисления MAE
def mean_absolute_error(output, target):
    return torch.mean(torch.abs(output - target))

# Обучение моделей для каждой целевой переменной
for target_name, target_data in targets.items():
    print(f"Training model for: {target_name}")
    
    # Разделение данных на обучающую и тестовую выборки для текущей целевой переменной
    y_train, y_test = train_test_split(target_data, test_size=0.2, random_state=42)
    
    # Преобразование целевых переменных в тензоры PyTorch
    y_train_torch = torch.tensor(y_train.values.astype(np.float32))
    y_test_torch = torch.tensor(y_test.values.astype(np.float32))
    
    # Инициализация и обучение модели
    model = RegressionModel(X_train_torch.shape[1])
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(1000):  # Можно настроить количество эпох
        optimizer.zero_grad()
        outputs = model(X_train_torch)
        loss = criterion(outputs.squeeze(), y_train_torch)
        loss.backward()
        optimizer.step()


    # Оценка модели
    model.eval()
    with torch.no_grad():
        y_pred = model(X_test_torch)
        test_loss = criterion(y_pred.squeeze(), y_test_torch)
        test_mae = mean_absolute_error(y_pred.squeeze(), y_test_torch)
        print(f"{target_name} - Test Loss (MSE): {test_loss.item()}, Test MAE: {test_mae.item()}\n")


Training model for: total
total - Test Loss (MSE): 139.49774169921875, Test MAE: 9.224629402160645



In [41]:
from joblib import load
import pandas as pd

# Загружаем стекинговые модели и OneHotEncoder из указанной папки
model_total = load(f'model_total.joblib')
model_home = load(f'model_home.joblib')
model_away = load(f'model_away.joblib')
one_hot_encoder = load(f'one_hot_encoder.joblib')

# Допустим, X_train_columns сохранены в файле или определены в коде. Вам нужно их загрузить или определить.
# X_train_columns = [...]

def predict_match(away_team, home_team):
    # Данные для прогнозирования
    teams_for_prediction = pd.DataFrame({
        'awayTeam': [away_team],
        'homeTeam': [home_team]
    })

    # Применение OneHotEncoder к новым данным
    encoded_teams_for_prediction = one_hot_encoder.transform(teams_for_prediction).toarray()
    
    # Преобразование в DataFrame
    encoded_teams_df = pd.DataFrame(encoded_teams_for_prediction, columns=one_hot_encoder.get_feature_names_out())
    
    # Предсказания с использованием загруженных моделей
    predictions = {
        'Total Score': model_total.predict(encoded_teams_df)[0],
        'Home Score': model_home.predict(encoded_teams_df)[0],
        'Away Score': model_away.predict(encoded_teams_df)[0],
    }

    # Вывод результатов
    for key, value in predictions.items():
        print(f"{key}: {value}")
        
    

# Пример использования функции
predict_match('Cleveland Cavaliers (cyber)', 'Atlanta Hawks (cyber)')


Total Score: 207.77274643952651
Home Score: 103.6142256800634
Away Score: 104.48387131042789


In [None]:
def predict_match(away_team, home_team):
    # Данные для прогнозирования
    teams_for_prediction = pd.DataFrame({
        'awayTeam': [away_team],
        'homeTeam': [home_team]
    })

    # Применение OneHotEncoder к новым данным
    encoded_teams_for_prediction = encoder.transform(teams_for_prediction)
    encoded_teams_df = pd.DataFrame(encoded_teams_for_prediction, columns=encoder.get_feature_names_out())
    
    # Убедитесь, что у encoded_teams_df такое же количество столбцов, как и у X_train/X_test, и в том же порядке
    missing_cols = set(features.columns) - set(encoded_teams_df.columns)
    for c in missing_cols:
        encoded_teams_df[c] = 0
    encoded_teams_df = encoded_teams_df[features.columns]
    
    # Масштабирование новых данных с использованием того же Scaler, что и для обучающего набора
    encoded_teams_scaled = scaler.transform(encoded_teams_df)

    # Преобразование в тензор PyTorch
    encoded_teams_torch = torch.tensor(encoded_teams_scaled.astype(np.float32))
    
    # Предсказания с использованием обученной модели
    model.eval()  # Переключение модели в режим оценки
    with torch.no_grad():  # Отключение вычисления градиентов
        y_pred = model(encoded_teams_torch)  # Получение предсказаний модели

    # Вывод результатов
    print(f"Прогнозируемые результаты игры:\n{y_pred.numpy()}")  # Преобразование тензора обратно в NumPy для удобочитаемого вывода

# Пример использования функции
predict_match('Brooklyn Nets (cyber)', 'Boston Celtics (cyber)')
