In [1]:
import pandas as pd

# Absolute Windows path of the input CSV; the raw-string prefix keeps the backslashes literal.
# NOTE(review): machine-specific location — the notebook only runs where this file exists.
csv_path = r'D:\WeChat Files\wxid_uwnfhnxuzbcc41\FileStorage\File\2023-06\final_dataset.csv'
# Load the whole CSV into a DataFrame; the aggregation loop later in this cell treats
# each row as one match (it reads HomeTeam, AwayTeam, FTR, Date, shot/goal and odds columns).
df = pd.read_csv(csv_path)

def init_team_dic(team_name):
    '''
    Create the empty statistics record for a single team.

    A new dict (with new list objects) is built on every call, so the records of
    different teams never share mutable state. Key order is fixed because the
    record is later serialized as-is.

    team_name: name stored under the 'team' key.

    Returns a dict with these fields:
        team                   team name
        totalGames             number of matches played
        wins / draws / losses  number of matches won / drawn / lost
        winDetails / drawDetails / lossDetails
                               per-match detail entries for won / drawn / lost matches
        expected               matches whose result matched the odds expectation
        middle                 matches whose result roughly matched the expectation
        opposite               matches whose result went against the expectation
        date                   match dates, in processing order
        score                  team points after each match (starts with a single 0)
        totalShot              total shots
        totalShotTarget        total shots on target
        totalGoal              total goals scored
        totalGoalDetail        goals scored in each match
        goalDifference         overall goal difference
        goalDifferenceDetail   goal difference of each match
        winOddAvg / drawOddAvg / loseOddAvg / bigOddAvg / smallOddAvg
                               average odds for win / draw / loss / big ball / small ball
    '''
    team_record = {
        # identity and match counters
        'team': team_name,
        'totalGames': 0,
        'wins': 0,
        'draws': 0,
        'losses': 0,
        # per-match detail entries, grouped by result
        'winDetails': [],
        'drawDetails': [],
        'lossDetails': [],
        # how often the result agreed with the odds ranking
        'expected': 0,
        'middle': 0,
        'opposite': 0,
        # time series: match dates and running points (seeded with 0)
        'date': [],
        'score': [0],
        # shooting and scoring totals
        'totalShot': 0,
        'totalShotTarget': 0,
        'totalGoal': 0,
        'totalGoalDetail': [],
        'goalDifference': 0,
        'goalDifferenceDetail': [],
        # odds accumulators
        'winOddAvg': 0,
        'drawOddAvg': 0,
        'loseOddAvg': 0,
        'bigOddAvg': 0,
        'smallOddAvg': 0,
    }
    return team_record



# Per-team season statistics keyed by team name, filled in by one pass over the rows of df.
result = {}
for _, match in df.iterrows():
    home_team = match['HomeTeam']
    away_team = match['AwayTeam']

    # Full-time result code; compared against 'H', 'D' and 'A' below.
    outcome = match['FTR']
    # Average odds columns: home win, draw, away win, then the 'Avg>2.5' / 'Avg<2.5' pair.
    avg_h, avg_d, avg_a = match['AvgH'], match['AvgD'], match['AvgA']
    avg_b, avg_s = match['Avg>2.5'], match['Avg<2.5']

    # Outcome codes ordered by ascending average odds. The sort is stable, so equal
    # odds keep the H, D, A order.
    odds_by_outcome = {'H': avg_h, 'D': avg_d, 'A': avg_a}
    outcomes_by_odds = sorted(odds_by_outcome, key=odds_by_outcome.get)

    # A team seen for the first time gets an empty record (home side is checked first).
    for club in (home_team, away_team):
        if club not in result:
            result[club] = init_team_dic(club)
    home = result[home_team]
    away = result[away_team]

    # Points from this match: 3 for the winner, 1 each for a draw, otherwise 0.
    if outcome == 'H':
        home_points, away_points = 3, 0
    elif outcome == 'A':
        home_points, away_points = 0, 3
    elif outcome == 'D':
        home_points, away_points = 1, 1
    else:
        home_points, away_points = 0, 0

    # Record the match date, the running points total and the match count.
    match_date = match['Date']
    home['date'].append(match_date)
    away['date'].append(match_date)
    home['score'].append(home['score'][-1] + home_points)
    away['score'].append(away['score'][-1] + away_points)
    home['totalGames'] += 1
    away['totalGames'] += 1

    # Per-match detail entries; 'number' is how many matches that team has played so far.
    # Odds are stored from each team's own point of view (win/draw/loss).
    home_shots, home_on_target = match['HS'], match['HST']
    away_shots, away_on_target = match['AS'], match['AST']
    home_detail = {
        'number': len(home['date']),
        'opponent': away_team,
        'homeAway': 'Home',
        'avg_win': avg_h,
        'avg_draw': avg_d,
        'avg_loss': avg_a,
        'HS': home_shots,
        'HST': home_on_target,
    }
    away_detail = {
        'number': len(away['date']),
        'opponent': home_team,
        'homeAway': 'Away',
        'avg_win': avg_a,
        'avg_draw': avg_d,
        'avg_loss': avg_h,
        'AS': away_shots,
        'AST': away_on_target,
    }

    # Accumulate shots, shots on target, goals and goal difference.
    home_goals, away_goals = match['FTHG'], match['FTAG']
    home_margin = home_goals - away_goals
    away_margin = away_goals - home_goals
    home['totalShot'] += home_shots
    away['totalShot'] += away_shots
    home['totalShotTarget'] += home_on_target
    away['totalShotTarget'] += away_on_target
    home['totalGoal'] += home_goals
    away['totalGoal'] += away_goals
    home['totalGoalDetail'].append(home_goals)
    away['totalGoalDetail'].append(away_goals)
    home['goalDifference'] += home_margin
    away['goalDifference'] += away_margin
    home['goalDifferenceDetail'].append(home_margin)
    away['goalDifferenceDetail'].append(away_margin)

    # Sum the odds from each side's perspective; they are averaged after the loop.
    for side, win_odd, lose_odd in ((home, avg_h, avg_a), (away, avg_a, avg_h)):
        side['winOddAvg'] += win_odd
        side['drawOddAvg'] += avg_d
        side['loseOddAvg'] += lose_odd
        side['bigOddAvg'] += avg_b
        side['smallOddAvg'] += avg_s

    # Did the result agree with the odds? Position 0 is the lowest-odds outcome,
    # position 2 the highest-odds one.
    odds_rank = outcomes_by_odds.index(outcome)
    agreement_key = ('expected', 'middle', 'opposite')[odds_rank]
    home[agreement_key] += 1
    away[agreement_key] += 1

    # File the match under win / loss / draw for each side and keep its detail entry.
    if outcome == 'H':
        home_keys, away_keys = ('wins', 'winDetails'), ('losses', 'lossDetails')
    elif outcome == 'A':
        home_keys, away_keys = ('losses', 'lossDetails'), ('wins', 'winDetails')
    else:
        home_keys, away_keys = ('draws', 'drawDetails'), ('draws', 'drawDetails')
    home[home_keys[0]] += 1
    home[home_keys[1]].append(home_detail)
    away[away_keys[0]] += 1
    away[away_keys[1]].append(away_detail)

# Turn each team's summed odds into a per-match mean (the original note assumes 38 matches per team).
for team_stats in result.values():
    games_played = team_stats['totalGames']
    for odd_key in ('winOddAvg', 'drawOddAvg', 'loseOddAvg', 'bigOddAvg', 'smallOddAvg'):
        team_stats[odd_key] /= games_played

# 
# print(result)
# for k in result.keys():
#     print(result[k])
#     break
# print(result[result.keys()[0]])
# data = list(result.values())


In [2]:
# Live dict view over the team names in `result` (the per-team statistics built in the first cell);
# it iterates in the order the teams were inserted.
teams = result.keys()
# Prints the view object itself, so the output is rendered as dict_keys([...]) rather than a plain list.
print(teams)

dict_keys(['Crystal Palace', 'Arsenal', 'Fulham', 'Liverpool', 'Bournemouth', 'Aston Villa', 'Leeds', 'Wolves', 'Newcastle', "Nott'm Forest", 'Tottenham', 'Southampton', 'Everton', 'Chelsea', 'Leicester', 'Brentford', 'Man United', 'Brighton', 'West Ham', 'Man City'])


In [3]:
import json
# Absolute Windows path of the exported JSON file (raw string keeps the backslashes literal).
json_path = r'D:\桌面\曾经的作业\2023 数据可视化\final\data.json'
# Serialize fully in memory BEFORE opening the file. json.dump streams chunks into the
# already-truncated file, so a value it cannot encode would leave a partial, invalid
# data.json behind. With json.dumps the existing file is untouched unless encoding succeeds.
# ensure_ascii=False keeps non-ASCII characters as-is; indent=4 pretty-prints the output.
json_text = json.dumps(result, ensure_ascii=False, indent=4)
with open(json_path, 'w', encoding='utf-8') as f:
    f.write(json_text)