**Install python dependencies**

In [1]:
!pip install -q -r ./dependencies/requirements.txt

**Load python libraries**

In [2]:
from sqlalchemy import create_engine
from os import getenv
import requests
import pandas as pd
from statistics import median
from datetime import datetime
import smart_match


**Connect to database**

In [4]:
db_name = getenv("POSTGRES_DB")
db_user = getenv("POSTGRES_USER")
db_pass = getenv("POSTGRES_PASSWORD")
db_host = 'localhost'
db_port = '5432'
db_string = 'postgresql://{}:{}@{}:{}/{}'.format(db_user, db_pass, db_host, db_port, db_name)
db = create_engine(db_string)

Engine(postgresql://top_manager:***@localhost:5432/fm_analytics)

In [5]:
ODDS_API_URL = "https://api.the-odds-api.com/v3/odds/"
ODDS_API_KEY = getenv("ODDS_API_KEY")

print(ODDS_API_KEY)

7dd9ac1458ce753bc3369a51056043f1


In [6]:
res = requests.get(ODDS_API_URL, params={
    'api_key': ODDS_API_KEY,
    'region': 'eu',
    'sport': 'soccer_germany_bundesliga'
})

odds = res.json()

odds

{'success': True,
 'data': [{'id': '51e53b31d3ae105d30c130632640c450',
   'sport_key': 'soccer_germany_bundesliga',
   'sport_nice': 'Bundesliga - Germany',
   'teams': ['Bayern Munich', 'Borussia Monchengladbach'],
   'commence_time': 1628879400,
   'home_team': 'Borussia Monchengladbach',
   'sites': [{'site_key': 'unibet',
     'site_nice': 'Unibet',
     'last_update': 1628694324,
     'odds': {'h2h': [1.52, 5.5, 4.7]}},
    {'site_key': 'mybookieag',
     'site_nice': 'MyBookie.ag',
     'last_update': 1628694166,
     'odds': {'h2h': [1.53, 4.9, 4.7]}},
    {'site_key': 'betclic',
     'site_nice': 'Betclic',
     'last_update': 1628694333,
     'odds': {'h2h': [1.55, 5.1, 4.82]}},
    {'site_key': 'onexbet',
     'site_nice': '1xBet',
     'last_update': 1628694516,
     'odds': {'h2h': [1.58, 5.75, 4.78]}},
    {'site_key': 'sport888',
     'site_nice': '888sport',
     'last_update': 1628694304,
     'odds': {'h2h': [1.5, 5.4, 4.6]}},
    {'site_key': 'marathonbet',
     'site

In [7]:
column_names = ["club", "odd_win", "odd_lose", "odd_draw"]
df_odds = pd.DataFrame(columns = column_names)

matches = odds['data']

for match in matches:
    club_1 = match['teams'][0]
    club_2 = match['teams'][1]

    odds_club1, odds_club2, odds_draw = [], [], []

    betting_sites = match['sites']

    for site in betting_sites:
        # key can be h2h or h2h_lay
        for key in site['odds']:
             odds_club1.append(site['odds'][key][0])
             odds_club2.append(site['odds'][key][1])
             odds_draw.append(site['odds'][key][2])

    df_odds = df_odds.append({'club': club_1, 'odd_win': median(odds_club1), 'odd_lose': median(odds_club2), 'odd_draw': median(odds_draw)}, ignore_index=True)
    df_odds = df_odds.append({'club': club_2, 'odd_win': median(odds_club2), 'odd_lose': median(odds_club1), 'odd_draw': median(odds_draw)}, ignore_index=True)

df_odds

Unnamed: 0,club,odd_win,odd_lose,odd_draw
0,Bayern Munich,1.56,5.5,4.735
1,Borussia Monchengladbach,5.5,1.56,4.735
2,Augsburg,3.25,2.155,3.705
3,TSG Hoffenheim,2.155,3.25,3.705
4,Arminia Bielefeld,2.93,2.51,3.26
5,SC Freiburg,2.51,2.93,3.26
6,Bayer Leverkusen,2.51,2.865,3.315
7,Union Berlin,2.865,2.51,3.315
8,VfL Bochum,5.725,1.575,4.29
9,VfL Wolfsburg,1.575,5.725,4.29


In [8]:
df_matchdays = pd.read_sql_table('matchdays', db)
current_timestamp = datetime.now(tz=None).isoformat()


upcoming_matchday_id = db.execute(f"""select id from (select min(number) as number from matchdays
where start > '2021-05-22T13:29:00Z') sub
inner join matchdays m on m.number = sub.number;""").fetchone()[0]

df_odds['matchday_id'] = upcoming_matchday_id

In [10]:
df_teams = pd.read_sql_table('teams', db)

df_odds['club_id'] = ''

for index_odd, odd in df_odds.iterrows():
    most_similar = {'db_club_id': '', 'similarity': 0 }
    for index_db, db_team in df_teams.iterrows():
        odd_name = odd['club']
        db_name = db_team['name']
        similarity = smart_match.similarity(db_name, odd_name)

        if most_similar['similarity'] < similarity:
            most_similar['db_club_id'] = db_team['id']
            most_similar['similarity'] = similarity

    odd['club_id'] = most_similar['db_club_id']
    df_odds.iloc[index_odd] = odd


df_odds = df_odds[['club_id', 'odd_win', 'odd_lose', 'odd_draw', 'matchday_id']]

df_odds

# df_odds.to_sql('odds', con=db, if_exists='append')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


KeyError: 'club'

**Old Linear Regression Code**

In [None]:
# df_player_type = df_player_type.sort_values(by=['matchday'])


# x = np.array(df_player_type['matchday']).reshape((-1, 1))
# y = np.array(df_player_type['count'])
# model = LinearRegression().fit(x, y)
# x_new = np.array(upcoming_matchday).reshape((-1, 1))
# y_pred = model.predict(x_new)
# pred_int = y_pred[0].round(0).astype(int)