In [91]:
from os import getenv

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy import Integer, String, Numeric, SmallInteger
from sqlalchemy.engine import URL

In [92]:
# Connection settings are read from environment variables; load_dotenv() is
# called first so the variables can also be supplied through a .env file.
load_dotenv()

DB_CONFIG = {
    'user': getenv('DB_ROOT'),
    'password': getenv('DB_ROOT_PASSWORD'),
    'host': getenv('DB_HOST'),
    'database': getenv('DB_NAME'),
    'port': getenv('DB_PORT')
}

# Fail fast with a readable message instead of building a URL that contains
# the literal text "None" for an unset variable.
_missing_settings = [key for key in ('user', 'host', 'database') if not DB_CONFIG[key]]
if _missing_settings:
    raise RuntimeError(
        f"Missing database settings (check the environment / .env file): {', '.join(_missing_settings)}"
    )

# URL.create escapes special characters (e.g. '@', ':', '/') in the user name
# and password, which plain f-string interpolation into the URL does not.
engine = create_engine(
    URL.create(
        drivername='postgresql+psycopg2',
        username=DB_CONFIG['user'],
        password=DB_CONFIG['password'],
        host=DB_CONFIG['host'],
        # getenv returns a string; an unset port falls back to the driver default.
        port=int(DB_CONFIG['port']) if DB_CONFIG['port'] else None,
        database=DB_CONFIG['database'],
    )
)

In [None]:
# Load every analysis JSON export and rename its columns to the snake_case
# names used in the table schema. Each frame is read and renamed in a single
# chained expression.

average_age = (
    pd.read_json('../analysis/average_age/average_age_per_team.json')
    .rename(columns={'TeamID': 'team_id', 'Year': 'year', 'AverageAge': 'average_age'})
)

average_points = (
    pd.read_json('../analysis/average_points/average_points_per_team.json')
    .rename(columns={'TeamID': 'team_id', 'TeamName': 'team_name', 'AveragePoints': 'average_points'})
)

club_info = (
    pd.read_json('../analysis/club_info/club_info.json')
    .rename(columns={
        'TeamID': 'team_id',
        'Team_name': 'team_name',
        'NumberOfCups': 'number_of_cups',
        'NationalTeamID': 'national_team_id',
        'ImageLink': 'image_link',
    })
)

club_titles = (
    pd.read_json('../analysis/club_titles/club_titles.json')
    .rename(columns={'TeamID': 'team_id', 'Year': 'year', 'NumberOfTitlesThisYear': 'number_of_titles_this_year'})
)

country_info = (
    pd.read_json('../analysis/country_info/country_info.json')
    .rename(columns={
        'NationalTeamID': 'national_team_id',
        'NationalTeamName': 'national_team_name',
        'ClubIDs': 'team_ids',
    })
)

legionnaires_per_team = (
    pd.read_json('../analysis/legionnaires/legionnaires_per_team.json')
    .rename(columns={'TeamID': 'team_id', 'Year': 'year', 'Legioners': 'legionnaires'})
)

clubs_and_national_players = (
    pd.read_json('../analysis/players_in_national_teams/clubs_and_national_players.json')
    .rename(columns={'TeamID': 'team_id', 'Year': 'year', 'PlayersInNationalTeam': 'players_in_national_team'})
)

team_size_ratio = (
    pd.read_json('../analysis/team_size_ratio/team_size_ratio.json')
    .rename(columns={'TeamID': 'team_id', 'Year': 'year', 'TeamSizeRatio': 'team_size_ratio'})
)

total_team_cost = (
    pd.read_json('../analysis/total_team_cost/total_team_cost.json')
    .rename(columns={'TeamID': 'team_id', 'Year': 'year', 'TeamCost': 'team_cost'})
)

transfer_balance = (
    pd.read_json('../analysis/transfer_balance/transfer_balance.json')
    .rename(columns={'TeamID': 'team_id', 'Year': 'year', 'TransferBalance': 'transfer_balance'})
)

```sql

teams (
    team_id INT PRIMARY KEY,
    team_name TEXT NOT NULL,
    average_points DECIMAL(3,2)[],
    number_of_cups INT,
    image_link TEXT,
    national_team_id INT REFERENCES national_teams(national_team_id)
);


national_teams (
    national_team_id INT PRIMARY KEY,
    national_team_name TEXT NOT NULL
);

team_yearly_stats (
    team_id INT REFERENCES teams(team_id),
    year INT,
    average_age DECIMAL(3,1),    
    number_of_titles_this_year INT,
    team_cost INT,
    team_size_ratio DECIMAL(4,2),
    players_in_national_team INT,
    legionnaires INT,
    transfer_balance INT,
    PRIMARY KEY (team_id, year)
);
```

In [99]:
# national_teams is an independent copy of country_info without the
# team_ids column.
national_teams = country_info.copy(deep=True).drop(columns=['team_ids'])
national_teams.head()


Unnamed: 0,national_team_id,national_team_name
0,3262,Германия
1,3299,Англия
2,3300,Португалия
3,3375,Испания
4,3376,Италия


In [108]:
# Right join: every row of average_points is kept, and the club_info columns
# are attached where both team_id and team_name match.
teams = club_info.merge(average_points, how='right', on=['team_id', 'team_name'])
teams.head()

Unnamed: 0,team_id,team_name,number_of_cups,national_team_id,image_link,average_points
0,3,Кёльн,13.0,3262.0,https://tmssl.akamaized.net//images/wappen/hea...,"[1.24, 1.28, 1.49, 0.79, 1.83, 1.08, 1.08, 1.5..."
1,5,Милан,53.0,3376.0,https://tmssl.akamaized.net//images/wappen/hea...,"[1.38, 1.67, 1.68, 1.81, 1.73, 1.76, 2.02, 2.0..."
2,11,Арсенал,48.0,3299.0,https://tmssl.akamaized.net//images/wappen/hea...,"[2.07, 1.78, 2.05, 1.77, 1.95, 1.69, 1.78, 1.8..."
3,12,Рома,17.0,3376.0,https://tmssl.akamaized.net//images/wappen/hea...,"[1.66, 1.83, 2.13, 1.9, 1.69, 1.76, 1.77, 1.8,..."
4,13,Атлетико Мадрид,33.0,3375.0,https://tmssl.akamaized.net//images/wappen/hea...,"[1.91, 2.14, 2.03, 2.0, 2.02, 1.78, 2.04, 1.69..."


In [119]:
# Assemble the per-team, per-year table by successive right joins on
# (team_id, year). The order of the joins and the left/right position of each
# frame determine both the surviving rows and the final column order, so they
# are kept exactly as listed here.
merge_keys = ['team_id', 'year']

team_yearly_stats = average_age.merge(club_titles, how='right', on=merge_keys)

# These frames go on the left; the rows of the accumulated table are kept.
for left_frame in (total_team_cost, team_size_ratio):
    team_yearly_stats = left_frame.merge(team_yearly_stats, how='right', on=merge_keys)

# Here the accumulated table is on the left, so from this step on the row set
# is the one of clubs_and_national_players.
team_yearly_stats = team_yearly_stats.merge(clubs_and_national_players, how='right', on=merge_keys)

for left_frame in (legionnaires_per_team, transfer_balance):
    team_yearly_stats = left_frame.merge(team_yearly_stats, how='right', on=merge_keys)

# Round the ratio to two decimals (the schema column is DECIMAL(4,2)).
team_yearly_stats = team_yearly_stats.assign(
    team_size_ratio=team_yearly_stats['team_size_ratio'].round(2)
)
team_yearly_stats

Unnamed: 0,team_id,year,transfer_balance,legionnaires,team_size_ratio,team_cost,average_age,number_of_titles_this_year,players_in_national_team
0,3,2014,-8300.0,22,,50750.0,25.8,0.0,9
1,3,2015,-1730.0,17,0.93,68630.0,25.4,0.0,8
2,3,2016,10650.0,23,1.20,116150.0,25.8,0.0,5
3,3,2017,-32550.0,28,1.27,91730.0,24.9,2.0,4
4,3,2018,29900.0,20,0.89,83500.0,25.3,2.0,3
...,...,...,...,...,...,...,...,...,...
5891,98841,2020,,0,,,,0.0,0
5892,98841,2021,,3,,,,0.0,0
5893,98841,2022,,7,,,,1.0,0
5894,98841,2023,,16,,,,2.0,1


In [None]:
from sqlalchemy import Integer, String, Numeric, SmallInteger, Text, DECIMAL
from sqlalchemy.dialects.postgresql import ARRAY

In [114]:
# Append the national_teams frame to the "national_teams" table, passing
# explicit SQL column types and leaving out the DataFrame index.
national_teams_dtypes = {
    'national_team_id': Integer(),
    'national_team_name': String(100),
}

national_teams.to_sql(
    name='national_teams',
    con=engine,
    if_exists='append',
    index=False,
    dtype=national_teams_dtypes,
)

51

In [115]:
# Append the teams frame to the "teams" table, passing explicit SQL column
# types (average_points is stored as an array of DECIMAL(3, 2)) and leaving
# out the DataFrame index.
teams_dtypes = {
    'team_id': Integer(),
    'team_name': String(100),
    'average_points': ARRAY(DECIMAL(3, 2)),
    'number_of_cups': Integer(),
    'image_link': Text(),
    'national_team_id': Integer(),
}

teams.to_sql(
    name='teams',
    con=engine,
    if_exists='append',
    index=False,
    dtype=teams_dtypes,
)

536

In [122]:
# Append the team_yearly_stats frame to the "team_yearly_stats" table with
# explicit SQL column types and without the DataFrame index.
# The dtype keys must match the DataFrame column names exactly: the column is
# 'players_in_national_team' (singular), and a misspelled key would leave that
# column without the intended Integer type.
team_yearly_stats.to_sql('team_yearly_stats', 
    engine, 
    dtype={
        'team_id': Integer(),
        'year': Integer(),
        'average_age': DECIMAL(3, 1),
        'number_of_titles_this_year': Integer(),
        'team_cost': Integer(),
        'team_size_ratio': DECIMAL(4, 2),
        'players_in_national_team': Integer(),
        'legionnaires': Integer(),
        'transfer_balance': Integer()
    },
    if_exists='append',
    index=False
)

896