## Imports and Defines

In [1]:
! pip install -q gdown

import sqlite3
import os
import pandas as pd
import gdown
import warnings

warnings.filterwarnings('ignore')

In [2]:
# Local location of the SQLite database used by every query below.
db_name = 'database.sqlite'
db_folder = 'src'

# makedirs(..., exist_ok=True) is idempotent and avoids the check-then-create
# race of a separate os.path.exists() test followed by os.mkdir().
os.makedirs(db_folder, exist_ok=True)

db_path = os.path.join(db_folder, db_name)

# Google Drive URL the database file is fetched from on the first run.
url_db = 'https://drive.google.com/uc?id=1Ez76povTEsoi9ZJj0I8swdczV4F3GQiF'

# Download only when the file is not cached locally yet.
if not os.path.exists(db_path):
    gdown.download(url_db, db_path)
    # sqlite3.connect() silently creates an empty database when the file is
    # missing, so fail here rather than at the first "no such table" error.
    if not os.path.exists(db_path):
        raise FileNotFoundError(f'Failed to download the database to {db_path}')

# Shared connection used as the default by req().
con = sqlite3.connect(db_path)


def req(query, connect=con, params=None):
    """Run a SQL query and return its result set as a DataFrame.

    Args:
        query: SQL text to execute.
        connect: Database connection to run the query on. Defaults to the
            module-level ``con`` (bound at the time this function is defined).
        params: Optional bind values forwarded to ``pd.read_sql_query`` so
            callers can use placeholders instead of formatting values into
            the SQL string. ``None`` keeps the previous behaviour.

    Returns:
        pandas.DataFrame with the rows produced by ``query``.
    """
    return pd.read_sql_query(query, connect, params=params)

## In Process

Посмотрим, какие таблицы есть в базе данных.

In [3]:
# List the user tables. `sqlite_master` is used instead of its alias
# `sqlite_schema` because the alias only exists in SQLite 3.33.0 and later.
req('''SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';''')

Unnamed: 0,name
0,Player_Attributes
1,Player
2,Match
3,League
4,Country
5,Team
6,Team_Attributes


In [4]:
# One row per Player_Attributes record, enriched with the player's name and
# birthday from Player.
# The key is taken from Player_Attributes (the left side of the LEFT JOIN):
# selecting Player.player_api_id would yield NULL for any attribute row that
# has no matching Player record.
players = req('''
SELECT Player_Attributes.player_api_id AS player_api_id,
       player_name,
       birthday,
       date,
       overall_rating,
       potential,
       preferred_foot,
       attacking_work_rate,
       defensive_work_rate,
       crossing,
       finishing,
       heading_accuracy,
       short_passing,
       volleys,
       dribbling,
       curve,
       free_kick_accuracy,
       long_passing,
       ball_control,
       acceleration,
       sprint_speed,
       agility,
       reactions,
       balance,
       shot_power,
       jumping,
       stamina,
       strength,
       long_shots,
       aggression,
       interceptions,
       positioning,
       vision,
       penalties,
       marking,
       standing_tackle,
       sliding_tackle,
       gk_diving,
       gk_handling,
       gk_kicking,
       gk_positioning,
       gk_reflexes
FROM Player_Attributes
         LEFT JOIN Player ON Player_Attributes.player_api_id = Player.player_api_id;
''')
# Write next to the database, reusing db_folder instead of a hard-coded path.
players.to_csv(os.path.join(db_folder, 'players.csv'), index=False)
players.head(5)

Unnamed: 0,player_api_id,player_name,birthday,date,overall_rating,potential,preferred_foot,attacking_work_rate,defensive_work_rate,crossing,...,vision,penalties,marking,standing_tackle,sliding_tackle,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes
0,505942,Aaron Appindangoye,1992-02-29 00:00:00,2016-02-18 00:00:00,67.0,71.0,right,medium,medium,49.0,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
1,505942,Aaron Appindangoye,1992-02-29 00:00:00,2015-11-19 00:00:00,67.0,71.0,right,medium,medium,49.0,...,54.0,48.0,65.0,69.0,69.0,6.0,11.0,10.0,8.0,8.0
2,505942,Aaron Appindangoye,1992-02-29 00:00:00,2015-09-21 00:00:00,62.0,66.0,right,medium,medium,49.0,...,54.0,48.0,65.0,66.0,69.0,6.0,11.0,10.0,8.0,8.0
3,505942,Aaron Appindangoye,1992-02-29 00:00:00,2015-03-20 00:00:00,61.0,65.0,right,medium,medium,48.0,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0
4,505942,Aaron Appindangoye,1992-02-29 00:00:00,2007-02-22 00:00:00,61.0,65.0,right,medium,medium,48.0,...,53.0,47.0,62.0,63.0,66.0,5.0,10.0,9.0,7.0,7.0


In [9]:
# One row per Team_Attributes record, enriched with the matching Team row.
# `SELECT *` over this join returned `id` and `team_fifa_api_id` twice (once
# per table), which gives the DataFrame and the CSV duplicate column names.
# The Team copies are aliased so every column is unique; the column count and
# order stay the same.
# NOTE(review): Team's column list is inferred from the 29-column result of
# the original query — confirm against the schema.
teams = req('''
SELECT Team_Attributes.*,
       Team.id               AS team_id,
       Team.team_fifa_api_id AS team_fifa_api_id_from_team,
       Team.team_long_name,
       Team.team_short_name
FROM Team_Attributes
         LEFT JOIN Team ON Team_Attributes.team_api_id = Team.team_api_id;
''')
# Write next to the database, reusing db_folder instead of a hard-coded path.
teams.to_csv(os.path.join(db_folder, 'teams.csv'), index=False)
# DataFrame.info() takes `verbose` as its first argument, not a row count,
# so the stray 5 is dropped.
teams.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1458 entries, 0 to 1457
Data columns (total 29 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              1458 non-null   int64  
 1   team_fifa_api_id                1458 non-null   int64  
 2   team_api_id                     1458 non-null   int64  
 3   date                            1458 non-null   object 
 4   buildUpPlaySpeed                1458 non-null   int64  
 5   buildUpPlaySpeedClass           1458 non-null   object 
 6   buildUpPlayDribbling            489 non-null    float64
 7   buildUpPlayDribblingClass       1458 non-null   object 
 8   buildUpPlayPassing              1458 non-null   int64  
 9   buildUpPlayPassingClass         1458 non-null   object 
 10  buildUpPlayPositioningClass     1458 non-null   object 
 11  chanceCreationPassing           1458 non-null   int64  
 12  chanceCreationPassingClass      14

In [6]:
# One row per match with country/league names, line-ups and bookmaker odds.
# League is joined through the match's own league reference. Joining it via
# Country (Country.id = League.country_id) only gives one row per match while
# every country has exactly one league; a second league would duplicate rows.
# NOTE(review): relies on Match.league_id referencing League.id — confirm
# against the schema.
matches = req('''
SELECT Match.id,
       Country.name AS country_name,
       League.name  AS league_name,
       season,
       stage,
       date,
       card,
       match_api_id,
       home_team_api_id,
       away_team_api_id,
       home_team_goal,
       away_team_goal,
       home_player_1,
       home_player_2,
       home_player_3,
       home_player_4,
       home_player_5,
       home_player_6,
       home_player_7,
       home_player_8,
       home_player_9,
       home_player_10,
       home_player_11,
       away_player_1,
       away_player_2,
       away_player_3,
       away_player_4,
       away_player_5,
       away_player_6,
       away_player_7,
       away_player_8,
       away_player_9,
       away_player_10,
       away_player_11,
       B365H,
       B365D,
       B365A,
       BWH,
       BWD,
       BWA,
       IWH,
       IWD,
       IWA,
       LBH,
       LBD,
       LBA,
       PSH,
       PSD,
       PSA,
       WHH,
       WHD,
       WHA,
       SJH,
       SJD,
       SJA,
       VCH,
       VCD,
       VCA,
       GBH,
       GBD,
       GBA,
       BSH,
       BSD,
       BSA
FROM Match
         JOIN Country ON Match.country_id = Country.id
         JOIN League ON Match.league_id = League.id
''')
# Write next to the database, reusing db_folder instead of a hard-coded path.
matches.to_csv(os.path.join(db_folder, 'matches.csv'), index=False)
matches.head(5)

Unnamed: 0,id,country_name,league_name,season,stage,date,card,match_api_id,home_team_api_id,away_team_api_id,...,SJA,VCH,VCD,VCA,GBH,GBD,GBA,BSH,BSD,BSA
0,1,Belgium,Belgium Jupiler League,2008/2009,1,2008-08-17 00:00:00,,492473,9987,9993,...,4.0,1.65,3.4,4.5,1.78,3.25,4.0,1.73,3.4,4.2
1,2,Belgium,Belgium Jupiler League,2008/2009,1,2008-08-16 00:00:00,,492474,10000,9994,...,3.8,2.0,3.25,3.25,1.85,3.25,3.75,1.91,3.25,3.6
2,3,Belgium,Belgium Jupiler League,2008/2009,1,2008-08-16 00:00:00,,492475,9984,8635,...,2.5,2.35,3.25,2.65,2.5,3.2,2.5,2.3,3.2,2.75
3,4,Belgium,Belgium Jupiler League,2008/2009,1,2008-08-17 00:00:00,,492476,9991,9998,...,7.5,1.45,3.75,6.5,1.5,3.75,5.5,1.44,3.75,6.5
4,5,Belgium,Belgium Jupiler League,2008/2009,1,2008-08-16 00:00:00,,492477,7947,9985,...,1.73,4.5,3.4,1.65,4.5,3.5,1.65,4.75,3.3,1.67
