In [16]:
# Dependencies
import sys 
import os
import psycopg2
# ----------------------------------
# Run my other notebook so I can use my datafames/my variables
%run mls_data.ipynb 
%run mls_salary_data.ipynb
# Imports the method used for connecting to DBs
from sqlalchemy import create_engine
import datetime
# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Float, Time, ForeignKey, Numeric
from sqlalchemy.orm import relationship
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()

# Session is a temporary binding to our DB
from sqlalchemy.orm import Session
import numpy
from psycopg2.extensions import register_adapter, AsIs


In [17]:
engine = create_engine('postgresql+psycopg2://postgres:postgres@localhost/mls_db')
# Establish Connection
conn = engine.connect()

In [18]:
# clear out the db
Base.metadata.drop_all(engine)

In [19]:
# Define tables: 
# For all possible integers that might be used for calculations in sequel, I'd suggest making them into floats
# SQL will have issues creating decimal values without the use of extra functions
# Table names, columns, and schemas should all be lowercase to prevent the required usage of " " around column names, table, schemas will cause overall issues
# All foreign keys must be addressed in primary key table and named accordingly with other tables

# Club	Conference	Ranking	Matches Played	Wins	Draws	Losses	Total Points
class League(Base):
    __tablename__ = 'league'
    id = Column(Integer, primary_key=True, index=True, nullable= False)
    club = Column(String(30), nullable= False)
    conference = Column(String(30), nullable= False)
    ranking = Column(Integer, nullable= False)
    matches_played = Column(Float, nullable= False)
    wins = Column(Float, nullable= False)
    draws = Column(Integer, nullable= False)
    losses = Column(Float, nullable= False)
    total_points = Column(Float, nullable= False)
    def __repr__(self): 
        return "League(id ={self.id},"\
    "club = '{self.club}',"\
    "conference = '{self.conference}',"\
    "ranking = {self.ranking},"\
    "matches_played = {self.matches_played},"\
    "wins = {self.wins},"\
    "draws = {self.draws},"\
    "losses = {self.losses},"\
    "total_points = {self.total_points})".format(self=self)
    
# Goals For	Goals Against	Goal Differential	Total Points
class Goals(Base):
    __tablename__ = 'goals'
    id = Column(Integer, primary_key=True, index=True, nullable= False)
    goals_for = Column(Float, nullable= False)
    goals_against = Column(Float, nullable= False)
    goal_differential = Column(Float, nullable= False)
    def __repr__(self): 
        return "Goals(id ={self.id},"\
    "goals_for = {self.goals_for},"\
    "goals_against = {self.goals_against},"\
    "goal_differential = {self.goal_differential})".format(self=self)
    
# Expected Goals	Expected Goals Allowed	Expected Goals Differiential	Expected Goals Differiential/ 90 (mins)  
class Expected(Base):
    __tablename__ = 'expected'
    id = Column(Integer, primary_key=True, index=True, nullable= False)
    expected_goals = Column(Float, nullable= False)
    expected_allowed = Column(Float, nullable= False)
    expected_goals_differiential = Column(Float, nullable= False)
    expected_goals_differiential_mins = Column(Float, nullable= False)
    def __repr__(self):
        return "Expected(id ={self.id},"\
    "expected_goals = {self.expected_goals},"\
    "expected_allowed = {self.expected_allowed},"\
    "expected_goals_differiential = {self.expected_goals_differiential},"\
    "expected_goals_differiential_mins = {self.expected_goals_differiential_mins})".format(self=self)
    
# Spectators	Top Team Scorer	Goalkeeper
class Mvp(Base):
    __tablename__ = 'mvp'
    id = Column(Integer, primary_key=True, index=True, nullable= False)
    spectators = Column(Float, nullable= False)
    top_team_scorer = Column(String(50), nullable= False)
    goalkeeper = Column(String(50), nullable= False)
    def __repr__(self):
        return "Mvp(id ={self.id},"\
    "spectators = {self.spectators},"\
    "top_team_Scorer = '{self.top_team_scorer}',"\
    "goalkeeper = '{self.goalkeeper}')".format(self=self)   
    
# Create teams and salary classes for tables in sql
class Teams(Base):
    __tablename__ = 'team_names'
    id = Column(Integer, primary_key=True, index=True, nullable= False)
    club_id = Column(Integer, primary_key=True, index=True, nullable= False)
    club = Column(String(30), nullable= False)
    def __repr__(self):
        return "Teams(id ={self.id},"\
    "club_id = '{self.club_id}',"\
    "club = '{self.club}')".format(self=self) 
    
class Salary(Base):
    __tablename__ = 'salaries'
    id = Column(Integer, primary_key=True, nullable= False)
    club_id = Column(Integer, primary_key=True, index=True, nullable= False)
    first = Column(String(30))
    last = Column(String(30))
    club = Column(String(30), nullable= False)
    position = Column(String(30))
    base_salary = Column(Float, nullable= False)
    guaranteed_compensation = Column(Float, nullable= False)
    def __repr__(self):
        return "Salary(id ={self.id},"\
    "club_id={self.club_id},"\
    "first = '{self.first}',"\
    "last = '{self.last}',"\
    "club = '{self.club}',"\
    "position = '{self.position}',"\
    "base_salary = {self.base_salary},"\
    "guaranteed_compensation = {self.guaranteed_compensation})".format(self=self)   
    

In [20]:
# Create Session
# ----------------------------------
session = Session(bind=engine)

In [21]:
# Create tables within the database
Base.metadata.create_all(conn)

In [22]:
# # Create a Session Object to Connect to DB:
# The issue with function is my lack of expertise, the errors are less likely to show to fix my classes and mapping. 
# def run_my_sesions():
#     # Create Session
#     # ----------------------------------
#     session = Session(bind=engine)
#     try:
#         session.bulk_insert_mappings(League, league_df.T.to_dict(orient="records"))
#         session.bulk_insert_mappings(Goals, actual_stats_df.T.to_dict(orient="records"))
#         session.bulk_insert_mappings(Expected, expected_stats_df.T.to_dict(orient="records"))
#         session.bulk_insert_mappings(MVP, mvp_df.T.to_dict(orient="records"))
#         session.commit()
#     except:
#         session.rollback()
#         raise
#     finally:
#         session.close()
# run_my_sesions()

In [23]:
for row in league_df.iterrows():
    league = League(id= row[0], club = row[1]['club'], conference = row[1]['conference'], ranking = row[1]['ranking'],
                     matches_played = row[1]['matches_played'], wins = row[1]['wins'], draws = row[1]['draws'], losses = row[1]['losses'], total_points = row[1]['total_points'])
    session.add(league)
    print(league)

League(id =14,club = 'New York City FC',conference = 'Eastern',ranking = 1,matches_played = 34,wins = 18,draws = 10,losses = 6,total_points = 64)
League(id =1,club = 'Atlanta United',conference = 'Eastern',ranking = 2,matches_played = 34,wins = 18,draws = 4,losses = 12,total_points = 58)
League(id =17,club = 'Philadelphia Union',conference = 'Eastern',ranking = 3,matches_played = 34,wins = 16,draws = 7,losses = 11,total_points = 55)
League(id =23,club = 'Toronto FC',conference = 'Eastern',ranking = 4,matches_played = 34,wins = 13,draws = 11,losses = 10,total_points = 50)
League(id =5,club = 'DC United',conference = 'Eastern',ranking = 5,matches_played = 34,wins = 13,draws = 11,losses = 10,total_points = 50)
League(id =15,club = 'New York Red Bulls',conference = 'Eastern',ranking = 6,matches_played = 34,wins = 14,draws = 6,losses = 14,total_points = 48)
League(id =13,club = 'New England Revolution',conference = 'Eastern',ranking = 7,matches_played = 34,wins = 11,draws = 12,losses = 11,t

In [24]:
def adapt_numpy_int64(numpy_int64):
    """ Adapting numpy.int64 type to SQL-conform int type using psycopg extension, see [1]_ for more info.
    References
    ----------
    .. [1] http://initd.org/psycopg/docs/advanced.html#adapting-new-python-types-to-sql-syntax
    """
    return AsIs(numpy_int64)

register_adapter(numpy.int64, adapt_numpy_int64) 

#Goals For	Goals Against	Goal Differential	Total Points	Goals ID
for row in actual_stats_df.iterrows():
    goals = Goals(id= row[0], goals_for = row[1]['goals_for'], 
    goals_against = row[1]['goals_against'], goal_differential = row[1]['goal_differential']) 
    session.add(goals)
    register_adapter(numpy.int64, adapt_numpy_int64) 
    print(goals)
# ClubID = row[0],

Goals(id =14,goals_for = 63,goals_against = 42,goal_differential = 21)
Goals(id =1,goals_for = 58,goals_against = 43,goal_differential = 15)
Goals(id =17,goals_for = 58,goals_against = 50,goal_differential = 8)
Goals(id =23,goals_for = 57,goals_against = 52,goal_differential = 5)
Goals(id =5,goals_for = 42,goals_against = 38,goal_differential = 4)
Goals(id =15,goals_for = 53,goals_against = 51,goal_differential = 2)
Goals(id =13,goals_for = 50,goals_against = 57,goal_differential = -7)
Goals(id =2,goals_for = 55,goals_against = 47,goal_differential = 8)
Goals(id =12,goals_for = 47,goals_against = 60,goal_differential = -13)
Goals(id =4,goals_for = 39,goals_against = 47,goal_differential = -8)
Goals(id =16,goals_for = 44,goals_against = 52,goal_differential = -8)
Goals(id =6,goals_for = 31,goals_against = 75,goal_differential = -44)
Goals(id =10,goals_for = 85,goals_against = 37,goal_differential = 48)
Goals(id =21,goals_for = 52,goals_against = 49,goal_differential = 3)
Goals(id =19,go

In [25]:
# Expected Goals	Expected Goals Allowed	Expected Goals Differiential	Expected Goals Differiential/ 90 (mins)
for row in expected_stats_df.iterrows():
    expected = Expected(id= row[0], expected_goals = row[1]['expected_goals'], expected_allowed = row[1]['expected_goals_allowed'],
    expected_goals_differiential = row[1]['expected_goals_differiential'], expected_goals_differiential_mins = row[1]['expected_goals_differiential_mins']) 
    session.add(expected)
    print(expected)

Expected(id =14,expected_goals = 51.8,expected_allowed = 44.7,expected_goals_differiential = 7.1,expected_goals_differiential_mins = 0.22)
Expected(id =1,expected_goals = 56.6,expected_allowed = 42.4,expected_goals_differiential = 14.2,expected_goals_differiential_mins = 0.42)
Expected(id =17,expected_goals = 54.9,expected_allowed = 41.8,expected_goals_differiential = 13.1,expected_goals_differiential_mins = 0.39)
Expected(id =23,expected_goals = 48.0,expected_allowed = 49.3,expected_goals_differiential = -1.3,expected_goals_differiential_mins = -0.04)
Expected(id =5,expected_goals = 37.8,expected_allowed = 48.1,expected_goals_differiential = -10.3,expected_goals_differiential_mins = -0.3)
Expected(id =15,expected_goals = 49.9,expected_allowed = 49.7,expected_goals_differiential = 0.2,expected_goals_differiential_mins = 0.01)
Expected(id =13,expected_goals = 44.8,expected_allowed = 54.9,expected_goals_differiential = -10.2,expected_goals_differiential_mins = -0.3)
Expected(id =2,expect

In [26]:
# Spectators	Top Team Scorer	Goalkeeper
for row in mvp_df.iterrows(): 
    mvp = Mvp(id= row[0], spectators = row[1]['spectators'], top_team_scorer = row[1]['top_team_scorer'],
    goalkeeper = row[1]['goalkeeper']) 
    session.add(mvp)
    print(mvp)

Mvp(id =14,spectators = 21107,top_team_Scorer = 'Héber - 15',goalkeeper = 'Sean Johnson')
Mvp(id =1,spectators = 52510,top_team_Scorer = 'Josef Martínez - 27',goalkeeper = 'Brad Guzan')
Mvp(id =17,spectators = 17111,top_team_Scorer = 'Kacper Przybyłko - 15',goalkeeper = 'Andre Blake')
Mvp(id =23,spectators = 25048,top_team_Scorer = 'Alejandro Pozuelo - 12',goalkeeper = 'Quentin Westberg')
Mvp(id =5,spectators = 17744,top_team_Scorer = 'Wayne Rooney - 11',goalkeeper = 'Bill Hamid')
Mvp(id =15,spectators = 17751,top_team_Scorer = 'Daniel Royer - 11',goalkeeper = 'Luis Robles')
Mvp(id =13,spectators = 16737,top_team_Scorer = 'Carles Gil - 10',goalkeeper = 'Matt Turner')
Mvp(id =2,spectators = 12324,top_team_Scorer = 'C.J. Sapong - 13',goalkeeper = 'Kenneth Kronholm')
Mvp(id =12,spectators = 16171,top_team_Scorer = 'Saphir Taïder - 9',goalkeeper = 'Evan Bush')
Mvp(id =4,spectators = 14856,top_team_Scorer = 'Gyasi Zardes - 13',goalkeeper = 'Zack Steffen')
Mvp(id =16,spectators = 22761,top_t

In [27]:
for club in soccer_clubs.iterrows(): 
    teams = Teams(id=club[1][0], club_id=[1][0], club=club[1][1])
    print(teams)
    session.add(teams) 

Teams(id =1,club_id = '1',club = 'Atlanta United')
Teams(id =2,club_id = '1',club = 'Chicago Fire')
Teams(id =3,club_id = '1',club = 'Colorado Rapids')
Teams(id =4,club_id = '1',club = 'Columbus Crew')
Teams(id =5,club_id = '1',club = 'DC United')
Teams(id =6,club_id = '1',club = 'FC Cincinnati')
Teams(id =7,club_id = '1',club = 'FC Dallas')
Teams(id =8,club_id = '1',club = 'Houston Dynamo')
Teams(id =9,club_id = '1',club = 'LA Galaxy')
Teams(id =10,club_id = '1',club = 'LAFC')
Teams(id =11,club_id = '1',club = 'Minnesota United')
Teams(id =12,club_id = '1',club = 'Montreal Impact')
Teams(id =13,club_id = '1',club = 'New England Revolution')
Teams(id =14,club_id = '1',club = 'New York City FC')
Teams(id =15,club_id = '1',club = 'New York Red Bulls')
Teams(id =16,club_id = '1',club = 'Orlando City SC')
Teams(id =17,club_id = '1',club = 'Philadelphia Union')
Teams(id =18,club_id = '1',club = 'Portland Timbers')
Teams(id =19,club_id = '1',club = 'Real Salt Lake')
Teams(id =20,club_id = '1

In [28]:
for row in df.iterrows():
    salaries = Salary(id= row[0], club_id=row[0],first=row[1][1],last=row[1][2],club=row[1][3],position=row[1][4],base_salary=row[1][5],guaranteed_compensation=row[1][6])
    print(salaries)
    session.add(salaries)

Salary(id =0,club_id=0,first = 'us USA',last = 'MF,FW',club = 'Philadelphia',position = '18.0',base_salary = 2000.0,guaranteed_compensation = 28.0)
Salary(id =1,club_id=1,first = 'us USA',last = 'DF',club = 'Seattle',position = '27.0',base_salary = 1991.0,guaranteed_compensation = 18.0)
Salary(id =2,club_id=2,first = 'gh GHA',last = 'DF',club = 'Colorado',position = '24.0',base_salary = 1994.0,guaranteed_compensation = 22.0)
Salary(id =3,club_id=3,first = 'gh GHA',last = 'DF',club = 'Columbus',position = '24.0',base_salary = 1994.0,guaranteed_compensation = 1.0)
Salary(id =4,club_id=4,first = 'gh GHA',last = 'FW',club = 'Philadelphia',position = '28.0',base_salary = 1990.0,guaranteed_compensation = 8.0)
Salary(id =5,club_id=5,first = 'gh GHA',last = 'FW,MF',club = 'Columbus',position = '28.0',base_salary = 1990.0,guaranteed_compensation = 15.0)
Salary(id =6,club_id=6,first = 'hn HON',last = 'MF',club = 'FC Dallas',position = '25.0',base_salary = 1993.0,guaranteed_compensation = 26.0)
S

In [29]:
# Commit all of the tables at once then close: 
session.commit()
