In [7]:
import pandas as pd

# Read the CSV file
df = pd.read_csv("./data/matches_info_SL.csv")

# Split the "Result" column into two new columns
df[['result_end', 'result_ht']] = df['Result'].str.split(expand=True)

# Remove the parentheses from the "result_ht" column
df['result_ht'] = df['result_ht'].str.strip('()')

# Extract goals for home team and away team
df[['goals_HomeTeam', 'goals_AwayTeam']] = df['result_end'].str.split(':', expand=True)

# Save the cleaned data to a new CSV file
df.to_csv("./data/matches_info_SL_cleaned.csv", index=False)

# Display the first few rows of the cleaned data
print(df.head())

   Match ID       Home Team  HomeTeamPoints  AwayTeamPoints  \
0   3840895  BSC Young Boys               3               0   
1   3840896   FC Winterthur               1               1   
2   3840897       FC Lugano               0               3   
3   3840898     Servette FC               3               0   
4   3840899       FC Luzern               1               1   

                 Away Team     Result                      League result_end  \
0                FC Zürich  4:0 (0:0)  Credit Suisse Super League        4:0   
1            FC Basel 1893  1:1 (1:0)  Credit Suisse Super League        1:1   
2                  FC Sion  2:3 (0:1)  Credit Suisse Super League        2:3   
3       FC St. Gallen 1879  1:0 (1:0)  Credit Suisse Super League        1:0   
4  Grasshopper Club Zurich  1:1 (1:1)  Credit Suisse Super League        1:1   

  result_ht goals_HomeTeam goals_AwayTeam  
0       0:0              4              0  
1       1:0              1              1  
2       

In [9]:
# Read the CSV file
df_lineups = pd.read_csv("./data/lineups_2022_2023_SL.csv")

# Split the "Gameday" column to extract matchday, weekday, date, and time
df_lineups[['matchday', 'weekday', 'date', 'time']] = df_lineups['Gameday'].str.split('|').apply(lambda x: pd.Series([x[0].strip(), x[1].strip().split(',')[0].strip(), x[1].strip().split(',')[1].strip(), x[2].strip()]))

# Convert the "date" column to proper datetime format
df_lineups['date'] = pd.to_datetime(df_lineups['date'])

# Save the cleaned data to a new CSV file
df_lineups.to_csv("./data/lineups_2022_2023_SL_cleaned.csv", index=False)

# Display the first few rows of the cleaned data
print(df_lineups.head())

      Position               Player  Age Market Value            Club  \
0   Goalkeeper   David von Ballmoos   27       €2.50m  BSC Young Boys   
1  Centre-Back       Cédric Zesiger   24       €3.20m  BSC Young Boys   
2  Centre-Back  Fabian Lustenberger   34        €400k  BSC Young Boys   
3    Left-Back       Ulisses Garcia   26       €2.00m  BSC Young Boys   
4   Right-Back           Lewin Blum   20        €750k  BSC Young Boys   

                                   Gameday   H/A    Status  Match ID  \
0  1. Matchday | Sat, 7/16/22   |  6:00 PM  Home  Starting   3840895   
1  1. Matchday | Sat, 7/16/22   |  6:00 PM  Home  Starting   3840895   
2  1. Matchday | Sat, 7/16/22   |  6:00 PM  Home  Starting   3840895   
3  1. Matchday | Sat, 7/16/22   |  6:00 PM  Home  Starting   3840895   
4  1. Matchday | Sat, 7/16/22   |  6:00 PM  Home  Starting   3840895   

      matchday weekday       date     time  
0  1. Matchday     Sat 2022-07-16  6:00 PM  
1  1. Matchday     Sat 2022-07-16  6:0

  df_lineups['date'] = pd.to_datetime(df_lineups['date'])


In [4]:
import pandas as pd
import mysql.connector
from datetime import datetime

# Replace these placeholders with your MySQL credentials
db_config = {
    "host": "127.0.0.1",
    "user": "pbl_ra_mh",
    "password": "pblservette2024",
    "database": "transfermarkt"
}

# Establish a connection to the MySQL server
connection = mysql.connector.connect(**db_config)

# Create a cursor to execute SQL queries
cursor = connection.cursor()

# Specify the name of the table for matches_info_SL_cleaned DataFrame
matches_table_name = "matches_info_SL_cleaned"

drop_matches_table_query = f"DROP TABLE IF EXISTS `{matches_table_name}`"
cursor.execute(drop_matches_table_query)

# Create the table for matches_info_SL_cleaned DataFrame
create_matches_table_query = f"""
CREATE TABLE IF NOT EXISTS `{matches_table_name}` (
    `Match ID` INT PRIMARY KEY,
    `Home Team` VARCHAR(255),
    `HomeTeamPoints` INT,
    `AwayTeamPoints` INT,
    `Away Team` VARCHAR(255),
    `Result` VARCHAR(255),
    `League` VARCHAR(255),
    `result_end` VARCHAR(10),
    `result_ht` VARCHAR(10),
    `goals_HomeTeam` INT,
    `goals_AwayTeam` INT
)
"""
cursor.execute(create_matches_table_query)

# Load data from matches_info_SL_cleaned DataFrame into MySQL database
matches_df = pd.read_csv("./data/matches_info_SL_cleaned.csv")
matches_df['Match ID'] = matches_df['Match ID'].astype(str)  # Convert Match ID to string to preserve leading zeros
matches_df.to_sql(matches_table_name, connection, if_exists='replace', index=False)

# Specify the name of the table for lineups_2022_2023_SL_cleaned DataFrame
lineups_table_name = "lineups_2022_2023_SL_cleaned"

# Create the table for lineups_2022_2023_SL_cleaned DataFrame
create_lineups_table_query = f"""
CREATE TABLE IF NOT EXISTS `{lineups_table_name}` (
    `Position` VARCHAR(255),
    `Player` VARCHAR(255),
    `Age` INT,
    `Market Value` VARCHAR(255),
    `Club` VARCHAR(255),
    `Gameday` VARCHAR(255),
    `H/A` VARCHAR(10),
    `Status` VARCHAR(255),
    `Match ID` INT,
    `matchday` VARCHAR(50),
    `weekday` VARCHAR(10),
    `date` DATE,
    `time` VARCHAR(10)
)
"""
cursor.execute(create_lineups_table_query)

# Load data from lineups_2022_2023_SL_cleaned DataFrame into MySQL database
lineups_df = pd.read_csv("./data/lineups_2022_2023_SL_cleaned.csv")
lineups_df['Match ID'] = lineups_df['Match ID'].astype(str)  # Convert Match ID to string to preserve leading zeros
lineups_df['date'] = pd.to_datetime(lineups_df['date'], format='%d/%m/%Y')  # Convert date to datetime format
lineups_df.to_sql(lineups_table_name, connection, if_exists='replace', index=False)

# Commit the changes and close the connection
connection.commit()
connection.close()

# Print a success message
print("Data insertion into MySQL completed successfully")


InvalidRequestError: Could not reflect: requested table(s) not available in Engine(mysql+mysqlconnector://pbl_ra_mh:***@127.0.0.1/transfermarkt): (matches_info_SL_cleaned)

In [None]:
import mysql.connector

# Replace these placeholders with your MySQL credentials
db_config = {
    "host": "127.0.0.1",
    "user": "pbl_ra_mh",
    "password": "pblservette2024",
    "database": "transfermarkt"
}

# Establish a connection to the MySQL server
connection = mysql.connector.connect(**db_config)

# Create a cursor to execute SQL queries
cursor = connection.cursor()

# Specify the name of the table where you want to insert the data
table_name = "transfermarktv2"  # Replace with your table name

# Create the table if it doesn't exist
create_table_query = f"""
CREATE TABLE IF NOT EXISTS `{table_name}` (
    `Rank` INT,
    `Club` VARCHAR(255),
    `League` VARCHAR(255),
    `Value` DECIMAL(10, 2),
    `Current Value` DECIMAL(10, 2),
    `Change` DECIMAL(10, 2),
    `As of` DATE
)
"""

cursor.execute(create_table_query)

# Assuming you have a DataFrame named 'df' with the scraped data
# Adjust the column names based on your DataFrame's structure

# Replace commas with periods in the 'Value' column
df['Value'] = df['Value'].str.replace(',', '.')

# Replace commas with periods in the 'Current Value' column
df['Current Value'] = df['Current Value'].str.replace(',', '.')

for _, row in df.iterrows():
    # Remove the percentage sign and replace commas with periods in the 'Change' column
    change_value = row['Change'].replace('%', '').replace(',', '.')
    
    insert_query = f"""
    INSERT INTO `{table_name}` (`Rank`, `Club`, `League`, `Value`, `Current Value`, `Change`, `As of`)
    VALUES (%s, %s, %s, %s, %s, %s, %s)
    """
    # Fetch the values from the DataFrame and insert them into the table
    values = (
        row['Rank'],
        row['Club'],
        row['League'],
        float(row['Value']),  # Convert the string to a float
        float(row['Current Value']),  # Convert the string to a float
        float(change_value),  # Convert the modified 'Change' value to a float
        row['As of']
    )
    cursor.execute(insert_query, values)

# Commit the changes and close the connection
connection.commit()
connection.close()

# Print a success message
print("Data insertion into MySQL completed successfully")
