In [5]:
import logging
import os
import re
from io import BytesIO

import pandas as pd
from azure.storage.blob import BlobServiceClient


# Folder prefixes for the two input layers. The trailing slash is part of the
# value, so a prefix can be glued directly in front of a file name.
SILVER = 'silver/'
GOLD = 'gold/'


def _read_table(layer, table):
    """Load ``<layer><table>.csv`` into a DataFrame with pandas' defaults."""
    return pd.read_csv(f"{layer}{table}.csv")


# Tables read from the silver folder.
foot_df = _read_table(SILVER, "Foot")
league_df = _read_table(SILVER, "League")
nationality_df = _read_table(SILVER, "Nationality")
player_df = _read_table(SILVER, "Player")
player_name_df = _read_table(SILVER, "PlayerName")
player_team_df = _read_table(SILVER, "PlayerTeam")
position_df = _read_table(SILVER, "Position")
team_df = _read_table(SILVER, "Team")
team_league_df = _read_table(SILVER, "TeamLeague")

# Tables read from the gold folder.
dataset_df = _read_table(GOLD, "dataset")
feature_importance_df = _read_table(GOLD, "FeatureImportance")
prediction_df = _read_table(GOLD, "Prediction")

In [6]:
def create_blob_client_with_connection_string(connection_string):
    """Build a ``BlobServiceClient`` from a storage connection string.

    Every literal ``%2B`` in ``connection_string`` is replaced with ``+``
    before the string is handed to
    ``BlobServiceClient.from_connection_string``. No other percent sequence
    is touched.

    Args:
        connection_string: Storage connection string, possibly containing
            ``%2B`` where a ``+`` is meant.

    Returns:
        The client returned by ``BlobServiceClient.from_connection_string``.
    """
    decoded = re.sub(r'%2B', '+', connection_string)
    return BlobServiceClient.from_connection_string(decoded)

def from_pandas_to_parquet(df):
    """Serialize a DataFrame to Parquet in memory.

    Args:
        df: Object exposing ``to_parquet(target, engine=...)``; in this
            notebook a ``pandas.DataFrame``.

    Returns:
        A ``BytesIO`` holding the Parquet bytes written with the ``pyarrow``
        engine, rewound to position 0.
    """
    parquet_file = BytesIO()
    df.to_parquet(parquet_file, engine='pyarrow')
    # Writing leaves the cursor at the end of the data. Rewind so the buffer
    # can be consumed as a stream (``.read()``, or passed directly to an
    # uploader); ``getvalue()`` is unaffected by the cursor position.
    parquet_file.seek(0)
    return parquet_file

In [7]:
# Configure the root logger explicitly. Without a handler on the root logger,
# the INFO records emitted below would fall through to logging's last-resort
# handler, which only prints WARNING and above.
logging.basicConfig()
logger = logging.getLogger('__To_Azure_Blob_Storage__')
logger.setLevel(logging.INFO)

# Create a blob client.
# The connection string embeds the storage account key, so it must never be
# written into the notebook: read it from the environment instead.
# NOTE: any account key that was previously committed in this cell should be
# treated as compromised and rotated in the Azure portal.
connection_string = os.environ.get("AZURE_STORAGE_CONNECTION_STRING")
if not connection_string:
    raise RuntimeError(
        "Set the AZURE_STORAGE_CONNECTION_STRING environment variable to the "
        "storage account connection string before running this notebook."
    )
blob_service_client = create_blob_client_with_connection_string(connection_string)
logger.info("Successfully created blob client\n")

container_name = "player-salaries"
# Actually obtain the container client so the log line below reports
# something that happened.
container_client = blob_service_client.get_container_client(container_name)
logger.info(f"Successfully got container client for {container_name} container.\n")

INFO:__To_Azure_Blob_Storage__:Successfully created blob client

INFO:__To_Azure_Blob_Storage__:Successfully got container client for player-salaries container.



In [8]:
# Single source of truth pairing each blob base name with its DataFrame, so a
# name can never drift out of alignment with its frame. Insertion order is the
# upload order.
frames_by_name = {
    'dataset': dataset_df,
    'Foot': foot_df,
    'League': league_df,
    'Nationality': nationality_df,
    'Player': player_df,
    'PlayerName': player_name_df,
    'PlayerTeam': player_team_df,
    'Position': position_df,
    'Team': team_df,
    'TeamLeague': team_league_df,
    'FeatureImportance': feature_importance_df,
    'Prediction': prediction_df,
}

# Kept as separate lists under their existing names.
file_names = list(frames_by_name)
dataframes = list(frames_by_name.values())

# Serialize each frame to Parquet in memory and upload it as
# "<name>.parquet", replacing any blob of the same name.
for df_name, df in frames_by_name.items():
    parquet_buffer = from_pandas_to_parquet(df)
    blob_client = blob_service_client.get_blob_client(
        container=container_name,
        blob=f"{df_name}.parquet",
    )
    blob_client.upload_blob(
        parquet_buffer.getvalue(),
        blob_type="BlockBlob",
        overwrite=True,
    )
    logger.info(f"Successfully uploaded {df_name} to {container_name} container !\n")

INFO:__To_Azure_Blob_Storage__:Successfully uploaded dataset to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded Foot to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded League to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded Nationality to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded Player to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded PlayerName to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded PlayerTeam to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded Position to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded Team to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded TeamLeague to player-salaries container !

INFO:__To_Azure_Blob_Storage__:Successfully uploaded FeatureImpo