### Data Union

##### Libraries

In [27]:
import time
import pandas as pd
from datetime import datetime, timedelta

import os

##### Parameters

In [42]:
# Date stamp (YYYY-MM-DD) used to name today's merged output file.
today_date = datetime.today().strftime('%Y-%m-%d')

# Folder that holds the dated Excel exports; the merged output is written here too.
db_path = 'DB/'  # Replace with the correct path to the folder containing the files

# Get a list of all Excel files in the directory
files = [f for f in os.listdir(db_path) if f.endswith('.xlsx')]

# Separate files into YouTube and playboard categories based on file naming pattern
youtube_files = [f for f in files if f.startswith('Youtube_Data_')]
playboard_files = [f for f in files if f.startswith('playboard_')]

# Fail early with an actionable message: without this guard, max() below raises a
# bare "max() arg is an empty sequence" that does not say which input is missing.
if not youtube_files:
    raise ValueError(f"No 'Youtube_Data_*.xlsx' file found in {db_path!r}")
if not playboard_files:
    raise ValueError(f"No 'playboard_*.xlsx' file found in {db_path!r}")

# Extract the date field from each filename: the third (YouTube) or second
# (playboard) underscore-separated field, truncated at the first '.'.
youtube_dates = [f.split('_')[2].split('.')[0] for f in youtube_files]
playboard_dates = [f.split('_')[1].split('.')[0] for f in playboard_files]

# Find the most recent date for each category.
# NOTE(review): this is a plain string comparison, so it is only chronological if
# the filenames use a sortable date format (presumably YYYY-MM-DD, like today_date) - confirm.
latest_youtube_date = max(youtube_dates)
latest_playboard_date = max(playboard_dates)

# Build the full file paths for the most recent files
latest_youtube_file = f'Youtube_Data_{latest_youtube_date}.xlsx'
latest_playboard_file = f'playboard_{latest_playboard_date}.xlsx'

latest_youtube_path = os.path.join(db_path, latest_youtube_file)
latest_playboard_path = os.path.join(db_path, latest_playboard_file)

# Output workbook for the merged data. Derived from db_path (rather than a second
# hard-coded "DB/") so that changing db_path moves inputs and output together.
file_path = os.path.join(db_path, f"Youtube_final_{today_date}.xlsx")


##### Load Data

In [44]:
# Read the newest workbook of each kind into its own DataFrame:
# first the YouTube export, then the playboard export.
latest_youtube_df = pd.read_excel(io=latest_youtube_path)
latest_playboard_df = pd.read_excel(io=latest_playboard_path)

#### Execute Merging Data

In [46]:
# Flag every playboard row whose 'channel_name' already appears in the YouTube data.
known_channels = latest_youtube_df['channel_name']
is_known = latest_playboard_df['channel_name'].isin(known_channels)

# Keep only the playboard rows for channels the YouTube data does not have yet.
new_rows = latest_playboard_df.loc[~is_known]

# Append those rows below the existing YouTube rows and renumber the index from 0.
updated_youtube_df = pd.concat(
    [latest_youtube_df, new_rows],
    ignore_index=True,
)


##### Save Data

In [47]:
# Write the merged table to the dated output workbook, leaving out the row index column.
updated_youtube_df.to_excel(excel_writer=file_path, index=False)