# SourceDaily

* The script processes records by extracting video details using the YouTube API.
* It fetches video metadata such as titles, descriptions, view counts etc based on specific keywords.

In [1]:
import datetime
from pytz import timezone
import pandas as pd


from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("api_key")
secret_value_1 = user_secrets.get_secret("repo_url_youtube_analysis")

# Assigning secrets to variables
api_key = secret_value_0
repo_url = secret_value_1

# Initialize an empty DataFrame with required columns
FileExecution = pd.DataFrame(columns=['ScriptFile', 'StartTime', 'EndTime', 'TimeTaken', 'Date'])

In [2]:
# Recording the start time of execution
start_time = datetime.datetime.now()
# Code block for which execution time need to measure
print("Execution started...")

Execution started...


In [3]:
# %% [code] {"jupyter":{"outputs_hidden":false}}
def VideoDetailExtraction(kw_list, maxResults=50):
    """
    Fetches a list of video details from YouTube based on the given keyword(s) for the initial batch.

    Args:
        kw_list (str): The keyword(s) to search for.
        maxResults (int, optional): The maximum number of results to fetch in this request (default is 50).

    Returns:
        dict: The API response containing video details. Returns None if an error occurs.
    """
    try:
        # Prepare the API request to fetch video details
        request = youtube.search().list(
            part='snippet',            # Fetch metadata such as title, description, and thumbnails
            order='viewCount',         # Order results by view count
            q=kw_list,                 # Search query
            relevanceLanguage='en',    # Limit results to English-relevant videos
            type='video',              # Restrict results to videos only
            # videoCategoryId=26,      # Optional: Filter by specific category (currently commented)
            # regionCode='IN',         # Optional: Restrict to a specific region (currently commented)
            maxResults=maxResults,     # Number of results to fetch (up to 50 per API limits)
            videoCaption='closedCaption'  # Restrict results to videos with closed captions
        )

        # Execute the API request
        response = request.execute()
        return response
    except Exception as e:
        # Log any errors encountered during the API call
        print(f"Error during VideoDetailExtraction(): {e}")
        return None


def VideoDetailExtractionNextPageToken(kw_list, nextPageToken, maxResults=50):
    """
    Fetches the next page of video details from YouTube using a continuation token.

    Args:
        kw_list (str): The keyword(s) to search for.
        nextPageToken (str): The token for fetching the next page of results.
        maxResults (int, optional): The maximum number of results to fetch in this request (default is 50).

    Returns:
        dict: The API response containing video details for the next page. Returns None if an error occurs.
    """
    try:
        # Prepare the API request to fetch the next page of video details
        request = youtube.search().list(
            part='snippet',            # Fetch metadata such as title, description, and thumbnails
            order='viewCount',         # Order results by view count
            q=kw_list,                 # Search query
            relevanceLanguage='en',    # Limit results to English-relevant videos
            type='video',              # Restrict results to videos only
            # videoCategoryId=26,      # Optional: Filter by specific category (currently commented)
            # regionCode='IN',         # Optional: Restrict to a specific region (currently commented)
            maxResults=maxResults,     # Number of results to fetch (up to 50 per API limits)
            pageToken=nextPageToken,   # Token for fetching the next page
            videoCaption='closedCaption'  # Restrict results to videos with closed captions
        )

        # Execute the API request
        response = request.execute()
        return response
    except Exception as e:
        # Log any errors encountered during the API call
        print(f"Error during VideoDetailExtractionNextPageToken(): {e}")
        return None


# %% [code] {"jupyter":{"outputs_hidden":false}}
def VideoDataFrame(response):
    """
    Processes video and channel details from the YouTube API response, structures the data into DataFrames,
    and merges them to create a comprehensive dataset.

    Args:
        response (dict): The response object returned by the YouTube API containing video details.

    Returns:
        tuple:
            - pd.DataFrame: A DataFrame containing merged video and channel details.
            - str or None: The next page token if available, otherwise None.
    """
    try:
        # Initialize lists to store video and channel details
        videoDetails = []
        videoIds = []
        channelIds = []
        channelDetails = []
        
        '''
        Video Search Block: Extract basic video details from the response.
        '''
        for i in range(len(response['items'])):
            # Extract publication time and convert to components
            publishedOn = response['items'][i].get('snippet', '0000-00-00T00:00:00Z').get('publishTime', '0000-00-00T00:00:00Z')
            publishTime = re.split(r'[TZ-]', publishedOn)
            total_seconds = 0
            if publishedOn != '0000-00-00T00:00:00Z':
                try:
                    dt = datetime.datetime.strptime(publishedOn, "%Y-%m-%dT%H:%M:%S.%fZ")
                except ValueError:
                    dt = datetime.datetime.strptime(publishedOn, "%Y-%m-%dT%H:%M:%SZ")
                epoch = datetime.datetime(1970, 1, 1)
                total_seconds = int((dt - epoch).total_seconds())
            currentDate = datetime.datetime.now(ist).strftime("%Y-%m-%d")
            # Append extracted video details
            videoDetails.append({
                'currentDate': currentDate,
                'channelId': response['items'][i]['snippet']['channelId'],
                'channelName': response['items'][i]['snippet']['channelTitle'],
                'videoId': response['items'][i]['id']['videoId'],
                'videoTitle': response['items'][i]['snippet']['title'],
                'videoPublishYear': publishTime[0],  # Extracted year
                'videoPublishMonth': publishTime[1],  # Extracted month
                'videoPublishDay': publishTime[2],  # Extracted day
                'videoPublishTime': publishTime[3],  # Extracted time
                'videoPublishedOn': publishedOn,
                'videoPublishedOnInSeconds': total_seconds
            })
            
            # Collect video and channel IDs
            videoIds.append(response['items'][i]['id']['videoId'])
            channelIds.append(response['items'][i]['snippet']['channelId'])
        
        # Extract next page token if available
        nextPageToken = response.get("nextPageToken", None)
        
        '''
        Video Block: Fetch additional details about each video using its ID.
        '''
        try:
            request = youtube.videos().list(
                part='id,statistics,snippet,contentDetails,localizations,status,liveStreamingDetails,paidProductPlacementDetails,player,recordingDetails,topicDetails',
                id=videoIds
            )
            response = request.execute()

        except Exception as e:
            # Log any errors encountered during the API call
            print(f"Error during videos().list(): {e}")
            return None
        
        for i in range(len(response['items'])):
            video = response['items'][i]

            # Video id
            videoDetails[i]['videoUniqueId'] = video.get('id',None)
            
            # Video statistics
            statistics = video.get('statistics', {})
            videoDetails[i]['videoViewCount'] = statistics.get('viewCount', 0)
            videoDetails[i]['videoLikeCount'] = statistics.get('likeCount', 0)
            videoDetails[i]['videoFavoriteCount'] = statistics.get('favoriteCount', 0)
            videoDetails[i]['videoCommentCount'] = statistics.get('commentCount', 0)
            
            # Video snippet details
            snippet = video.get('snippet', {})
            videoDetails[i]['videoDescription'] = snippet.get('description', None)
            videoDetails[i]['videoTags'] = snippet.get('tags', [])
            videoDetails[i]['videoCategoryId'] = snippet.get('categoryId', None)
            videoDetails[i]['videoLiveBroadcastContent'] = snippet.get('liveBroadcastContent', None)
            videoDetails[i]['videoDefaultLanguage'] = snippet.get('defaultLanguage', None)
            videoDetails[i]['videoDefaultAudioLanguage'] = snippet.get('defaultAudioLanguage', None)
            
            # Video duration (convert ISO 8601 to seconds)
            duration = video.get('contentDetails', {}).get('duration', None)
            if duration:
                match = re.match(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?", duration)
                hours = int(match.group(1) or 0)
                minutes = int(match.group(2) or 0)
                seconds = int(match.group(3) or 0)
                videoDetails[i]['videoDuration'] = timedelta(hours=hours, minutes=minutes, seconds=seconds)
                videoDetails[i]['videoDurationInSeconds'] = hours * 3600 + minutes * 60 + seconds
            else:
                videoDetails[i]['videoDuration'] = None
                videoDetails[i]['videoDurationInSeconds'] = None
            
            # Additional video details
            content_details = video.get('contentDetails', {})
            videoDetails[i]['videoDimension'] = content_details.get('dimension', None)
            videoDetails[i]['videoDefinition'] = content_details.get('definition', None)
            videoDetails[i]['videoCaption'] = content_details.get('caption', None)
            videoDetails[i]['videoLicensedContent'] = content_details.get('licensedContent', False)
            videoDetails[i]['videoProjection'] = content_details.get('projection', False)
        
        '''
        Channel Block: Fetch details for channels associated with the videos.
        '''
        videoDetails = pd.DataFrame(videoDetails)
        Unique_ChannelIds = list(set(videoDetails['channelId']))
        try: 
            request = youtube.channels().list(
                part='id,contentDetails,brandingSettings,contentOwnerDetails,localizations,snippet,statistics,status,topicDetails',
                id=Unique_ChannelIds
            )
            response = request.execute()
        except Exception as e:
            # Log any errors encountered during the API call
            print(f"Error during channels().list(): {e}")
            return None

        for i in range(len(response['items'])):
            item = response['items'][i]
            snippet = item.get('snippet', {})
            publishedOn = snippet.get('publishedAt', '0000-00-00T00:00:00Z')
            publishedAt = re.split(r'[TZ-]', publishedOn)
            total_seconds = 0
            if publishedOn != '0000-00-00T00:00:00Z':
                try:
                    dt = datetime.datetime.strptime(publishedOn, "%Y-%m-%dT%H:%M:%S.%fZ")
                except ValueError:
                    dt = datetime.datetime.strptime(publishedOn, "%Y-%m-%dT%H:%M:%SZ")
                epoch = datetime.datetime(1970, 1, 1)
                total_seconds = int((dt - epoch).total_seconds())
            
            # Extract channel details
            channelDetails.append({
                'channelIdUnique': item['id'],
                'channelTitleCheck': snippet.get('title', None),
                'channelDescription': snippet.get('description', None),
                'channelCustomUrl': snippet.get('customUrl', None),
                'channelPublishYear': publishedAt[0],
                'channelPublishMonth': publishedAt[1],
                'channelPublishDay': publishedAt[2],
                'channelPublishTime': publishedAt[3],
                'channelPublishedOn': publishedOn,
                'channelPublishedOnInSeconds': total_seconds,
                'channelCountry': snippet.get('country', None),
                'channelViewCount': item.get('statistics', {}).get('viewCount', 0),
                'channelSubscriberCount': item.get('statistics', {}).get('subscriberCount', 0),
                'channelVideoCount': item.get('statistics', {}).get('videoCount', 0),
            })
        
        # Convert channel details to DataFrame
        channelDetails = pd.DataFrame(channelDetails)
        
        '''
        Result: Merge video and channel details into a single DataFrame.
        '''
        resultDataFrame = pd.merge(videoDetails, channelDetails, left_on='channelId', right_on='channelIdUnique', how='left')
        return resultDataFrame, nextPageToken
    
    except Exception as e:
        print(f"Error while processing VideoDataFrame(): {e}")
        return None, None

# %% [code] {"jupyter":{"outputs_hidden":false}}
def VideoDetailsStructuring(max_record_count, kw_list):
    """
    Fetches and structures video details into a DataFrame, handling pagination if necessary.

    Args:
        max_record_count (int): The maximum number of video records to fetch.
        kw_list (str): The keyword(s) to use for fetching video details.

    Returns:
        pd.DataFrame: A Pandas DataFrame containing video details. Returns an empty DataFrame on failure.
    """
    try:
        # Initialize an empty DataFrame to store results
        resultDataFrame = pd.DataFrame()

        # Initialize the nextPageToken for pagination
        nextPageToken = None

        # Define the batch sizes for video fetching
        record_fetching_batches = [50]  # Default batch size for YouTube API requests

        # Adjust the batch sizes based on the max_record_count
        if max_record_count > 50:
            quotient = max_record_count // 50  # Number of full batches
            remainder = [max_record_count % 50]  # Remaining records in the last batch
            record_fetching_batches = record_fetching_batches * quotient
            if remainder[0] > 0:
                record_fetching_batches.extend(remainder)  # Add the remainder as a batch
        else:
            record_fetching_batches = [max_record_count]  # Single batch if max_record_count <= 50

        # Case 1: Only one batch needed
        if len(record_fetching_batches) == 1:
            # Fetch video details for the single batch
            response = VideoDetailExtraction(kw_list, record_fetching_batches[0])
            if response is None:
                print("Failed to fetch initial video details - VideoDetailExtraction() returned None, hence returned empty DataFrame.")
                return pd.DataFrame()

            # Process the response into a DataFrame and get the nextPageToken
            resultDataFrame, nextPageToken = VideoDataFrame(response)
            nextPageToken = None  # Reset the token as no further pages are needed
            if resultDataFrame is None:
                print("Failed to process video data frame - VideoDataFrame() returned None, hence returned empty DataFrame.")
                return pd.DataFrame()
            return resultDataFrame

        # Case 2: Multiple batches needed
        elif len(record_fetching_batches) > 1:
            # Fetch initial batch of video details
            response = VideoDetailExtraction(kw_list, record_fetching_batches[0])
            if response is None:
                print("Failed to fetch initial video details - VideoDetailExtraction() returned None, hence returned empty DataFrame.")
                return pd.DataFrame()

            # Process the response into a DataFrame and get the nextPageToken
            resultDataFrame, nextPageToken = VideoDataFrame(response)
            if resultDataFrame is None:
                print("Failed to process video data frame - VideoDataFrame() returned None, hence returned empty DataFrame.")
                return pd.DataFrame()

            # Loop through subsequent batches
            for batch in record_fetching_batches[1:]:
                # Fetch details for the next batch using nextPageToken
                response = VideoDetailExtractionNextPageToken(kw_list, nextPageToken, batch)
                if response is None:
                    print("Failed to fetch next page of video details - VideoDetailExtractionNextPageToken() returned None, hence returned till now fetched videoDetails.")
                    break

                # Process the response into a DataFrame
                resultDataFrame_next, nextPageToken = VideoDataFrame(response)
                if resultDataFrame_next is not None:
                    # Concatenate the new DataFrame to the result DataFrame
                    resultDataFrame = pd.concat([resultDataFrame, resultDataFrame_next], ignore_index=True)

                # Break the loop if we've reached the max record count or no more pages are available
                if len(resultDataFrame) >= max_record_count or not nextPageToken:
                    break

        return resultDataFrame  # Return the final result DataFrame
    except Exception as e:
        print(f"Error during VideoDetailsStructuring(), hence returned empty DataFrame: {e}")
        return pd.DataFrame()

# %% [code] {"jupyter":{"outputs_hidden":false}}
def RawFile(max_record_count):
    """
    Processes video details, structures the data, and saves it as a JSON file.

    Args:
        max_record_count (int): The maximum number of records to process.

    Returns:
        bool: True if the file is successfully created and saved, False otherwise.
    """
    try:
        # Call the function to structure video details and return a DataFrame.
        # `kw_list` is assumed to be a global variable containing the search keyword(s).
        dataframe = VideoDetailsStructuring(max_record_count, kw_list)
        
        # Check if the DataFrame is not empty before saving.
        if not dataframe.empty:
            # Count the number of records (rows) in the DataFrame
            record_count = len(dataframe)
            
            # Generate a timestamp for the file name using the current time in IST (Indian Standard Time).
            timestamp = datetime.datetime.now(ist).strftime("%Y-%m-%d_%H:%M:%S")
        
            # Create a filename using the generated timestamp to ensure uniqueness with number of records.
            filename = f"S_{timestamp}_{record_count}_records.json"
            
            # Save the DataFrame to a JSON file with readable formatting.
            dataframe.to_json(filename, orient="records", indent=4)
            print(f"DataFrame saved as {filename}")
        else:
            # Log a message if the DataFrame is empty.
            print("No data to save since empty DataFrame returned.")
        
        # Return True indicating the process was successful.
        return True
    except Exception as e:
        # Handle and log any errors that occur during the process.
        print(f"Error during raw file creation: {e}")
        
        # Return False indicating the process failed.
        return False

# %% [code] {"jupyter":{"outputs_hidden":false}}
def PushToGithub():
    """
    Automates the process of identifying the latest .json file, copying it 
    to a GitHub repository, and pushing the changes.

    Args:
        None

    Returns:
        bool: True if the process completes successfully and the file is pushed to GitHub, 
              False if an error occurs during any step.
    """
    # List all files in the working directory
    output_files = os.listdir('/kaggle/working')
    
    try:
        # Filter and find the most recent .json file
        json_files = [file for file in output_files if file.startswith("S_") and file.endswith("_records.json")]
        if json_files:
            LatestFiles = max(json_files, key=os.path.getctime)  # Get the latest file based on creation time
        else:
            raise ValueError("No JSON files found!")  # Raise an error if no JSON files are found
    except ValueError as e:
        print(f"An error occurred at fetching recent .json file: {e}")
        return False  # Exit the function if there's an error in fetching JSON files
    
    # Define repository and destination paths
    kaggle_repo_url = '/kaggle/working/YouTubeFoodChannelAnalysis'
    destination_path = '/kaggle/working/YouTubeFoodChannelAnalysis/Source/Daily'
    
    print(LatestFiles)  # Print the latest JSON file name
    try:
        # Check if the repository already exists
        if os.path.exists(kaggle_repo_url):
            print("Already cloned and the repo file exists")
            repo = git.Repo(kaggle_repo_url)  # Access the existing repository
            origin = repo.remote(name='origin')  # Get the remote repository
            origin.pull()  # Pull the latest changes from the repository
            print("Successfully pulled the git repo before push")
        else:
            # Clone the repository if it doesn't exist
            repo = git.Repo.clone_from(repo_url, kaggle_repo_url)
            print("Successfully cloned the git repo")
            
        # Check if the destination path exists, and copy the latest file
        if os.path.exists(destination_path):
            shutil.copyfile(f'/kaggle/working/{LatestFiles}', f'{destination_path}/{LatestFiles}')
        else:
            # Create the destination directory if it doesn't exist
            os.makedirs(destination_path)
            shutil.copyfile(f'/kaggle/working/{LatestFiles}', f'{destination_path}/{LatestFiles}')
        
        # Initialize the repository for git operations
        repo = Repo(kaggle_repo_url)
            
        # Add the copied file to the staging area
        repo.index.add([f"{destination_path}/{LatestFiles}"])
        
        # Create a timestamp for the commit message
        ist = datetime.timezone(datetime.timedelta(hours=5, minutes=30))  # IST timezone
        timestamp = datetime.datetime.now(ist).strftime("%Y-%m-%d_%H:%M:%S")
        # Commit the changes with a message including the timestamp and file name
        repo.index.commit(f"{timestamp} Added files from Kaggle notebook, {LatestFiles}")
        
        # Push the changes to the remote repository
        origin = repo.remote(name="origin")
        push_result = origin.push()
        if push_result:
            print("Push successful.")
        else:
            print("Push failed.")

        # print("Output files successfully pushed to GitHub!")
        return True  # Return True if the process completes successfully
    
    except Exception as e:
        # Handle any errors that occur during the git automation process
        print(f"An error occurred at git automation code: {e}")
        return False  # Return False if an error occurs

# %% [code] {"jupyter":{"outputs_hidden":false}}
def main(max_record_count):
    """
    Main function to orchestrate the execution of raw data extraction and pushing data to GitHub.

    Args:
        max_record_count (int): The maximum number of records to process.
    """
    # Call the RawFile function to process and extract raw data.
    # This function likely handles fetching data, processing it, and storing it in a file.
    RawFile(max_record_count)
    
    # Call the PushToGithub function to push the processed data to a GitHub repository.
    # This function likely handles staging, committing, and pushing the file to the repository.
    PushToGithub()

# %% [code] {"execution":{"iopub.status.busy":"2025-01-22T21:19:26.201047Z","iopub.execute_input":"2025-01-22T21:19:26.201444Z","iopub.status.idle":"2025-01-22T21:19:28.325107Z","shell.execute_reply.started":"2025-01-22T21:19:26.201413Z","shell.execute_reply":"2025-01-22T21:19:28.324059Z"},"jupyter":{"outputs_hidden":false}}
# Entry point of the script
if __name__ == "__main__":
    # Importing necessary libraries
    from googleapiclient.discovery import build  # For interacting with YouTube API
    import googleapiclient.errors
    from IPython.display import JSON, display  # For displaying JSON responses in Jupyter Notebooks
    import re  # For regular expressions
    import datetime  # For date and time manipulations
    # from dateutil.relativedelta import relativedelta  # For handling relative date differences
    import pandas as pd  # For data manipulation and analysis
    import os  # For interacting with the operating system
    from kaggle_secrets import UserSecretsClient  # For securely managing API keys in Kaggle
    import git  # For Git-related operations
    from git import Repo  # For working with repositories
    import shutil  # For file and directory operations
    from pytz import timezone  # For handling time zones
    from datetime import timedelta  # For handling time differences
    import time
    
    # Fetching secrets from Kaggle's secure environment
    from kaggle_secrets import UserSecretsClient
    user_secrets = UserSecretsClient()
    secret_value_0 = user_secrets.get_secret("api_key")
    secret_value_1 = user_secrets.get_secret("repo_url_youtube_analysis")
    
    # Assigning secrets to variables
    api_key = secret_value_0
    repo_url = secret_value_1
    
    # Setting up YouTube API details
    api_service_name = "youtube"
    api_version = "v3"
    youtube = build(api_service_name, api_version, developerKey=api_key)  # Initialize YouTube API client
    
    # Setting the timezone to Indian Standard Time (IST)
    ist = timezone('Asia/Kolkata')
    
    # Maximum number of records to fetch
    max_record_count = 5
    
    # Keyword list for searching YouTube videos
    kw_list = "devops"
    
    # Call the main function with the maximum record count as an argument
    main(max_record_count)

Error during VideoDetailExtraction(): <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=snippet&order=viewCount&q=devops&relevanceLanguage=en&type=video&maxResults=5&videoCaption=closedCaption&key=AIzaSyAvqq88BgMiQGkPHyrWt9DPg6j1ify3M-U&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">
Failed to fetch initial video details - VideoDetailExtraction() returned None, hence returned empty DataFrame.
No data to save since empty DataFrame returned.
An error occurred at fetching recent .json file: No JSON files found!


In [4]:
print("Execution ended.")

# Record the end time of execution
end_time = datetime.datetime.now()

# Calculate the time taken for execution
time_taken = end_time - start_time

# Get the current time in the 'Asia/Kolkata' timezone
current_time = datetime.datetime.now(timezone('Asia/Kolkata')).strftime("%Y-%m-%d_%H:%M:%S")

# Create a new row as a DataFrame
new_row = pd.DataFrame([{
    'ScriptFile': 'sourcedaily.ipynb',
    'StartTime': start_time.strftime('%Y-%m-%d %H:%M:%S'),
    'EndTime': end_time.strftime('%Y-%m-%d %H:%M:%S'),
    'TimeTaken': str(time_taken),
    'Date': current_time
}])

# Append the new row using pd.concat()
FileExecution = pd.concat([FileExecution, new_row], ignore_index=True)

# Display the DataFrame
# display(FileExecution)

# Save the DataFrame to a JSON file
FileExecution.to_json(f"{current_time}_ScriptFileExecution.json", orient="records", indent=4)
# print(FileExecution)

Execution ended.


In [5]:
"""
Automates the process of identifying the latest .json file, copying it 
to a GitHub repository, and pushing the changes.

Args:
    None

Returns:
    bool: True if the process completes successfully and the file is pushed to GitHub, 
          False if an error occurs during any step.
"""
# List all files in the working directory
output_files = os.listdir('/kaggle/working')

try:
    # Filter and find the most recent .json file
    json_files = [file for file in output_files if file.endswith("ScriptFileExecution.json")]
    if json_files:
        LatestFiles = max(json_files, key=os.path.getctime)  # Get the latest file based on creation time
    else:
        raise ValueError("No JSON files found!")  # Raise an error if no JSON files are found
except ValueError as e:
    print(f"An error occurred at fetching recent .json file: {e}")

# Define repository and destination paths
kaggle_repo_url = '/kaggle/working/YouTubeFoodChannelAnalysis'
destination_path = '/kaggle/working/YouTubeFoodChannelAnalysis/ExecutionTracker/Daily'

print(LatestFiles)  # Print the latest JSON file name
try:
    # Check if the repository already exists
    if os.path.exists(kaggle_repo_url):
        print("Already cloned and the repo file exists")
        repo = git.Repo(kaggle_repo_url)  # Access the existing repository
        origin = repo.remote(name='origin')  # Get the remote repository
        origin.pull()  # Pull the latest changes from the repository
        print("Successfully pulled the git repo before push")
    else:
        # Clone the repository if it doesn't exist
        repo = git.Repo.clone_from(repo_url, kaggle_repo_url)
        print("Successfully cloned the git repo")
    
    # Check if the destination path exists, and copy the latest file
    if os.path.exists(destination_path):
        shutil.copyfile(f'/kaggle/working/{LatestFiles}', f'{destination_path}/{LatestFiles}')
    else:
        # Create the destination directory if it doesn't exist
        os.makedirs(destination_path)
        shutil.copyfile(f'/kaggle/working/{LatestFiles}', f'{destination_path}/{LatestFiles}')
    
    # Initialize the repository for git operations
    repo = Repo(kaggle_repo_url)
    # Add the copied file to the staging area
    repo.index.add([f"{destination_path}/{LatestFiles}"])
    
    # Create a timestamp for the commit message
    ist = timezone('Asia/Kolkata')  # IST timezone
    timestamp = datetime.datetime.now(ist).strftime("%Y-%m-%d_%H:%M:%S")
    
    # Commit the changes with a message including the timestamp and file name
    if repo.is_dirty(untracked_files=True):
        repo.index.commit(f"{timestamp} Added files from Kaggle notebook, {LatestFiles}")
        print("Changes committed successfully.")
    else:
        # If no changes are detected, create an empty commit
        repo.git.commit(m="Empty commit to trigger contribution", allow_empty=True)
        print("Empty commit created as no changes were detected.")
    
    # Push the changes to the remote repository
    origin = repo.remote(name="origin")
    origin.push()
    print("Execution Tracking file successfully pushed to GitHub!")

except Exception as e:
    # Handle any errors that occur during the git automation process
    print(f"An error occurred at git automation code: {e}")

2025-01-27_01:18:35_ScriptFileExecution.json
Successfully cloned the git repo
Changes committed successfully.
Execution Tracking file successfully pushed to GitHub!
