In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime
import os
import csv
import github

# Function to scrape data from socialblade.com
def scrape_socialblade(url, platform, timeout=30):
    """Scrape a ranking table from a web page and save it as a timestamped CSV.

    The page is expected to contain a ``<table class="sortable-table">``; its
    ``<th>`` cells become the CSV header and each following ``<tr>`` becomes
    one CSV row.

    Args:
        url: Address of the page to fetch.
        platform: Label used as the prefix of the generated file name.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The relative path of the written CSV file, of the form
        ``<YYYY-MM>/<platform>_top_50_<YYYY-MM-DD_HH-MM-SS>.csv``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no table with class
            ``sortable-table``.
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the table containing the data
    table = soup.find('table', {'class': 'sortable-table'})
    if table is None:
        raise ValueError(
            f"No table with class 'sortable-table' found at {url}"
        )

    # Extract the table headers
    headers = [header.text.strip() for header in table.find_all('th')]

    # Extract the data rows; the first <tr> is skipped as the header row.
    data = []
    for row in table.find_all('tr')[1:]:
        cells = [cell.text.strip() for cell in row.find_all('td')]
        # Rows without any <td> carry no data and would only add an
        # all-empty line to the CSV.
        if cells:
            data.append(cells)

    # Create a Pandas DataFrame from the data
    df = pd.DataFrame(data, columns=headers)

    # Take a single timestamp so the directory and the file name always agree.
    now = datetime.datetime.now()
    filename = f"{now.strftime('%Y-%m')}/{platform}_top_50_{now.strftime('%Y-%m-%d_%H-%M-%S')}.csv"

    # Create the directories if they don't exist
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Save the DataFrame to a CSV file
    df.to_csv(filename, index=False)

    return filename

# Page to scrape for each platform, keyed by the label that is later used as
# the CSV file-name prefix. Insertion order is the order the pages are scraped.
urls = {
    "tiktok": "https://socialblade.com/tiktok/top/50/followers",
    "youtube": "https://socialblade.com/youtube/top/50/mostsubscribed",
    "instagram": "https://socialblade.com/instagram/top/50/followers",
    "twitter": "https://socialblade.com/twitter/top/50/followers",
}

# Scrape data for each platform, remembering the path each CSV was written to.
# The file names embed a to-the-second timestamp, so the upload step below must
# reuse these exact paths rather than rebuild them from a later clock reading.
saved_files = {}
for platform, url in urls.items():
    filename = scrape_socialblade(url, platform)
    saved_files[platform] = filename
    print(f"Scraped data for {platform} and saved to {filename}")

# Push the data to GitHub repository
# The token is read from the GITHUB_ACCESS_TOKEN environment variable when it
# is set, so a real credential does not have to be written into this file; the
# placeholder remains as the fallback.
access_token = os.environ.get('GITHUB_ACCESS_TOKEN', 'YOUR_GITHUB_ACCESS_TOKEN')
repo_name = 'YOUR_GITHUB_REPOSITORY'

# Create a GitHub instance
gh = github.Github(access_token)
repo = gh.get_repo(repo_name)

# Commit and push the scraped data
for platform, filepath in saved_files.items():
    # Read the CSV file (pandas wrote it as UTF-8, so read it back the same way
    # instead of relying on the platform's default encoding)
    with open(filepath, 'r', encoding='utf-8') as file:
        content = file.read()

    # Commit the file to the repository under the same relative path
    repo.create_file(filepath, f"Add {platform} top 50 data", content)

print("Data pushed to GitHub repository")
