In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Fetch the user's GitHub contributions page. The timeout keeps a stalled
# connection from hanging the notebook, and raise_for_status() surfaces HTTP
# errors here instead of letting an error page be parsed further down.
contributions = requests.get(
    'https://github.com/users/gracewanggw/contributions',
    timeout=30,
)
contributions.raise_for_status()

In [3]:
# Parse the response body as HTML using the "html.parser" backend.
soup = BeautifulSoup(contributions.text, "html.parser")

In [4]:
# Collect the text content of every <tool-tip> element in the parsed page.
tooltip_nodes = soup.find_all('tool-tip')
items = [node.get_text() for node in tooltip_nodes]

In [5]:
import re

In [6]:
def extract(input_string):
    """Pull the calendar day and contribution count out of a tooltip string.

    Args:
        input_string: Tooltip text, assumed to look like
            "3 contributions on December 18th." or
            "No contributions on December 18th.".

    Returns:
        A ``(date_string, contributions)`` tuple, e.g. ``("December 18", 3)``.
        The count is 0 when the text has no "<number> contribution(s)"
        phrase. Returns ``(None, None)`` when no "<word> <day>." date is found.
    """
    # A word, a 1-2 digit day, an optional lowercase suffix (st/nd/rd/th),
    # then the closing period; only "<word> <day>" is captured.
    date_match = re.search(r"(\w+ \d{1,2})[a-z]*\.", input_string)
    if date_match is None:
        return None, None

    # Allow thousands separators so "1,024 contributions" yields 1024 rather
    # than matching only the trailing "024".
    count_match = re.search(r"(\d[\d,]*) contributions?", input_string)
    count = int(count_match.group(1).replace(",", "")) if count_match else 0

    return date_match.group(1), count


In [7]:
from datetime import datetime, timedelta

In [8]:
def get_date(day, today=None):
    """Attach the most recent non-future year to a "Month day" string.

    Args:
        day: Date without a year, in "%B %d" form (e.g. "December 18").
        today: Optional naive datetime to treat as "now"; defaults to
            ``datetime.now()``. Useful for reproducible checks.

    Returns:
        The date formatted as "%B %d, %Y", using the latest year in which
        that calendar day exists and is not after ``today``.

    Raises:
        ValueError: If ``day`` does not match "%B %d".
        TypeError: If ``day`` is not a string.
    """
    current_date = datetime.now() if today is None else today

    # Parse against a leap year: with no year in the input, strptime falls
    # back to 1900, which is not a leap year and rejects "February 29".
    parsed = datetime.strptime(day + " 2000", "%B %d %Y")

    # Walk back from the current year to the first year where the date both
    # exists and is not in the future. For ordinary dates that is the current
    # or the previous year; February 29 may need to go back further (at most
    # 8 years across a non-leap century year).
    for year in range(current_date.year, current_date.year - 9, -1):
        try:
            candidate = parsed.replace(year=year)
        except ValueError:
            # February 29 does not exist in this year.
            continue
        if candidate <= current_date:
            return candidate.strftime("%B %d, %Y")

    raise ValueError(f"no valid year found for {day!r}")


In [9]:
# Spot check. The year in the result depends on the date this cell is run,
# because get_date compares against datetime.now().
get_date('December 18')

'December 18, 2023'

In [10]:
from collections import defaultdict

In [11]:
# Sum the per-day contribution counts into per-month totals.
contrib_dict = defaultdict(int)
for item in items:
    day, contribs = extract(item)

    # extract() returns (None, None) for text without a recognizable date;
    # skip those entries instead of failing inside get_date().
    if day is None:
        continue

    date_object = datetime.strptime(get_date(day), "%B %d, %Y")

    # Month bucket label (e.g., 'Dec 2023')
    month_key = date_object.strftime("%b %Y")

    # Update the contributions for the corresponding month
    contrib_dict[month_key] += contribs

In [12]:
contrib_dict

defaultdict(int,
            {'Dec 2023': 131,
             'Dec 2022': 0,
             'Jan 2023': 3,
             'Feb 2023': 4,
             'Mar 2023': 0,
             'Apr 2023': 31,
             'May 2023': 13,
             'Jun 2023': 0,
             'Jul 2023': 0,
             'Aug 2023': 0,
             'Sep 2023': 14,
             'Oct 2023': 17,
             'Nov 2023': 14})

In [13]:
# One record per month, in the dictionary's insertion order.
data = [
    {'month': month, 'GitHub Contributions': total}
    for month, total in contrib_dict.items()
]

In [14]:
data

[{'month': 'Dec 2023', 'GitHub Contributions': 131},
 {'month': 'Dec 2022', 'GitHub Contributions': 0},
 {'month': 'Jan 2023', 'GitHub Contributions': 3},
 {'month': 'Feb 2023', 'GitHub Contributions': 4},
 {'month': 'Mar 2023', 'GitHub Contributions': 0},
 {'month': 'Apr 2023', 'GitHub Contributions': 31},
 {'month': 'May 2023', 'GitHub Contributions': 13},
 {'month': 'Jun 2023', 'GitHub Contributions': 0},
 {'month': 'Jul 2023', 'GitHub Contributions': 0},
 {'month': 'Aug 2023', 'GitHub Contributions': 0},
 {'month': 'Sep 2023', 'GitHub Contributions': 14},
 {'month': 'Oct 2023', 'GitHub Contributions': 17},
 {'month': 'Nov 2023', 'GitHub Contributions': 14}]

In [16]:
def extract_date(item):
    """Return the datetime parsed from ``item['month']`` (format "%b %Y")."""
    month_label = item['month']
    return datetime.strptime(month_label, "%b %Y")

# Order the records chronologically. The 'month' labels are strings, so they
# are parsed into datetimes by extract_date rather than compared as text.
data = sorted(data, key=extract_date)

In [17]:
# Drop the first record, which after the chronological sort is the earliest
# month. NOTE(review): presumably this removes the partial month at the start
# of the one-year window — confirm. This cell is not idempotent: each re-run
# removes one more month.
data = data[1:]

In [18]:
data

[{'month': 'Jan 2023', 'GitHub Contributions': 3},
 {'month': 'Feb 2023', 'GitHub Contributions': 4},
 {'month': 'Mar 2023', 'GitHub Contributions': 0},
 {'month': 'Apr 2023', 'GitHub Contributions': 31},
 {'month': 'May 2023', 'GitHub Contributions': 13},
 {'month': 'Jun 2023', 'GitHub Contributions': 0},
 {'month': 'Jul 2023', 'GitHub Contributions': 0},
 {'month': 'Aug 2023', 'GitHub Contributions': 0},
 {'month': 'Sep 2023', 'GitHub Contributions': 14},
 {'month': 'Oct 2023', 'GitHub Contributions': 17},
 {'month': 'Nov 2023', 'GitHub Contributions': 14},
 {'month': 'Dec 2023', 'GitHub Contributions': 131}]