# Setup

In [1]:
!pip install PyGithub


Collecting PyGithub
  Downloading PyGithub-2.6.1-py3-none-any.whl.metadata (3.9 kB)
Collecting Deprecated (from PyGithub)
  Downloading Deprecated-1.2.18-py2.py3-none-any.whl.metadata (5.7 kB)
Downloading PyGithub-2.6.1-py3-none-any.whl (410 kB)
   ---------------------------------------- 0.0/410.5 kB ? eta -:--:--
   --- ------------------------------------ 41.0/410.5 kB 1.9 MB/s eta 0:00:01
   ----------------------------------- ---- 368.6/410.5 kB 5.7 MB/s eta 0:00:01
   ---------------------------------------- 410.5/410.5 kB 6.4 MB/s eta 0:00:00
Downloading Deprecated-1.2.18-py2.py3-none-any.whl (10.0 kB)
Installing collected packages: Deprecated, PyGithub
Successfully installed Deprecated-1.2.18 PyGithub-2.6.1


In [19]:
from github import Github, GithubException
import os
from tqdm import tqdm
import pandas as pd
import re
from datetime import datetime
import time
import os.path
import base64

'''
This notebook can be used to manage the github content of a github classroom created
via a github organization. Pay attention to the first (this) chunk since there
are many convenience parameters that can be set so the whole notebook might be run
in one go for different tasks.
'''

# read in your github token

with open("C:/Users/3leso/Nextcloud/sources/github_token.txt", 'r') as f:
    token = f.read()
f.close()

# retrieve at deadline
delay = False
deadline = datetime(2025, 5, 6, 23, 59, 59)

# specify and adapt parameters accordingly

assignment_number = 2
total_points = 20

org_name = 'SMDA-2025'
ignore_users = ['elenasolar', 'dgarcia-eu', 'rutschmanna']

ignore_repos = ['template', 'smda-2025-classroom-submission-template', 'forum']
    
g = Github(token)
org = g.get_organization(org_name)
    
# specify if you want to pull/push
pull = True # if pull is True push is False vice versa (alternate between the two)

## Pull files from GitHub (atm only .ipynb)

In [20]:
def write_files(contents, path, repo):
    """
    The function writes the files from the students github repository (or a folder in there)
    to your local storage
    
    contents: a list of files/folders
    file_path: the filepath where the files should be stored locally
    repository: GitHub repository of the student
    """
    
    # if only one content, store it in a list
    if not isinstance(contents, list):
        contents = [contents]
    
    ignore = ['.DS_Store', '.Rhistory', '.ipynb_checkpoints']
    accepted_files = r'.*\.ipynb|.*\.csv|.*\.txt|.*\.json|.*\.png|.*\.jpg|.*\.jpeg|.*\.py'
    
    # loop over content files/folders
    for cont in contents:
        
        if cont.name in ignore:
            continue
        if cont.type == 'file' and not re.match(accepted_files, cont.name):
            continue
        
        # try except, mostly to handle files which have wrong encoding, etc.
        # TODO -> one could improve this, doesn't handle e.g. csv files well
        # also fails for too large files
        try:
            
            # if the content element is a file, write it to local storage
            if cont.type == 'file':
                with open(f'{path}/{cont.name}', 'wb') as f:
                    f.write(cont.decoded_content)
            
            # else (if it's a folder) call the function recursively on the elements in it
            elif cont.type == 'dir':
                # if the folder doesn't exist yet, create it
                if not os.path.exists(f'{path}/{cont.name}'):
                    os.mkdir(f'{path}/{cont.name}')
                write_files(repo.get_contents(cont.path), f'{path}/{cont.name}', repo)
                
        except AssertionError as e:
            print(e, ': failed')
            try:
                # try with requests if pygithub fails
                url = cont.download_url
                resp = requests.get(url)
                with open(f'{path}/{cont.name}', 'wb') as f:
                    f.write(resp.content)
            except:
                continue

In [21]:
def check_for_assignment(repo, folder_contents, assignment_number):
    """
    This function recursively moves through all directories in a students
    submission directory and checks if a .ipynb file for the current
    assignment is present.
    
    repo: the repository of the student
    folder_contents: the result of repo.get_contents('')
    assignment_number: the current assignment
    """
    
    # make folder_contents iterable even if there is only one file
    if not isinstance(folder_contents, list):
        folder_contents = [folder_contents]
        
    # use regex to match current assignment notebook
    match_string = fr'assignment_(0)?{assignment_number}.*\.ipynb'
    for content in folder_contents:
        if content.type == 'file':
            if re.match(match_string, content.name, re.IGNORECASE):
                return True
            else:
                continue
        elif content.type == 'dir':
            if check_for_assignment(repo, repo.get_contents(content.path), assignment_number):
                return True
            else:
                continue
    # if after visiting all directories file is not found return false
    return False

In [22]:
def fetch_assignments(repos, assignment_number, ignore):
    """
    The function fetches the contents of the folder called folder_name from each repository in repos
    and stores them locally, preserving the folder structure
    
    repos - list of student repositories
    folder_name - name of the folder where the solution to the current assignment is stored (e.g. a11)
    first - in the first assignment the solutions were stored differently (in a long text file), 
            feel free to remove that part
            
    returns: the name of the students whose submissions failed to be fetched 
             -> you might need to manually download those, or correct the folder name
    """
    
    # specify non-student contributors here -> needed to ignore some test repos + assignments repo
    # list to store the name of the students where the function ran into an error
    fails = []
    # store the submissions in the submissions/assignment_name folder
    folder = 'submissions'
    
    # create folder if it doesn't exist yet
    if not os.path.exists(folder):
        os.mkdir(folder)

    # for each repository
    for repo in tqdm(repos):
        
        # drop assignments (many colls) + David's repo
        # for student repositories there should be exactly 1 collaborator except the tutors and the professor
        colls = repo.get_collaborators()
        colls = [col for col in colls if col.login not in ignore]
        if len(colls) != 1:
            continue
        
        # create a folder for the student's submission with their name (GitHub handle) as the folder name
        student = colls[0].login
        student_folder = f'{folder}/{student}'
        
        if not os.path.exists(student_folder):
            os.mkdir(student_folder)
        
        # code might fail if the folder doesn't exist, or the repo is empty
        try:
            # get content files/folders inside the folder
            folder_contents = repo.get_contents(f'') #a0{assignment_number}
            
            # check if the current assignment is present in the students submissions
            if not isinstance(folder_contents, list):
                folder_contents = [folder_contents]

            # check for current assignment notebook in all subdirectories
            if check_for_assignment(repo, folder_contents, assignment_number) == False:
                fails.append(f'Assignment not found: {student}')

            # if the fetching didn't fail, write the folders contents to the local storage
            # current_assignment_folder = f'{student_folder}/a0{assignment_number}'
            
            # if not os.path.exists(current_assignment_folder):
            #     os.mkdir(current_assignment_folder)
            
            write_files(folder_contents, student_folder, repo)

        # empty repo
        except GithubException as e:
            fails.append(f'{e}: {student}')
            # continue
        
    fails.sort(key=lambda entry: entry.upper())
    return fails

In [23]:
# Run script with or without delay
if delay:
    try:
        now = datetime.now()
        sleep = (deadline - now).total_seconds()
        print(f'Waiting until: {deadline}; {sleep} seconds')
        time.sleep(sleep)
    except:
        pass

# Retrieve Assignments
if pull:
    org_repos = [repo for repo in list(org.get_repos()) if repo.name not in ignore_repos]

    # fetch submissions
    fails = fetch_assignments(repos=org_repos, assignment_number=assignment_number, ignore=ignore_users)
    # show students for whom pull failed
    for fail in fails:
        print(fail)

  0%|          | 0/26 [00:00<?, ?it/s]

unsupported encoding: none : failed
unsupported encoding: none : failed
unsupported encoding: none : failed


  8%|▊         | 2/26 [00:18<03:43,  9.29s/it]

unsupported encoding: none : failed


 27%|██▋       | 7/26 [01:15<03:37, 11.46s/it]

unsupported encoding: none : failed


 31%|███       | 8/26 [01:34<04:11, 13.95s/it]

unsupported encoding: none : failed


 46%|████▌     | 12/26 [02:13<02:43, 11.69s/it]

unsupported encoding: none : failed


 50%|█████     | 13/26 [02:23<02:22, 10.96s/it]

unsupported encoding: none : failed


 54%|█████▍    | 14/26 [02:30<01:57,  9.79s/it]

unsupported encoding: none : failed
unsupported encoding: none : failed


 58%|█████▊    | 15/26 [02:38<01:43,  9.40s/it]

unsupported encoding: none : failed
unsupported encoding: none : failed


 62%|██████▏   | 16/26 [02:58<02:06, 12.64s/it]

unsupported encoding: none : failed


 73%|███████▎  | 19/26 [03:20<01:02,  8.91s/it]

unsupported encoding: none : failed


 77%|███████▋  | 20/26 [03:25<00:46,  7.68s/it]

unsupported encoding: none : failed


 81%|████████  | 21/26 [03:28<00:31,  6.26s/it]

unsupported encoding: none : failed


 85%|████████▍ | 22/26 [03:35<00:26,  6.52s/it]

unsupported encoding: none : failed


 96%|█████████▌| 25/26 [03:50<00:05,  5.36s/it]

unsupported encoding: none : failed


100%|██████████| 26/26 [03:53<00:00,  8.99s/it]

Assignment not found: anastasiasiebers
Assignment not found: AnLeWe
Assignment not found: Ari-manius
Assignment not found: bacherni
Assignment not found: cgl-04
Assignment not found: cristinaflintoaca
Assignment not found: ekaterinakabashko
Assignment not found: fabioconsiglio
Assignment not found: Guacabmole
Assignment not found: jan-thiele7
Assignment not found: Jonah879
Assignment not found: Jonaschrade
Assignment not found: KatrinSauter
Assignment not found: KMayer24
Assignment not found: lorenzrck
Assignment not found: maitekuester
Assignment not found: MaximilianWeiland
Assignment not found: MouPas
Assignment not found: NinaGeyer-KN
Assignment not found: samrauh
Assignment not found: sinamayer
Assignment not found: TimothyGGGGG
Assignment not found: vulonviing
Assignment not found: xiaoxuanzhang1





## Create feedback files in local student directories

In [24]:
# Create course specific idintifier dictionary with Git Classroom roster
roster = pd.read_csv('sources/classroom_roster.csv')[['github_username', 'identifier']].replace(
    to_replace=r'"', value='', regex=True)

identifier_dict = dict(roster.values)
identifier_dict
# if test:
#    identifier_dict['jellyjoy-nobot'] = 'Jelly, Joy' # for testing

{'bacherni': 'Bacher, Niklas',
 'cristinaflintoaca': 'Flintoaca-Cojocea, Cristina Ioana',
 'cgl-04': 'George-Lembach, Carl',
 'NinaGeyer-KN': 'Geyer, Nina',
 'Guacabmole': 'Giovanelli-Rosendo-Guimaraes, Samuel',
 'TimothyGGGGG': 'Gunson, Timothy',
 'Jonah879': 'Hartmann, Jonah',
 'Ari-manius': 'Helten, Marius',
 'DajanaHennig': 'Hennig, Dajana',
 'ekaterinakabashko': 'Kabashko, Ekaterina',
 'maitekuester': 'Küster, Maite',
 'fabioconsiglio': 'Mahner, Fabian',
 'KMayer24': 'Mayer, Katharina',
 'sinamayer': 'Mayer, Sina-Marie',
 'MouPas': 'Mounchid, Pascal',
 'samrauh': 'Rauh, Samuel',
 'lorenzrck': 'Rückert, Lorenz',
 'KatrinSauter': 'Sauter, Katrin',
 'Jonaschrade': 'Schrade, Jonas',
 'anastasiasiebers': 'Siebers, Anastasia',
 'jan-thiele7': 'Thiele, Jan',
 'vulonviing': 'Ulu, Emrecan',
 'MaximilianWeiland': 'Weiland, Maximilian',
 'AnLeWe': 'Werner, Anna',
 'xiaoxuanzhang1': 'Zhang, Xiaoxuan'}

### Please don't change your usernames guys!

In [27]:
def create_feedback_files(folder, identifiers, assignment, total_points, schema):
    '''
    This function takes a folder (e.g. 'submissions') and an identifier dict
    with students names and github handles to create a feedback.txt draft
    in each student's personal submission folder for the current assignment.
    
    folder: the folder where the student submission folders are located
    identifiers: dict of form {'githubHandle' : 'studentName'}
    assignment: int # of the current assignment (e.g. 3)
    total_points: int # of the assignment's total points
    schema: string grading schema of the given assignment
    '''
    student_folders = [f.name for f in os.scandir(folder) if f.is_dir()]

    overwrite = False
    temp = []
    
    for student_folder in student_folders:
        student_name = None
        for key, value in identifiers.items():
            if key == student_folder:
                student_name = value
        
        header = f'Feedback Assignment {assignment}\n-----\n{student_name}: /{total_points} Points'#  \nAchieved Bonus: '
        header_undefined = f'Feedback Assignment {assignment}\n-----\n: /{total_points} Points'

        if os.path.isfile(f'{folder}/{student_folder}/feedback_{assignment}.txt'):
            if not overwrite and len(temp) == 0:
                overwrite_file = input('Files already exist. Overwrite: ')
                temp.append(1)
                if overwrite_file == 'yes':
                    overwrite = True

        else:
            if student_name != None:            
                with open(f'{folder}/{student_folder}/feedback_{assignment}.txt', 'w', encoding='utf-8') as f:
                    f.write(f'{header}\n{schema}')
                f.close()
            
            else:
                with open(f'{folder}/{student_folder}/feedback_{assignment}.txt', 'w', encoding='utf-8') as f:
                    f.write(f'{header_undefined}\n{schema}')
                f.close()
        
        if student_name != None:            
            if overwrite:
                with open(f'{folder}/{student_folder}/feedback_{assignment}.txt', 'w', encoding='utf-8') as f:
                    f.write(f'{header}\n{schema}')
                f.close()
        else:
            if overwrite == 'yes':
                with open(f'{folder}/{student_folder}/feedback_{assignment}.txt', 'w', encoding='utf-8') as f:
                    f.write(f'{header_undefined}\n{schema}')
                f.close()

In [28]:
# specify the text that is added to the feedback files
with open('sources/grading_schema.txt', 'r') as f:
    grading_schema = f.read()
f.close()

# Create the feedback files for each student
if pull:
    create_feedback_files(folder='submissions', 
                          identifiers=identifier_dict, 
                          assignment=assignment_number, 
                          total_points=total_points,
                          schema=grading_schema)

## Push feedback from local to GitHub

In [14]:
ignore_repos = ["submission-TimothyGGGGG", "template", "smda-2025-classroom-submission-template", "forum"]
#ignore_repos = []
org_repos = [repo for repo in list(org.get_repos()) if repo.name not in ignore_repos]
org_repos

[Repository(full_name="SMDA-2025/submission-KatrinSauter"),
 Repository(full_name="SMDA-2025/submission-anastasiasiebers"),
 Repository(full_name="SMDA-2025/submission-bacherni"),
 Repository(full_name="SMDA-2025/submission-ekaterinakabashko"),
 Repository(full_name="SMDA-2025/submission-MaximilianWeiland"),
 Repository(full_name="SMDA-2025/submission-MouPas"),
 Repository(full_name="SMDA-2025/submission-Jonah879"),
 Repository(full_name="SMDA-2025/submission-samrauh"),
 Repository(full_name="SMDA-2025/submission-sinamayer"),
 Repository(full_name="SMDA-2025/submission-cgl-04"),
 Repository(full_name="SMDA-2025/submission-cristinaflintoaca"),
 Repository(full_name="SMDA-2025/submission-NinaGeyer-KN"),
 Repository(full_name="SMDA-2025/submission-AnLeWe"),
 Repository(full_name="SMDA-2025/submission-vulonviing"),
 Repository(full_name="SMDA-2025/submission-Jonaschrade"),
 Repository(full_name="SMDA-2025/submission-KMayer24"),
 Repository(full_name="SMDA-2025/submission-fabioconsiglio"),


In [15]:
ignore_users

org_repos[23]

Repository(full_name="SMDA-2025/submission-TimothyGGGGG-1")

In [16]:
collaborators = org_repos[23].get_collaborators()
[col for col in collaborators if col.login not in ignore_users]

[NamedUser(login="TimothyGGGGG")]

In [17]:
def push_feedback(repos, folder, assignment, ignore):
    '''
    This function pushes the feedback files for a given assignment to
    the students' personal github repositories.
    
    repos: the organization repositories (each students personal repo)
    folder: string of folder where student repos are stored locally
    assignment: int # of the current assignment
    ignore: list of githandles that should be ignored (e.g. admins)
    '''
    fails = []
    
    for repo in tqdm(repos):
        fail = False
        collaborators = repo.get_collaborators()
        collaborators = [col for col in collaborators if col.login not in ignore]
        
        
        student = collaborators[0].login
        student_folder = f'{folder}/{student}'
        feedback_file = f'feedback_{assignment}.txt'
        feedback_file_path = f'{student_folder}/{feedback_file}'
        
        try:
            with open(feedback_file_path, 'r') as f:
                feedback_content = f.read()
            f.close()
            if re.match(r'.*\n-----\n.+,.+: \/\d{2} Points.*', feedback_content, re.MULTILINE):
                feedback_content = False
        except:
            feedback_content = False            
            
        commit_message = f'uploaded feedback_{assignment}'
        
        if not feedback_content:
            fail = f'Empty feedback file: {student}'
        else:
            try:
                repo.create_file(feedback_file, content=feedback_content, message=commit_message)
            except:
                fail = f'Git error: {student}'
        
        if fail != False:
            fails.append(fail)
    
    fails.sort(key=lambda entry: entry.upper())
    return fails

In [18]:
# Push Feedback

if not pull:
    org_repos = [repo for repo in list(org.get_repos()) if repo.name not in ignore_repos]
    
    # fetch submissions
    feedback_fails = push_feedback(repos=org_repos, folder='submissions', assignment=assignment_number, ignore=ignore_users)
    # show students for whom push failed
    for fail in feedback_fails:
        print(feedback_fails)

100%|██████████| 25/25 [00:34<00:00,  1.38s/it]

['Git error: anastasiasiebers', 'Git error: AnLeWe', 'Git error: Ari-manius', 'Git error: bacherni', 'Git error: cgl-04', 'Git error: cristinaflintoaca', 'Git error: DajanaHennig', 'Git error: ekaterinakabashko', 'Git error: fabioconsiglio', 'Git error: Guacabmole', 'Git error: jan-thiele7', 'Git error: Jonah879', 'Git error: Jonaschrade', 'Git error: KatrinSauter', 'Git error: KMayer24', 'Git error: lorenzrck', 'Git error: MaximilianWeiland', 'Git error: MouPas', 'Git error: NinaGeyer-KN', 'Git error: samrauh', 'Git error: sinamayer', 'Git error: vulonviing', 'Git error: xiaoxuanzhang1']
['Git error: anastasiasiebers', 'Git error: AnLeWe', 'Git error: Ari-manius', 'Git error: bacherni', 'Git error: cgl-04', 'Git error: cristinaflintoaca', 'Git error: DajanaHennig', 'Git error: ekaterinakabashko', 'Git error: fabioconsiglio', 'Git error: Guacabmole', 'Git error: jan-thiele7', 'Git error: Jonah879', 'Git error: Jonaschrade', 'Git error: KatrinSauter', 'Git error: KMayer24', 'Git error: 




## Create .gitignore File in Student Repos

In [None]:
# if test:
#     org_repos = [repo for repo in list(org.get_repos()) if repo.name not in ignore_repos][:1]
# else:
#     org_repos = [repo for repo in list(org.get_repos()) if repo.name not in ignore_repos]

# fails = []

# for repo in tqdm(org_repos):
#     try:
#         repo.create_file('.gitignore', content='.ipynb_checkpoints/', message='uploaded .gitignore')
#     except:
#         fails.append(repo)

# for fail in fails:
#     print(fail)