In [1]:
import os
import requests
import shutil
import subprocess
from github import Github
from dotenv import load_dotenv
from datetime import datetime
from github import Github, GithubException
from datetime import datetime


# Setting up credentials 

In [2]:
# Pull the variables from the local .env file into the process environment,
# then read the four settings the migration uses (None when a variable is unset).
load_dotenv()
GITLAB_TOKEN = os.environ.get('GITLAB_TOKEN')
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')
GITHUB_USER = os.environ.get('GITHUB_USER')
GITLAB_GROUP_ID = os.environ.get('GITLAB_GROUP_ID')

In [3]:
# Display the group value read from the environment as this cell's output.
GITLAB_GROUP_ID

'jcdemomig-group'

# Migration Settings

In [4]:
# When True, the runner calls migrate_issues() to copy GitLab issues
# (with their non-system comments) into GitHub issues.
MIGRATE_ISSUES = True

# When True, the runner calls migrate_prs() to recreate GitLab merge requests
# as GitHub pull requests.
MIGRATE_PRS = True

# When True, the runner calls migrate_wiki() to push the GitLab wiki repository
# to the GitHub wiki.
MIGRATE_WIKI = True

# Strategy passed to handle_duplicates() after each repository is migrated:
#   "rename"  -> a duplicate path is re-created under a timestamped "_DUPLICATE_" name
#   "replace" -> nothing is rewritten; the duplicate is only reported in the log
DUPLICATE_STRATEGY = "rename"  # the other accepted value is "replace"

# Local directory that receives the temporary git clones made during migration.
LOCAL_BASE = "tmp_migration"

# Logging

In [5]:

def log(msg, loglines, printout=True):
    """Timestamp ``msg``, optionally echo it, and record it in ``loglines``.

    Args:
        msg: Content of the log entry.
        loglines: List collecting every formatted entry; mutated in place.
        printout: When true, the formatted entry is also printed.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = "[{}] {}".format(stamp, msg)
    if printout:
        print(entry)
    loglines.append(entry)


# Core Migration Functions

In [6]:

def get_gitlab_repos(token, group):
    """List every project of a GitLab group, following pagination.

    Args:
        token: GitLab access token, sent in the ``PRIVATE-TOKEN`` header.
        group: Numeric group ID or group path (e.g. ``"parent/child"``).

    Returns:
        list: Project dicts as returned by the GitLab API, in page order.

    Raises:
        ValueError: If ``group`` is empty or None.
        Exception: If any page request answers with a non-200 status.
    """
    # Fail before any network call: an unset env var would otherwise be sent as "None".
    if not group:
        raise ValueError("A GitLab group ID or path is required")

    from urllib.parse import quote, unquote

    # A group *path* must be URL-encoded ("/" -> "%2F"); numeric IDs are unaffected.
    # unquote() first so a caller that already encoded the path is not double-encoded.
    encoded_group = quote(unquote(str(group)), safe="")
    url = f"https://gitlab.com/api/v4/groups/{encoded_group}/projects"
    headers = {"PRIVATE-TOKEN": token}
    repos = []
    page = 1
    while True:
        # The timeout keeps a stalled connection from hanging the notebook forever.
        resp = requests.get(
            url,
            headers=headers,
            params={"per_page": 100, "page": page},
            timeout=30,
        )
        if resp.status_code != 200:
            raise Exception(f"Failed to list GitLab projects: {resp.text}")
        batch = resp.json()
        if not batch:
            # An empty page marks the end of the listing.
            break
        repos.extend(batch)
        page += 1
    return repos

def github_repo_exists(user, repo, token):
    """Report whether the GitHub repository ``user/repo`` answers HTTP 200.

    Args:
        user: Owner segment of the repository URL.
        repo: Repository name.
        token: GitHub token sent in the ``Authorization`` header.

    Returns:
        bool: True only when the repository endpoint returns status 200.
    """
    endpoint = "https://api.github.com/repos/{}/{}".format(user, repo)
    auth_header = {"Authorization": "token {}".format(token)}
    response = requests.get(endpoint, headers=auth_header)
    return response.status_code == 200

def create_github_repo(user, repo, token, private=False):
    """Create a repository for the authenticated GitHub user.

    Args:
        user: Not used by the request; the repository is created through the
            ``/user/repos`` endpoint for whoever owns ``token``.
        repo: Name of the repository to create.
        token: GitHub token sent in the ``Authorization`` header.
        private: Value of the ``private`` field in the request payload.

    Returns:
        bool: True when GitHub answers 201, False for any other status.
    """
    payload = dict(name=repo, private=private)
    auth_header = {"Authorization": "token {}".format(token)}
    response = requests.post(
        "https://api.github.com/user/repos",
        headers=auth_header,
        json=payload,
    )
    was_created = response.status_code == 201
    return was_created


def _redact_secrets(text, secrets):
    """Return ``str(text)`` with every non-empty value in ``secrets`` masked."""
    redacted = str(text)
    for secret in secrets:
        if secret:
            redacted = redacted.replace(secret, "***")
    return redacted


def migrate_code(gitlab_repo, github_user, config, loglines):
    """Mirror the git data of one GitLab project to a GitHub repository.

    The project is cloned with ``git clone --mirror`` into a timestamped folder
    below ``config['LOCAL_BASE']``, the GitHub repository is created when it does
    not exist yet, and the clone is pushed with ``git push --mirror``.

    Args:
        gitlab_repo: GitLab project dict; reads ``path``, ``http_url_to_repo``
            and (when the GitHub repo must be created) ``visibility``.
        github_user: GitHub account that owns the target repository.
        config: Dict providing ``GITLAB_TOKEN``, ``GITHUB_TOKEN`` and ``LOCAL_BASE``.
        loglines: List collecting log entries (see ``log``).

    Returns:
        bool: True when the push succeeded, False when cloning, repository
        creation or pushing failed (the reason is logged).
    """
    repo_name = gitlab_repo['path']
    gl_clone_url = gitlab_repo['http_url_to_repo']
    gl_clone_url_with_token = gl_clone_url.replace("https://", f"https://oauth2:{config['GITLAB_TOKEN']}@")

    # === Create a unique local path using timestamp ===
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    local_path = os.path.join(config['LOCAL_BASE'], f"{repo_name}_{timestamp}")

    gh_push_url = f"https://{github_user}:{config['GITHUB_TOKEN']}@github.com/{github_user}/{repo_name}.git"

    # A failed git command stringifies with its full argv, which contains the
    # token-bearing URLs above; these values are masked before anything is logged.
    secrets = (config['GITLAB_TOKEN'], config['GITHUB_TOKEN'])

    try:
        os.makedirs(config['LOCAL_BASE'], exist_ok=True)
        subprocess.run(["git", "clone", "--mirror", gl_clone_url_with_token, local_path], check=True)
        log(f"Cloned {repo_name} from GitLab.", loglines)
    except Exception as e:
        log(f"❌ Failed to clone {repo_name}: {_redact_secrets(e, secrets)}", loglines)
        return False

    if not github_repo_exists(github_user, repo_name, config['GITHUB_TOKEN']):
        # Anything that is not explicitly public on GitLab is created private.
        is_private = gitlab_repo["visibility"] != "public"
        if not create_github_repo(github_user, repo_name, config['GITHUB_TOKEN'], is_private):
            log(f"❌ Failed to create {repo_name} on GitHub.", loglines)
            return False
    else:
        log(f"Repo {repo_name} already exists on GitHub.", loglines)

    try:
        # cwd= runs git inside the clone without changing the notebook's own
        # working directory, so nothing has to be restored on failure.
        subprocess.run(["git", "remote", "add", "github", gh_push_url], check=True, cwd=local_path)
        subprocess.run(["git", "push", "--mirror", "github"], check=True, cwd=local_path)
    except Exception as e:
        log(f"❌ Failed to push {repo_name} to GitHub: {_redact_secrets(e, secrets)}", loglines)
        return False

    # Instead of deleting, just log where the folder is
    log(f"Local clone kept at {local_path} for {repo_name}. You can delete it manually later.", loglines)
    return True


# Issues, PRs, Wiki, Milestones, Labels, Attachments, Comments, Author Mapping

In [10]:

def migrate_labels_milestones(gitlab_repo, gh_repo, loglines):
    """Recreate the labels and milestones of a GitLab project on GitHub.

    Creation is best-effort: a label or milestone GitHub refuses (for example
    because it already exists) is logged and skipped.

    Args:
        gitlab_repo: GitLab project dict; ``id`` selects the project.
        gh_repo: PyGithub repository object that receives the labels/milestones.
        loglines: List collecting log entries (see ``log``).
    """
    headers = {"PRIVATE-TOKEN": GITLAB_TOKEN}
    project_url = f"https://gitlab.com/api/v4/projects/{gitlab_repo['id']}"
    project_name = gitlab_repo.get('path', gitlab_repo['id'])
    hex_digits = "0123456789abcdefABCDEF"

    # Labels
    labels_resp = requests.get(f"{project_url}/labels", headers=headers, params={"per_page": 100})
    if labels_resp.status_code == 200:
        gl_labels = labels_resp.json()
    else:
        # An error payload is a dict, not a list of labels; do not iterate over it.
        log(f"Failed to list GitLab labels for {project_name}: HTTP {labels_resp.status_code}", loglines)
        gl_labels = []
    for gl_label in gl_labels:
        # GitLab reports colours as "#RRGGBB"; GitHub expects the six hex digits only.
        color = (gl_label.get('color') or '').lstrip('#')
        if len(color) != 6 or any(ch not in hex_digits for ch in color):
            color = 'FFFFFF'
        try:
            gh_repo.create_label(
                name=gl_label['name'],
                color=color,
                # The key can be present with a JSON null, so a .get() default is not enough.
                description=gl_label.get('description') or '',
            )
            log(f"Created label {gl_label['name']}", loglines)
        except GithubException as e:
            # Label may already exist
            log(f"Skipped label {gl_label['name']}: {e}", loglines)

    # Milestones
    milestones_resp = requests.get(f"{project_url}/milestones", headers=headers, params={"per_page": 100})
    if milestones_resp.status_code == 200:
        gl_milestones = milestones_resp.json()
    else:
        log(f"Failed to list GitLab milestones for {project_name}: HTTP {milestones_resp.status_code}", loglines)
        gl_milestones = []
    for gl_ms in gl_milestones:
        try:
            gh_repo.create_milestone(
                title=gl_ms['title'],
                state='closed' if gl_ms['state'] == 'closed' else 'open',
                description=gl_ms.get('description') or '',
            )
            log(f"Created milestone {gl_ms['title']}", loglines)
        except GithubException as e:
            log(f"Skipped milestone {gl_ms['title']}: {e}", loglines)

def migrate_issues(gitlab_repo, gh_repo, loglines):
    """Copy the issues of a GitLab project, with their comments, to GitHub.

    Each issue is recreated with an attribution header, its labels and, when a
    milestone with the same title exists on GitHub, its milestone. Non-system
    notes become comments, and issues closed on GitLab are closed on GitHub.

    NOTE: only the first page (up to 100 entries) of issues and of each issue's
    notes is read.

    Args:
        gitlab_repo: GitLab project dict; ``id`` selects the project.
        gh_repo: PyGithub repository object that receives the issues.
        loglines: List collecting log entries (see ``log``).
    """
    headers = {"PRIVATE-TOKEN": GITLAB_TOKEN}
    project_url = f"https://gitlab.com/api/v4/projects/{gitlab_repo['id']}"
    # GitLab lists newest first by default; ask for oldest first so issues (and
    # the comments below) are recreated in their original chronological order.
    oldest_first = {"per_page": 100, "order_by": "created_at", "sort": "asc"}

    issues_resp = requests.get(
        f"{project_url}/issues",
        headers=headers,
        params={"scope": "all", **oldest_first},
    )
    if issues_resp.status_code != 200:
        log(f"Failed to list GitLab issues: HTTP {issues_resp.status_code} {issues_resp.text}", loglines)
        return

    # Milestone objects are needed (not titles) to attach a milestone to an issue.
    try:
        gh_milestones = {ms.title: ms for ms in gh_repo.get_milestones(state='all')}
    except GithubException as e:
        log(f"Could not read GitHub milestones, issues will be created without them: {e}", loglines)
        gh_milestones = {}

    for gl_issue in issues_resp.json():
        # Map labels
        labels = list(gl_issue.get('labels') or [])
        # Compose body ("or" guards: the API returns JSON null for missing values)
        author = (gl_issue.get('author') or {}).get('username', 'unknown')
        body = f"**Imported from GitLab issue #{gl_issue['iid']} by {author}**\n\n"
        body += gl_issue.get('description') or ''
        # Create issue
        issue_args = {"title": gl_issue['title'], "body": body, "labels": labels}
        gl_milestone = gl_issue.get('milestone') or {}
        if gl_milestone.get('title') in gh_milestones:
            issue_args["milestone"] = gh_milestones[gl_milestone['title']]
        gh_issue = gh_repo.create_issue(**issue_args)
        # Comments
        notes_resp = requests.get(
            f"{project_url}/issues/{gl_issue['iid']}/notes",
            headers=headers,
            params=oldest_first,
        )
        if notes_resp.status_code == 200:
            comments = notes_resp.json()
        else:
            log(f"Could not read comments of issue '{gl_issue['title']}': HTTP {notes_resp.status_code}", loglines)
            comments = []
        for comment in comments:
            if comment.get('system'):
                continue
            commenter = (comment.get('author') or {}).get('username', 'unknown')
            gh_issue.create_comment(f"**Original comment by {commenter}**:\n\n{comment['body']}")
        # Close if needed
        if gl_issue['state'] == 'closed':
            gh_issue.edit(state='closed')
        log(f"Issue '{gl_issue['title']}' migrated.", loglines)
        
        
def migrate_prs(gitlab_repo, gh_repo, loglines):
    """Recreate the merge requests of a GitLab project as GitHub pull requests.

    Each merge request becomes a pull request between the same source and target
    branches, with an attribution header and its non-system notes as comments.
    Merge requests that are merged or closed on GitLab are closed on GitHub.
    A failure for one merge request is logged and does not stop the others.

    NOTE: only the first page (up to 100 entries) of merge requests and of each
    merge request's notes is read.

    Args:
        gitlab_repo: GitLab project dict; ``id`` selects the project.
        gh_repo: PyGithub repository object that receives the pull requests.
        loglines: List collecting log entries (see ``log``).
    """
    headers = {"PRIVATE-TOKEN": GITLAB_TOKEN}
    project_url = f"https://gitlab.com/api/v4/projects/{gitlab_repo['id']}"
    # GitLab lists newest first by default; ask for oldest first so pull requests
    # and their comments are recreated in the original chronological order.
    oldest_first = {"per_page": 100, "order_by": "created_at", "sort": "asc"}

    mrs_resp = requests.get(
        f"{project_url}/merge_requests",
        headers=headers,
        params={"scope": "all", **oldest_first},
    )
    if mrs_resp.status_code != 200:
        log(f"Failed to list GitLab merge requests: HTTP {mrs_resp.status_code} {mrs_resp.text}", loglines)
        return

    for mr in mrs_resp.json():
        # "or" guards: the API returns JSON null for a missing author/description,
        # and this code runs outside the try block below.
        author = (mr.get('author') or {}).get('username', 'unknown')
        body = f"**Imported from GitLab MR !{mr['iid']} by {author}**\n\n"
        body += mr.get('description') or ''
        try:
            gh_pr = gh_repo.create_pull(
                title=mr['title'],
                body=body,
                head=mr['source_branch'],
                base=mr['target_branch']
            )
            # Comments (notes)
            notes_resp = requests.get(
                f"{project_url}/merge_requests/{mr['iid']}/notes",
                headers=headers,
                params=oldest_first,
            )
            if notes_resp.status_code == 200:
                comments = notes_resp.json()
            else:
                log(f"Could not read comments of MR '{mr['title']}': HTTP {notes_resp.status_code}", loglines)
                comments = []
            for comment in comments:
                if comment.get('system'):
                    continue
                commenter = (comment.get('author') or {}).get('username', 'unknown')
                gh_pr.create_issue_comment(f"**Original comment by {commenter}**:\n\n{comment['body']}")
            # Neither a merged nor a closed merge request should stay open on GitHub.
            if mr['state'] in ('merged', 'closed'):
                gh_pr.edit(state='closed')
            log(f"Pull Request '{mr['title']}' migrated.", loglines)
        except GithubException as e:
            if "No commits between" in str(e):
                log(f"Skipping PR '{mr['title']}': no commits between {mr['source_branch']} and {mr['target_branch']}.", loglines)
            else:
                log(f"Failed to migrate PR '{mr['title']}': {e}", loglines)
        except Exception as e:
            log(f"Failed to migrate PR '{mr['title']}': {e}", loglines)

def migrate_wiki(gitlab_repo, gh_repo, loglines):
    """Mirror the GitLab wiki repository of a project to the GitHub wiki.

    The wiki is cloned bare into ``LOCAL_BASE/<repo>_wiki``, pushed to GitHub
    with ``git push --mirror`` and the local clone is removed afterwards,
    whether or not the migration succeeded. Failures are logged, not raised.

    Args:
        gitlab_repo: GitLab project dict; reads ``wiki_enabled``, ``path`` and
            ``http_url_to_repo``.
        gh_repo: Unused; the target URL is built from ``GITHUB_USER``.
        loglines: List collecting log entries (see ``log``).
    """
    # Download the GitLab wiki as a git repo, push to GitHub wiki
    if not gitlab_repo.get('wiki_enabled'):
        log(f"No wiki enabled for {gitlab_repo['path']}", loglines)
        return
    repo_name = gitlab_repo['path']

    # Swap only the trailing ".git"; a blanket replace would also rewrite a
    # ".git" that appears inside the project name.
    repo_url = gitlab_repo['http_url_to_repo']
    if repo_url.endswith('.git'):
        repo_url = repo_url[:-len('.git')]
    gl_wiki_url = f"{repo_url}.wiki.git"
    if GITLAB_TOKEN:
        # Authenticate the clone the same way migrate_code does for the code repo.
        gl_wiki_url = gl_wiki_url.replace("https://", f"https://oauth2:{GITLAB_TOKEN}@", 1)

    gh_wiki_url = f"https://{GITHUB_USER}:{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{repo_name}.wiki.git"
    local_path = os.path.join(LOCAL_BASE, f"{repo_name}_wiki")
    try:
        if os.path.exists(local_path):
            shutil.rmtree(local_path)
        subprocess.run(["git", "clone", "--bare", gl_wiki_url, local_path], check=True)
        # cwd= runs git inside the clone without moving the notebook's working
        # directory, so a failed push cannot leave later steps in the wrong folder.
        subprocess.run(["git", "remote", "add", "github", gh_wiki_url], check=True, cwd=local_path)
        # NOTE(review): the push may be rejected if the GitHub wiki has never been
        # initialised for the repository - confirm against the target account.
        subprocess.run(["git", "push", "--mirror", "github"], check=True, cwd=local_path)
        log(f"Wiki migrated for {repo_name}", loglines)
    except Exception as e:
        # A failed git command stringifies with its argv, which can contain the
        # token-bearing URLs; mask the tokens before logging.
        message = str(e)
        for secret in (GITLAB_TOKEN, GITHUB_TOKEN):
            if secret:
                message = message.replace(secret, "***")
        log(f"Wiki migration failed for {repo_name}: {message}", loglines)
    finally:
        if os.path.exists(local_path):
            shutil.rmtree(local_path)


# Duplicate File Handling (Default Branch Only)

In [11]:
def handle_duplicates(repo_name, loglines, strategy="rename"):
    """Walk the default branch of a GitHub repository and handle repeated paths.

    Every file path is recorded while the tree is traversed breadth-first. When
    a path is met a second time it is either re-created under a timestamped
    ``_DUPLICATE_`` name (``strategy="rename"``) or only reported
    (``strategy="replace"``). Any other strategy value leaves duplicates untouched.

    Args:
        repo_name: Name of a repository owned by the authenticated GitHub user.
        loglines: List collecting log entries (see ``log``).
        strategy: ``"rename"`` or ``"replace"``.
    """
    import posixpath  # repository paths always use "/" regardless of the local OS

    # Only for default branch files
    g = Github(GITHUB_TOKEN)
    gh_repo = g.get_user().get_repo(repo_name)
    default_branch = gh_repo.default_branch
    files_seen = set()
    try:
        contents = gh_repo.get_contents("", ref=default_branch)
    except GithubException as e:
        # E.g. a repository without any commit has no contents to list.
        log(f"Skipping duplicate check for {repo_name}: {e}", loglines)
        return
    while contents:
        file_content = contents.pop(0)
        if file_content.type == "dir":
            contents.extend(gh_repo.get_contents(file_content.path, ref=default_branch))
        else:
            # NOTE(review): each path should be listed once per tree, so this
            # branch looks unreachable as written - confirm what "duplicate"
            # is meant to cover (e.g. names differing only by case).
            if file_content.path in files_seen:
                if strategy == "rename":
                    # Insert the marker before the file extension. Replacing the
                    # first "." of the whole path would hit dotted directories
                    # (".github/...") and do nothing for extensionless files.
                    stem, extension = posixpath.splitext(file_content.path)
                    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
                    new_path = f"{stem}_DUPLICATE_{stamp}{extension}"
                    gh_repo.create_file(new_path, "Duplicate renamed", file_content.decoded_content, branch=default_branch)
                    log(f"Duplicate {file_content.path} renamed to {new_path}", loglines)
                elif strategy == "replace":
                    # Already replaced by last push, nothing to do
                    log(f"Duplicate {file_content.path} replaced.", loglines)
            else:
                files_seen.add(file_content.path)


# Main Migration Runner

In [12]:
# Shared log buffer and the settings handed to migrate_code().
loglines = []
config = {
    "GITLAB_TOKEN": GITLAB_TOKEN,
    "GITHUB_TOKEN": GITHUB_TOKEN,
    "GITHUB_USER": GITHUB_USER,
    "LOCAL_BASE": LOCAL_BASE
}

repos = get_gitlab_repos(GITLAB_TOKEN, GITLAB_GROUP_ID)
print(f"Found {len(repos)} repos in GitLab group.")

gh = Github(GITHUB_TOKEN)
gh_user = gh.get_user()  # same account for every repository, so build it once
for repo in repos:
    repo_name = repo['path']
    log(f"==== Migrating repo: {repo_name} ====", loglines)
    try:
        code_ok = migrate_code(repo, GITHUB_USER, config, loglines)
        if not code_ok:
            # The failure reason was already logged by migrate_code().
            continue
        gh_repo = gh_user.get_repo(repo_name)
        migrate_labels_milestones(repo, gh_repo, loglines)
        if MIGRATE_ISSUES:
            migrate_issues(repo, gh_repo, loglines)
        if MIGRATE_PRS:
            migrate_prs(repo, gh_repo, loglines)
        if MIGRATE_WIKI:
            migrate_wiki(repo, gh_repo, loglines)
        handle_duplicates(repo_name, loglines, strategy=DUPLICATE_STRATEGY)
    except Exception as e:
        # One broken repository must not abort the rest of the batch; record
        # what failed and move on to the next project.
        log(f"❌ Migration of {repo_name} aborted: {type(e).__name__}: {e}", loglines)
        continue
    log(f"==== Done: {repo_name} ====", loglines)


Found 3 repos in GitLab group.
[2025-07-30 00:30:53] ==== Migrating repo: dummy-repo-32d0c5 ====
[2025-07-30 00:30:58] Cloned dummy-repo-32d0c5 from GitLab.
[2025-07-30 00:30:58] Repo dummy-repo-32d0c5 already exists on GitHub.
[2025-07-30 00:31:02] Local clone kept at tmp_migration\dummy-repo-32d0c5_20250730_003053 for dummy-repo-32d0c5. You can delete it manually later.
[2025-07-30 00:31:04] Issue 'Test Issue' migrated.
[2025-07-30 00:31:07] Pull Request 'Demo Merge Request' migrated.
[2025-07-30 00:31:54] Wiki migration failed for dummy-repo-32d0c5: Command '['git', 'push', '--mirror', 'github']' returned non-zero exit status 128.
[2025-07-30 00:31:56] ==== Done: dummy-repo-32d0c5 ====
[2025-07-30 00:31:56] ==== Migrating repo: gitlab-ci ====
[2025-07-30 00:32:01] Cloned gitlab-ci from GitLab.
[2025-07-30 00:32:01] Repo gitlab-ci already exists on GitHub.
[2025-07-30 00:32:05] Local clone kept at tmp_migration\gitlab-ci_20250730_003156 for gitlab-ci. You can delete it manually later

In [13]:
# Replay every collected log entry, one per line (prints nothing when the log is empty).
if loglines:
    print(*loglines, sep="\n")

[2025-07-30 00:30:53] ==== Migrating repo: dummy-repo-32d0c5 ====
[2025-07-30 00:30:58] Cloned dummy-repo-32d0c5 from GitLab.
[2025-07-30 00:30:58] Repo dummy-repo-32d0c5 already exists on GitHub.
[2025-07-30 00:31:02] Local clone kept at tmp_migration\dummy-repo-32d0c5_20250730_003053 for dummy-repo-32d0c5. You can delete it manually later.
[2025-07-30 00:31:04] Issue 'Test Issue' migrated.
[2025-07-30 00:31:07] Pull Request 'Demo Merge Request' migrated.
[2025-07-30 00:31:54] Wiki migration failed for dummy-repo-32d0c5: Command '['git', 'push', '--mirror', 'github']' returned non-zero exit status 128.
[2025-07-30 00:31:56] ==== Done: dummy-repo-32d0c5 ====
[2025-07-30 00:31:56] ==== Migrating repo: gitlab-ci ====
[2025-07-30 00:32:01] Cloned gitlab-ci from GitLab.
[2025-07-30 00:32:01] Repo gitlab-ci already exists on GitHub.
[2025-07-30 00:32:05] Local clone kept at tmp_migration\gitlab-ci_20250730_003156 for gitlab-ci. You can delete it manually later.
[2025-07-30 00:32:14] Wiki mi