# In [3]:
import requests
import pandas as pd
from datetime import datetime
import re

# Kampus Merdeka internship ("magang") API: the browse/opportunities endpoint
# queried by this script.
api_url = "https://api.kampusmerdeka.kemdikbud.go.id/magang/browse/opportunities"

def get_data_from_api(url, timeout=30):
    """Fetch ``url`` with an HTTP GET and return its decoded JSON body.

    Args:
        url: Address of the endpoint to query.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # raise_for_status() is a no-op for successful responses, so calling it
    # unconditionally avoids silently returning None for non-200 statuses
    # that are not errors.
    response.raise_for_status()
    return response.json()

# Keywords that mark an opportunity title as IT-related.  "Engineer" is
# deliberately not listed: on its own it also matches non-IT roles (civil
# engineer, mechanical engineer, ...), so an engineering title only counts
# when one of these other keywords appears in it as well.
_IT_KEYWORDS = (
    'IT', 'Information Technology', 'Software', 'Developer', 'Programmer',
    'Teknologi Informasi', 'Web', 'Data', 'Database', 'Cyber', 'Security', 'Network',
    'Cloud', 'System', 'Application', 'App', 'AI', 'Artificial Intelligence', 'Machine Learning', 'ML',
    'DevOps', 'Blockchain', 'Full Stack', 'Frontend', 'Backend', 'QA', 'Quality Assurance', 'UX', 'UI',
    'Technology', 'Coding', 'Programming',
)

# Whole-word, case-insensitive matcher for any keyword.  Compiled once at
# import time; keywords are escaped so a future entry containing regex
# metacharacters (e.g. "C++") cannot corrupt the pattern.
_IT_KEYWORD_PATTERN = re.compile(
    r'\b(?:' + '|'.join(re.escape(kw) for kw in _IT_KEYWORDS) + r')\b',
    re.IGNORECASE,
)


def filter_it_opportunities(data):
    """Select the MSIB opportunities whose title looks IT-related.

    An item is kept when its ``opportunity_type`` equals ``MSIB``
    (case-insensitive) and its ``name`` contains at least one IT keyword as a
    whole word.  The word "engineer" alone is not sufficient; see
    ``_IT_KEYWORDS``.

    Args:
        data: Iterable of opportunity dicts.  ``None`` or an empty iterable
            yields an empty result.

    Returns:
        A list of dicts with the keys ``name``, ``mitra_brand_name``,
        ``location`` and ``link``, in input order.
    """
    it_opportunities = []

    for item in data or ():
        # dict.get(key, '') still returns None when the key is present with a
        # null value, so normalise with `or ''` before calling str methods.
        opportunity_type = item.get('opportunity_type') or ''
        if opportunity_type.upper() != 'MSIB':
            continue

        name = item.get('name') or ''
        if not _IT_KEYWORD_PATTERN.search(name):
            continue

        opportunity_link = f"https://kampusmerdeka.kemdikbud.go.id/program/magang/browse/{item.get('mitra_id', '')}/{item.get('id', '')}"
        it_opportunities.append({
            'name': name,
            'mitra_brand_name': item.get('mitra_brand_name', ''),
            'location': item.get('location', ''),
            'link': opportunity_link,
        })

    return it_opportunities

# Download the opportunity listing; the records are read from the top-level
# "data" key of the JSON payload.
data = get_data_from_api(api_url)

it_opportunities = filter_it_opportunities(data['data'])

# Stamp the output files with today's date.  Both file names are built from
# one shared stem instead of string-replacing "xlsx", which would rewrite
# every occurrence of that substring rather than only the extension.
now = datetime.now()
timestamp = now.strftime("%Y-%m-%d")
output_stem = f'MSIB_IT_updated_at_{timestamp}'
filename = f'{output_stem}.xlsx'
csv_filename = f'{output_stem}.csv'

# Name the columns explicitly so the exported files keep their header row
# even when no opportunity matched.
df = pd.DataFrame(
    it_opportunities,
    columns=['name', 'mitra_brand_name', 'location', 'link'],
)
df.to_excel(filename, index=False)
df.to_csv(csv_filename, index=False)

print(f"Found {len(it_opportunities)} IT opportunities.")


# Output: Found 1129 IT opportunities.
