<a href="https://colab.research.google.com/github/hwasol/esg/blob/main/Untitled6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
import numpy as np
import re

# 파일명 맞춰주세요
df = pd.read_csv('한국국제협력단_국별 개발협력동향_20250531.csv')

# 1. 키워드 사전 정의 (React 코드와 동일)
cooperationFields = {
    '인프라개발력': ['인프라','도로','교통','전력','에너지','수자원','상하수도','항만','공항','철도','건설'],
    '보건의료수준': ['보건','의료','병원','백신','치료','질병','건강','의약품','의료진','코로나'],
    '교육역량강화': ['교육','학교','대학','훈련','역량강화','기술전수','연수','교사','직업교육'],
    '농업현대화': ['농업','농촌','식량','농산물','축산','어업','농민','작물','스마트팜'],
    '디지털혁신도': ['디지털','IT','인터넷','통신','데이터','사이버','스마트','온라인','플랫폼'],
    '환경지속성': ['환경','기후','탄소','재생에너지','친환경','지속가능','녹색','청정에너지'],
    '경제개방도': ['무역','수출','투자','경제협력','자유무역협정','FTA','관세','시장개방'],
    '거버넌스수준': ['거버넌스','정부','행정','법률','제도','투명성','부패방지','민주주의']
}
growthPotential = {
    '경제성장동력': ['성장','GDP','경제발전','산업화','개발','확대','증가','향상','부가가치'],
    '혁신기술도입': ['혁신','기술','첨단','신기술','AI','자동화','스마트','4차산업혁명'],
    '국제협력활성': ['국제기구','World Bank','ADB','IMF','UN','다자협력','양자협력','국제사회'],
    '정책추진력': ['정책','개혁','법제도','규제완화','구조조정','현대화','전략','비전'],
    '사회발전도': ['사회발전','복지','빈곤퇴치','불평등','포용','취약계층','소외계층'],
    '지속가능전략': ['지속가능','SDGs','장기전략','미래','비전2030','목표달성']
}
cooperationIntensity = {
    '투자규모': ['백만 달러','억 달러','천만 달러','대규모','투자','자금지원','융자'],
    '사업지속성': ['장기','다년간','지속','단계별','확대','연장','지속적'],
    '협력다양성': ['다분야','통합','종합','패키지','다각도','포괄적'],
    '정부관여도': ['정부','장관','대통령','총리','고위급','정상회담','공식'],
    'KOICA연관성': ['KOICA','한국','코이카','우리나라','양국','한-']
}
riskFactors = {
    '정치안정성': ['선거','정치','시위','갈등','불안','혼란','위기'],
    '경제위험': ['인플레이션','금융위기','부채','경제난','재정','위기'],
    '사회갈등': ['분쟁','테러','종교','민족','갈등','사회불안'],
    '자연재해': ['홍수','지진','태풍','가뭄','자연재해','기후변화']
}
institutions = ['world bank', 'adb', 'imf', 'un', 'who', 'undp', 'unicef']

africaRegionMapping = {
    '이집트': '북아프리카','모로코': '북아프리카','알제리': '북아프리카','튀니지': '북아프리카',
    '나이지리아': '서아프리카','가나': '서아프리카','세네갈': '서아프리카','코트디부아르': '서아프리카',
    '말리': '서아프리카','부르키나파소': '서아프리카','베냉': '서아프리카',
    '케냐': '동아프리카','탄자니아': '동아프리카','우간다': '동아프리카','르완다': '동아프리카','마다가스카르': '동아프리카',
    '카메룬': '중앙아프리카','가봉': '중앙아프리카','콩고민주공화국': '중앙아프리카','DR Congo': '중앙아프리카',
    '남아프리카공화국': '남아프리카','앙골라': '남아프리카','잠비아': '남아프리카','보츠와나': '남아프리카','모잠비크': '남아프리카'
}
targetAfricanCountries = list(africaRegionMapping.keys())

# 본문 텍스트 합치기
def get_fulltext(row):
    texts = [str(row.get('제목','')), str(row.get('본문1','')), str(row.get('본문2','')), str(row.get('본문3',''))]
    return ' '.join(texts).lower()

df['fulltext'] = df.apply(get_fulltext, axis=1)

# 국가-지역별 데이터프레임 집계
profiles = {}
for _, row in df.iterrows():
    country = row['국가명']
    if country not in profiles:
        profiles[country] = {
            'region': row['지역'],
            'totalProjects': 0,
            'cooperation': {k:0 for k in cooperationFields},
            'growth': {k:0 for k in growthPotential},
            'intensity': {k:0 for k in cooperationIntensity},
            'risks': {k:0 for k in riskFactors},
            'institutions': set(),
            'sectors': set(),
        }
    profiles[country]['totalProjects'] += 1
    profiles[country]['sectors'].add(row.get('분야',''))

    # 협력 지표
    for k, kwlist in cooperationFields.items():
        profiles[country]['cooperation'][k] += sum(row['fulltext'].count(kw.lower()) for kw in kwlist)
    # 성장 잠재력
    for k, kwlist in growthPotential.items():
        profiles[country]['growth'][k] += sum(row['fulltext'].count(kw.lower()) for kw in kwlist)
    # 협력 강도
    for k, kwlist in cooperationIntensity.items():
        profiles[country]['intensity'][k] += sum(row['fulltext'].count(kw.lower()) for kw in kwlist)
    # 리스크
    for k, kwlist in riskFactors.items():
        profiles[country]['risks'][k] += sum(row['fulltext'].count(kw.lower()) for kw in kwlist)
    # 국제기구 언급
    for inst in institutions:
        if inst in row['fulltext']:
            profiles[country]['institutions'].add(inst.upper())

# 결과 데이터프레임 만들기
rows = []
for country in targetAfricanCountries:
    if country not in profiles:
        continue
    p = profiles[country]
    n = p['totalProjects']
    if n < 1: continue  # 1개 미만은 제외
    row = {
        '국가명': country,
        '세분화지역': africaRegionMapping[country],
        '총사업수': n,
        '분야다양성': len(p['sectors']),
        '국제기구수': len(p['institutions']),
    }
    # 협력 지표
    for k in cooperationFields:
        row[k] = round(p['cooperation'][k]/n,2)
    for k in growthPotential:
        row[k] = round(p['growth'][k]/n,2)
    for k in cooperationIntensity:
        row[k] = round(p['intensity'][k]/n,2)
    for k in riskFactors:
        row[k+'위험'] = round(p['risks'][k]/n,2)
    # 종합지표
    row['협력잠재력'] = round(np.mean([row['인프라개발력'],row['디지털혁신도'],row['경제개방도'],row['거버넌스수준']]),2)
    row['성장가능성'] = round(np.mean([row['경제성장동력'],row['혁신기술도입'],row['국제협력활성'],row['정책추진력']]),2)
    row['한국연관성'] = round(np.mean([row['KOICA연관성'],row['투자규모'],row['사업지속성']]),2)
    rows.append(row)

df_profile = pd.DataFrame(rows)
print(f"아프리카 24개국 분석 완료! shape={df_profile.shape}")
df_profile.head()


아프리카 24개국 분석 완료! shape=(16, 31)


Unnamed: 0,국가명,세분화지역,총사업수,분야다양성,국제기구수,인프라개발력,보건의료수준,교육역량강화,농업현대화,디지털혁신도,...,협력다양성,정부관여도,KOICA연관성,정치안정성위험,경제위험위험,사회갈등위험,자연재해위험,협력잠재력,성장가능성,한국연관성
0,이집트,북아프리카,15,8,1,1.33,2.07,0.13,0.13,0.6,...,0.47,1.93,0.27,0.0,0.13,0.0,0.0,0.77,0.66,0.4
1,모로코,북아프리카,12,6,0,2.58,0.0,1.0,0.58,0.08,...,0.33,1.5,0.92,0.08,0.17,0.0,0.25,0.96,0.79,1.22
2,알제리,북아프리카,10,6,2,2.7,3.0,0.7,1.0,2.7,...,0.3,1.5,0.5,0.1,0.0,0.0,0.0,1.92,1.27,1.03
3,튀니지,북아프리카,12,8,2,1.67,1.67,1.0,0.5,1.17,...,0.17,0.5,0.0,0.08,0.25,0.0,0.0,0.77,0.79,0.17
4,나이지리아,서아프리카,11,6,2,0.45,0.18,0.18,1.09,1.18,...,0.73,1.91,0.09,0.09,0.0,0.27,0.0,0.82,1.16,0.7


In [10]:
# 국가명+세분화지역+수치지표 전체 저장
df_profile.to_csv('pca_input.csv', index=False, encoding='cp949')
from google.colab import files
files.download('pca_input.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>