# 새로운 칼럼 생성

In [None]:
import pandas as pd
from datetime import timedelta
from datetime import datetime

# 예시 데이터 생성
data = {
    'patientid': [1, 2, 3],
    'age': [45, 34, 60],
    'gender': ['Male', 'Female', 'Female'],
    'bloodtype': ['O', 'A', 'B'],
    'diseases': ['Diabetes', 'Asthma', 'Hypertension'],
    'period': ['Every 6 months', 'Every 1 year', 'Every 3 months'],
    'visitDate': [datetime(2024, 5, 15), datetime(2024, 7, 20), datetime(2024, 6, 30)],
    'address': [
        '서울특별시 종로구 세종대로 1',
        '서울특별시 강남구 테헤란로 123',
        '부산광역시 해운대구 센텀동로 45'
    ]
}

# Convert to DataFrame
df = pd.DataFrame(data)

In [None]:
df

Unnamed: 0,patientid,age,gender,bloodyype,diseases,period,visitDate,address
0,1,45,Male,O,Diabetes,Every 6 months,2024-05-15,서울특별시 종로구 세종대로 1
1,2,34,Female,A,Asthma,Every 1 year,2024-07-20,서울특별시 강남구 테헤란로 123
2,3,60,Female,B,Hypertension,Every 3 months,2024-06-30,부산광역시 해운대구 센텀동로 45


In [None]:
# 이전 진료날짜 데이터 타입 변환
df['visitDate'] = pd.to_datetime(df['visitDate'])

In [None]:
# 날짜 계산 함수
def calculate_next_visit(row):
    # Previous Visit Date를 datetime 객체로 변환
    prev_visit_date = pd.to_datetime(row['visitDate'])  # 이미 pandas의 Timestamp 형식으로 변환
    visit_freq = row['period']

    # Visit Frequency에 따른 날짜 계산
    if visit_freq == 'Every 6 months':
        next_visit_date = prev_visit_date + timedelta(days=180)  # 6개월 = 약 180일
    elif visit_freq == 'Every 1 year':
        next_visit_date = prev_visit_date + timedelta(days=365)  # 1년 = 365일
    elif visit_freq == 'Every 3 months':
        next_visit_date = prev_visit_date + timedelta(days=90)  # 3개월 = 약 90일
    elif visit_freq == 'Every 2 months':
        next_visit_date = prev_visit_date + timedelta(days=60)  # 2개월 = 약 60일
    else:
        next_visit_date = prev_visit_date  # 기본값 (오류 방지용)

    return next_visit_date.strftime('%Y-%m-%d')  # 결과를 문자열 형식으로 반환

In [None]:
# 날짜 계산 적용
df['resDate'] = df.apply(calculate_next_visit, axis=1)

In [None]:
df

Unnamed: 0,patientid,age,gender,bloodyype,diseases,period,visitDate,address,resDate
0,1,45,Male,O,Diabetes,Every 6 months,2024-05-15,서울특별시 종로구 세종대로 1,2024-11-11
1,2,34,Female,A,Asthma,Every 1 year,2024-07-20,서울특별시 강남구 테헤란로 123,2025-07-20
2,3,60,Female,B,Hypertension,Every 3 months,2024-06-30,부산광역시 해운대구 센텀동로 45,2024-09-28


In [None]:
# 'resDate'를 datetime으로 변환
df['resDate'] = pd.to_datetime(df['resDate'])

# 오늘 날짜 가져오기
today = pd.to_datetime(date.today())

# 남은 일수를 계산하여 'remaining_days' 열에 저장
df['remaining_days'] = (df['resDate'] - today).dt.days

df

Unnamed: 0,patientid,age,gender,bloodyype,diseases,period,visitDate,address,resDate,remaining_days
0,1,45,Male,O,Diabetes,Every 6 months,2024-05-15,서울특별시 종로구 세종대로 1,2024-11-11,-11
1,2,34,Female,A,Asthma,Every 1 year,2024-07-20,서울특별시 강남구 테헤란로 123,2025-07-20,240
2,3,60,Female,B,Hypertension,Every 3 months,2024-06-30,부산광역시 해운대구 센텀동로 45,2024-09-28,-55


In [None]:
df

Unnamed: 0,patientid,age,gender,bloodyype,diseases,period,visitDate,address,resDate,remaining_days
0,1,45,Male,O,Diabetes,Every 6 months,2024-05-15,서울특별시 종로구 세종대로 1,2024-11-11,-11
1,2,34,Female,A,Asthma,Every 1 year,2024-07-20,서울특별시 강남구 테헤란로 123,2025-07-20,240
2,3,60,Female,B,Hypertension,Every 3 months,2024-06-30,부산광역시 해운대구 센텀동로 45,2024-09-28,-55


In [None]:
# geolocator 객체 생성
geolocator = Nominatim(user_agent="South Korea")

# 위도와 경도를 DataFrame에 추가
for index, row in df.iterrows():
    address = row['address']
    try:
        # 주소로부터 위도와 경도를 얻음
        location = geolocator.geocode(address)
        if location:
            df.at[index, 'latitude'] = location.latitude
            df.at[index, 'longitude'] = location.longitude
        else:
            df.at[index, 'latitude'] = None
            df.at[index, 'longitude'] = None
    except Exception as e:
        print(f"Error geocoding {address}: {e}")
        df.at[index, 'latitude'] = None
        df.at[index, 'longitude'] = None
    time.sleep(1)  # 요청 간에 1초 간격을 두기 위해

# 결과 확인
df

Unnamed: 0,patientid,age,gender,bloodyype,diseases,period,visitDate,address,resDate,remaining_days,latitude,longitude
0,1,45,Male,O,Diabetes,Every 6 months,2024-05-15,서울특별시 종로구 세종대로 1,2024-11-11,-11,37.565167,126.9785
1,2,34,Female,A,Asthma,Every 1 year,2024-07-20,서울특별시 강남구 테헤란로 123,2025-07-20,240,37.499571,127.031539
2,3,60,Female,B,Hypertension,Every 3 months,2024-06-30,부산광역시 해운대구 센텀동로 45,2024-09-28,-55,35.173223,129.130352


In [None]:
print(df)

   patientid  age  gender bloodyype      diseases          period  visitDate  \
0          1   45    Male         O      Diabetes  Every 6 months 2024-05-15   
1          2   34  Female         A        Asthma    Every 1 year 2024-07-20   
2          3   60  Female         B  Hypertension  Every 3 months 2024-06-30   

              address    resDate  remaining_days   latitude   longitude  
0    서울특별시 종로구 세종대로 1 2024-11-11             -11  37.565167  126.978500  
1  서울특별시 강남구 테헤란로 123 2025-07-20             240  37.499571  127.031539  
2  부산광역시 해운대구 센텀동로 45 2024-09-28             -55  35.173223  129.130352  


In [None]:
# 환자 리스트 생성
patient_list = []
for _, row in df.iterrows():
    patient_info = {
        'patientid': row['patientid'],
        'remaining_days': row['remaining_days'],
        'location':[row['latitude'], row['longitude']]
    }
    patient_list.append(patient_info)

# 결과 출력
for patient in patient_list:
    print(patient)

{'patientid': 1, 'remaining_days': -11, 'location': [37.5651673, 126.9785]}
{'patientid': 2, 'remaining_days': 240, 'location': [37.4995708, 127.0315389]}
{'patientid': 3, 'remaining_days': -55, 'location': [35.1732234, 129.13035168377394]}


In [None]:
patient_list

[{'patientid': 1, 'remaining_days': -11, 'location': [37.5651673, 126.9785]},
 {'patientid': 2,
  'remaining_days': 240,
  'location': [37.4995708, 127.0315389]},
 {'patientid': 3,
  'remaining_days': -55,
  'location': [35.1732234, 129.13035168377394]}]

# 샘플데이터 생성

In [None]:
pip install faker

Collecting faker
  Downloading Faker-33.0.0-py3-none-any.whl.metadata (15 kB)
Downloading Faker-33.0.0-py3-none-any.whl (1.9 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.2/1.9 MB[0m [31m5.0 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━[0m [32m1.3/1.9 MB[0m [31m18.5 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-33.0.0


In [None]:
import pandas as pd
import random
from geopy.geocoders import Nominatim

# Geopy 초기화
geolocator = Nominatim(user_agent="geoapi")

# 성남시 수정구 태평동 근처 좌표 설정
base_latitude = 37.4409  # 성남시 수정구 태평동 근처 위도
base_longitude = 127.1376  # 성남시 수정구 태평동 근처 경도

# 랜덤 좌표 생성 함수
def generate_random_coordinates(base_lat, base_lon, radius=0.01):
    lat = base_lat + random.uniform(-radius, radius)  # 약 1km 반경
    lon = base_lon + random.uniform(-radius, radius)
    return lat, lon

# 좌표를 주소로 변환하는 함수
def get_address_from_coordinates(lat, lon):
    try:
        location = geolocator.reverse((lat, lon), exactly_one=True, language="ko")
        return location.address if location else "Unknown address"
    except Exception as e:
        return "Geocoding failed"

# 데이터 생성
data = []
for patient_id in range(1, 16):
    lat, lon = generate_random_coordinates(base_latitude, base_longitude)
    address = get_address_from_coordinates(lat, lon)
    data.append({
        "patientid": patient_id,
        "age": random.randint(20, 80),
        "gender": random.choice(["Male", "Female"]),
        "bloodType": random.choice(["A", "B", "O", "AB"]),
        "diseases": random.choice(["Diabetes", "Asthma", "Hypertension", "Cancer", "Obesity", "Arthritis"]),
        "period": random.choice(["Every 1 year", "Every 6 months", "Every 3 months", "Every 2 months"]),
        "visitDate": pd.Timestamp('2024-01-01') + pd.to_timedelta(random.randint(0, 365), unit='d'),
        "address": address
    })

# 데이터프레임 생성
sam = pd.DataFrame(data)
sam

Unnamed: 0,patientid,age,gender,bloodType,diseases,period,visitDate,address
0,1,60,Male,AB,Obesity,Every 2 months,2024-08-26,"수정로, 신흥동, 수정구, 성남시, 경기도, 13259, 대한민국"
1,2,68,Female,AB,Hypertension,Every 3 months,2024-05-01,"두원약국, 2095, 광명로, 성남동, 중원구, 성남시, 경기도, 13361, 대한민국"
2,3,63,Male,B,Diabetes,Every 6 months,2024-07-04,"원터로75번길, 하대원동, 중원구, 성남시, 경기도, 13361, 대한민국"
3,4,41,Male,A,Asthma,Every 1 year,2024-03-03,"원터로38번길, 하대원동, 밭, 중원구, 성남시, 경기도, 13388, 대한민국"
4,5,59,Female,B,Cancer,Every 3 months,2024-04-15,"시민로, 중앙동, 중원구, 성남시, 경기도, 13344, 대한민국"
5,6,32,Male,B,Cancer,Every 6 months,2024-02-23,"수정로170번길, 신흥동, 수정구, 성남시, 경기도, 13338, 대한민국"
6,7,52,Female,AB,Obesity,Every 3 months,2024-05-07,"원터로69번길, 하대원동, 중원구, 성남시, 경기도, 13370, 대한민국"
7,8,78,Male,B,Diabetes,Every 2 months,2024-11-03,"아찌방, 165, 제일로, 태평동, 수정구, 성남시, 경기도, 13310, 대한민국"
8,9,34,Male,B,Diabetes,Every 6 months,2024-02-20,"원터로93번길, 하대원동, 중원구, 성남시, 경기도, 13361, 대한민국"
9,10,45,Male,B,Asthma,Every 3 months,2024-09-24,"성남중학교, 64, 원터로, 하대원동, 중원구, 성남시, 경기도, 13370, 대한민국"


In [None]:
import pandas as pd
from datetime import timedelta, date
from geopy.geocoders import Nominatim
import time

def process_patient_data(df):
    """
    Processes a DataFrame containing patient information:
    - Calculates next visit date based on the period.
    - Computes remaining days to the next visit.
    - Adds latitude and longitude based on the address.

    Args:
        df (pd.DataFrame): DataFrame with patient data.

    Returns:
        pd.DataFrame: Processed DataFrame with added 'resDate', 'remaining_days',
                      'latitude', and 'longitude' columns.
    """
    # Geolocator initialization
    geolocator = Nominatim(user_agent="South Korea")

    def calculate_next_visit(row):
        """Calculates the next visit date based on the visit frequency."""
        prev_visit_date = pd.to_datetime(row['visitDate'])
        visit_freq = row['period']

        if visit_freq == 'Every 6 months':
            next_visit_date = prev_visit_date + timedelta(days=180)
        elif visit_freq == 'Every 1 year':
            next_visit_date = prev_visit_date + timedelta(days=365)
        elif visit_freq == 'Every 3 months':
            next_visit_date = prev_visit_date + timedelta(days=90)
        elif visit_freq == 'Every 2 months':
            next_visit_date = prev_visit_date + timedelta(days=60)
        else:
            next_visit_date = prev_visit_date  # Default case

        return next_visit_date.strftime('%Y-%m-%d')

    # Add 'resDate' column
    df['resDate'] = df.apply(calculate_next_visit, axis=1)
    df['resDate'] = pd.to_datetime(df['resDate'])

    # Calculate remaining days
    today = pd.to_datetime(date.today())
    df['remaining_days'] = (df['resDate'] - today).dt.days

    # Add latitude and longitude columns based on addresses
    df['latitude'] = None
    df['longitude'] = None

    for index, row in df.iterrows():
        address = row['address']
        try:
            location = geolocator.geocode(address)
            if location:
                df.at[index, 'latitude'] = location.latitude
                df.at[index, 'longitude'] = location.longitude
            else:
                df.at[index, 'latitude'] = None
                df.at[index, 'longitude'] = None
        except Exception as e:
            print(f"Error geocoding {address}: {e}")
            df.at[index, 'latitude'] = None
            df.at[index, 'longitude'] = None

        time.sleep(1)  # To avoid overloading the geocoding service

    return df


In [None]:
sam_df = process_patient_data(sam)

In [None]:
sam_df

Unnamed: 0,patientid,age,gender,bloodType,diseases,period,visitDate,address,resDate,remaining_days,latitude,longitude
0,1,60,Male,AB,Obesity,Every 2 months,2024-08-26,"수정로, 신흥동, 수정구, 성남시, 경기도, 13259, 대한민국",2024-10-25,-28,37.449399,127.146015
1,2,68,Female,AB,Hypertension,Every 3 months,2024-05-01,"두원약국, 2095, 광명로, 성남동, 중원구, 성남시, 경기도, 13361, 대한민국",2024-07-30,-115,37.438083,127.144454
2,3,63,Male,B,Diabetes,Every 6 months,2024-07-04,"원터로75번길, 하대원동, 중원구, 성남시, 경기도, 13361, 대한민국",2024-12-31,39,37.433177,127.141611
3,4,41,Male,A,Asthma,Every 1 year,2024-03-03,"원터로38번길, 하대원동, 밭, 중원구, 성남시, 경기도, 13388, 대한민국",2025-03-03,101,37.430216,127.145124
4,5,59,Female,B,Cancer,Every 3 months,2024-04-15,"시민로, 중앙동, 중원구, 성남시, 경기도, 13344, 대한민국",2024-07-14,-131,37.439016,127.144996
5,6,32,Male,B,Cancer,Every 6 months,2024-02-23,"수정로170번길, 신흥동, 수정구, 성남시, 경기도, 13338, 대한민국",2024-08-21,-93,37.441691,127.139659
6,7,52,Female,AB,Obesity,Every 3 months,2024-05-07,"원터로69번길, 하대원동, 중원구, 성남시, 경기도, 13370, 대한민국",2024-08-05,-109,37.432673,127.14186
7,8,78,Male,B,Diabetes,Every 2 months,2024-11-03,"아찌방, 165, 제일로, 태평동, 수정구, 성남시, 경기도, 13310, 대한민국",2025-01-02,41,37.441035,127.131485
8,9,34,Male,B,Diabetes,Every 6 months,2024-02-20,"원터로93번길, 하대원동, 중원구, 성남시, 경기도, 13361, 대한민국",2024-08-18,-96,37.434334,127.141457
9,10,45,Male,B,Asthma,Every 3 months,2024-09-24,"성남중학교, 64, 원터로, 하대원동, 중원구, 성남시, 경기도, 13370, 대한민국",2024-12-23,31,37.432509,127.143972


In [None]:
# 환자 리스트 생성
patient_list = []
for _, row in sam_df.iterrows():
    patient_info = {
        'patientid': row['patientid'],
        'remaining_days': row['remaining_days'],
        'location':[row['latitude'], row['longitude']]
    }
    patient_list.append(patient_info)

# 결과 출력
for patient in patient_list:
    print(patient)

{'patientid': 1, 'remaining_days': -28, 'location': [37.449399149433056, 127.14601526356022]}
{'patientid': 2, 'remaining_days': -115, 'location': [37.4380831, 127.1444545]}
{'patientid': 3, 'remaining_days': 39, 'location': [37.433176613176435, 127.14161080088701]}
{'patientid': 4, 'remaining_days': 101, 'location': [37.43021647283937, 127.1451237800741]}
{'patientid': 5, 'remaining_days': -131, 'location': [37.43901598293697, 127.14499567791044]}
{'patientid': 6, 'remaining_days': -93, 'location': [37.44169056961609, 127.13965915647442]}
{'patientid': 7, 'remaining_days': -109, 'location': [37.43267341052862, 127.14186008911558]}
{'patientid': 8, 'remaining_days': 41, 'location': [37.4410347, 127.131485]}
{'patientid': 9, 'remaining_days': -96, 'location': [37.43433366005426, 127.14145748387472]}
{'patientid': 10, 'remaining_days': 31, 'location': [37.43250915, 127.14397198375899]}
{'patientid': 11, 'remaining_days': 23, 'location': [37.43163931627512, 127.1352877171919]}
{'patientid