In [None]:
import asyncio
import os
import urllib.parse

import aiohttp
import numpy as np
import pandas as pd
from tqdm.asyncio import tqdm_asyncio

# Everytime session cookie value (URL-encoded). Set the EVERYTIME_SESSION
# environment variable to supply your own session without editing the notebook.
# SECURITY NOTE(review): the literal fallback below is a credential stored in
# the notebook itself; revoke it and drop the fallback once EVERYTIME_SESSION
# is configured wherever this notebook runs.
everytime_session = os.environ.get(
    "EVERYTIME_SESSION",
    "s%3AnI6LmfD5XwBfXxbBDEMsKLZHPNWGXf6Y.mlnak0kWR%2ByeZtRhRGJLpLHidG5Te5CRBQDU5UoqIHU",
)

# Professor names: each one is sent as a search keyword, and the same list is
# used afterwards to keep only rows whose professor matches exactly.
prof = ["고영웅","김동회","김백섭","김병정","김선정","김유섭","김은주","김의직","김점근",
        "김종대","김진","문규","박섭형","박찬영","방성근","배무호","송성호","송창근","신미영",
        "안재목","양은샘","윤지희","이선우","이용업","이은주","이재영","이정근","정재동","주한규",
        "노원종","김진국","김진환","임성훈","허종욱","곽병일","박현제","최종환","김효승","신범주"]

# Headers sent with every API request. The body is form-encoded, and the
# session is carried in the `etsid` cookie.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:138.0) Gecko/20100101 Firefox/138.0",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "en-US,en;q=0.5",
    "Content-Type": "application/x-www-form-urlencoded",
    "Cookie": f"etsid={everytime_session}"
}

async def fetch_lectures(session, name):
    """Search lectures by professor keyword.

    Posts a form-encoded keyword search (field ``professor``, campus ``0``,
    limit ``10000``, offset ``0``) to the lecture-list endpoint.

    Args:
        session: HTTP client session exposing an async ``post`` context manager.
        name: professor name used as the search keyword.

    Returns:
        The list found under ``result.lectures`` in the JSON reply (empty if
        either key is absent). A non-200 reply is reported on stdout and
        yields an empty list.
    """
    endpoint = "https://api.everytime.kr/find/lecture/list/keyword"
    form = urllib.parse.urlencode({
        "campusId": "0",
        "field": "professor",
        "keyword": name,
        "limit": "10000",
        "offset": "0",
    })
    async with session.post(endpoint, headers=headers, data=form) as response:
        # Bail out early on any non-success status.
        if response.status != 200:
            print(f"[Lecture] {name} failed with status {response.status}")
            return []
        payload = await response.json()
    return payload.get("result", {}).get("lectures", [])


async def fetch_articles(session, lecture):
    """Attach the articles of one lecture to its record.

    Posts the lecture's id (limit ``10000``, offset ``0``, sorted by ``id``)
    to the article-list endpoint and stores the outcome under
    ``lecture["articles"]``; the dict is modified in place.

    Args:
        session: HTTP client session exposing an async ``post`` context manager.
        lecture: lecture dict; must contain an ``"id"`` key.

    Returns:
        The same ``lecture`` dict. Its ``"articles"`` entry holds the list
        found under ``result.articles`` in the JSON reply, or an empty list
        when a key is absent or the reply status is not 200 (the latter is
        also reported on stdout).
    """
    endpoint = "https://api.everytime.kr/find/lecture/article/list"
    form = urllib.parse.urlencode({
        "lectureId": lecture["id"],
        "limit": "10000",
        "offset": "0",
        "sort": "id",
    })
    async with session.post(endpoint, headers=headers, data=form) as response:
        if response.status == 200:
            payload = await response.json()
            found = payload.get("result", {}).get("articles", [])
        else:
            print(f"[Article] {lecture['id']} failed with status {response.status}")
            found = []
    lecture["articles"] = found
    return lecture


async def main():
    """Scrape every lecture, with its articles, for the professors in ``prof``.

    Runs one keyword search per professor concurrently, de-duplicates the
    combined results by lecture id, then fetches the articles of each
    remaining lecture concurrently. Progress is shown with tqdm.

    Returns:
        A list of lecture dicts, one per distinct lecture id in first-seen
        order, each carrying the ``"articles"`` entry set by
        ``fetch_articles``.
    """
    async with aiohttp.ClientSession() as session:
        # Step 1: Fetch lectures for each professor
        lecture_tasks = [fetch_lectures(session, name) for name in prof]
        lecture_batches = await tqdm_asyncio.gather(*lecture_tasks)

        # The same lecture can be returned by more than one keyword search
        # (e.g. "김진" vs "김진국"/"김진환", assuming the API matches
        # substrings). Without de-duplication each such lecture would be
        # fetched twice and every one of its reviews would appear twice in
        # the final table. Keep the first record seen per lecture id.
        unique_lectures = {}
        for batch in lecture_batches:
            for lec in batch:
                unique_lectures.setdefault(lec["id"], lec)
        lectures = list(unique_lectures.values())

        # Step 2: Fetch articles for each lecture concurrently
        article_tasks = [fetch_articles(session, lecture) for lecture in lectures]
        lectures_with_articles = await tqdm_asyncio.gather(*article_tasks)

        return lectures_with_articles


df = pd.DataFrame(await main())
df.columns = ["lecture_id", "lecture_name", "professor", "lecture_rate", "articles"]

100%|██████████| 39/39 [00:00<00:00, 48.48it/s]
100%|██████████| 690/690 [00:02<00:00, 330.48it/s]


In [None]:
# One row per article, renumbered from 0 so the rows line up with the
# normalized frame built below.
df = df.explode("articles", ignore_index=True)

# Step 3: Normalize the article dicts into separate columns
articles = pd.json_normalize(df["articles"])

# Step 4: Replace the raw 'articles' column with the normalized article columns
df = pd.concat([df.drop(columns="articles"), articles], axis="columns")

In [None]:
# Keep only these eight columns, in this order.
df = df.loc[:, [
    "professor", "lecture_id", "lecture_name",
    "year", "semester", "text", "rate", "posvote",
]]

In [None]:
# Keep only rows whose professor is exactly one of the names in `prof`.
df = df.loc[df["professor"].isin(prof)]

In [None]:
# Convert every column to the best-fitting nullable pandas dtype
# (integer conversion is enabled by default).
df = df.convert_dtypes()

In [None]:
# Display the resulting review table (one row per review).
df

Unnamed: 0,professor,lecture_id,lecture_name,year,semester,text,rate,posvote
0,고영웅,2338758,소프트웨어캡스톤디자인,2024,1,점수도 잘나오고 기부니가 좋다 열심히만 하면 되는듯 교수님이 말씀하신거 다 수용하고,5,0
1,고영웅,2338758,소프트웨어캡스톤디자인,2024,1,이번에 링크사업단으로 넘어가면서 교수님이 잘 모르시는게 많았음 그리고 학생들끼리하는...,2,0
2,고영웅,2338758,소프트웨어캡스톤디자인,2023,2,캡스톤 졸업만 하고싶다는 생각이면 다른 분반을 추천합니다 졸업만 하자라는 생각으로 ...,5,1
3,고영웅,2338758,소프트웨어캡스톤디자인,2023,2,이미 질리도록 들으셨겠지만 그 만큼 캡스톤 같이 할 팀원 잘 찾는게 좋습니다... ...,5,1
4,고영웅,2338758,소프트웨어캡스톤디자인,2023,1,소융대 학생들에 수준을 알 수 있는 수업 4년동안 무엇을 배웠는지 모르는 사람들이 ...,5,1
...,...,...,...,...,...,...,...,...
4465,신범주,2700057,파이썬과학프로그래밍기초,2024,1,교수님 열정도 좋으시고 질문하면 이해할때까지 다 햐주시고 본인도 직접 하셔서 해결할...,5,0
4466,신범주,2700045,머신러닝,2024,1,교수님이 열심히는 가르쳐 주시는데 너무 설명을 어렵게 해주신다고 느꼈습니다. 그래도...,3,0
4467,신범주,2700045,머신러닝,2024,1,어려움 처음 배우면서 이걸 들으니 못 따라감 저학년은 듣지 마세요,3,0
4468,신범주,2700045,머신러닝,2024,1,머신러닝의 알고리즘을 수학적 수식으로 설명하는 수업 선수 학습내용(거의 필수)...,5,0


In [None]:
# Save the review table to a CSV file, omitting the row index.

df.to_csv("everytime_reviews.csv", index=False)

In [None]:
# Count rows per professor, after dropping every row that has a missing value in any column.
df.dropna()["professor"].value_counts()

Unnamed: 0_level_0,count
professor,Unnamed: 1_level_1
신미영,284
양은샘,244
김은주,228
이정근,217
김유섭,133
김선정,112
