In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Professor names
professor_names = ['김유섭', '김은주', '이정근', '양은샘', '신미영', '김선정']

# Everytime session
everytime_token = ''

### Fetch lecture reviews from Everytime

In [2]:
import asyncio
import aiohttp
import urllib.parse

class Everytime:
    def __init__(self, token):
        self.token = token
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.session.close()

    async def fetch_lectures(self, name):
        url = 'https://api.everytime.kr/find/lecture/list/keyword'
        headers = {
            'User-Agent': 'Chrome/137.0.0.0',
            'Accept': 'application/json, text/plain, */*',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Cookie': f'etsid={self.token}'
        }
        data = urllib.parse.urlencode({
            'campusId': '0',
            'field': 'professor',
            'keyword': name,
            'limit': '10000',
            'offset': '0'
        })
        async with self.session.post(url, headers=headers, data=data) as response:
            if response.status == 200:
                json_data = await response.json()
                return json_data.get('result', {}).get('lectures', [])
            else:
                print(f'[Lecture] {name} failed with status {response.status}')
                return []

    async def fetch_articles(self, lecture):
        url = 'https://api.everytime.kr/find/lecture/article/list'
        headers = {
            'User-Agent': 'Chrome/137.0.0.0',
            'Accept': 'application/json, text/plain, */*',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Cookie': f'etsid={self.token}'
        }
        data = urllib.parse.urlencode({
            'lectureId': lecture['id'],
            'limit': '10000',
            'offset': '0',
            'sort': 'id'
        })
        async with self.session.post(url, headers=headers, data=data) as response:
            if response.status == 200:
                json_data = await response.json()
                lecture['articles'] = json_data.get('result', {}).get('articles', [])
            else:
                print(f'[Article] {lecture["id"]} failed with status {response.status}')
                lecture['articles'] = []
        return lecture

In [3]:
from tqdm.asyncio import tqdm_asyncio

async def get_everytime_reviews(professors):
    async with Everytime(everytime_token) as et:
        # Fetch lectures for each professor
        lecture_tasks = [et.fetch_lectures(name) for name in professors]
        lectures = await tqdm_asyncio.gather(*lecture_tasks)
        lectures = [lec for sublist in lectures for lec in sublist]

        # Fetch articles for each lecture
        article_tasks = [et.fetch_articles(lecture) for lecture in lectures]
        lectures_with_articles = await tqdm_asyncio.gather(*article_tasks)

        return pd.DataFrame(lectures_with_articles)

df = await get_everytime_reviews(professor_names)

100%|██████████| 6/6 [00:00<00:00, 10.20it/s]
100%|██████████| 148/148 [00:01<00:00, 74.60it/s] 


In [4]:
df.head()

Unnamed: 0,id,name,professor,rate,articles
0,2871480,데이터마이닝,김유섭,5.0,"[{'isMine': False, 'id': 8603151, 'year': 2025..."
1,2794067,인공지능기초,김유섭,4.43,"[{'isMine': False, 'id': 8432502, 'year': 2024..."
2,2199639,선형대수,김유섭,4.45,"[{'isMine': False, 'id': 5077402, 'year': 2022..."
3,2112333,"정보,컴퓨터논술교육",김유섭,0.0,[]
4,2112316,오디세이세미나1,김유섭,3.0,"[{'isMine': False, 'id': 1983545, 'year': 2020..."


In [5]:
df = df.explode('articles').reset_index(drop=True)

# JSON to table buckets
articles = pd.json_normalize(df['articles'])

# Drop the 'articles' column and concatenate the normalized review columns
df = pd.concat([df.drop(columns=['articles']), articles], axis=1)

# Rename and type conversion
df = df[['professor', 'name', 'year', 'semester', 'text']]
df = df.convert_dtypes(convert_integer=True)
df.rename(columns={'text': 'review'}, inplace=True)

In [6]:
df.dropna(inplace=True)
df.drop_duplicates(inplace=True)

In [7]:
df['professor'].value_counts()

Unnamed: 0_level_0,count
professor,Unnamed: 1_level_1
신미영,285
양은샘,245
김은주,228
이정근,217
김유섭,134
김선정,113
임종국 / 김선정 / 민경애,3


In [8]:
df['name'].value_counts()

Unnamed: 0_level_0,count
name,Unnamed: 1_level_1
컴퓨팅사고와문제해결,127
자바프로그래밍Ⅰ,105
C프로그래밍,96
자바프로그래밍Ⅱ,90
이산구조론,71
...,...
데이터베이스,1
프로그래밍어론,1
HCI,1
융합SW개론I,1


### 동명이인 및 전공 외 과목 처리

In [9]:
df = df[df['professor'].isin(professor_names)]
df = df[~df['name'].isin([
    '음성학과발음연습', '영어문법',
    # '오디세이세미나1', '오디세이세미나2(리더십과 기업가정신)', '오디세이세미나3', '오디세이세미나4',
    # '글로벌취업전략', '직무및기업탐색', '취업성공전략', '취업설계', '진로설계', '여대생커리어개발과취업전략', '해외취업및인턴준비과정'
])]

In [10]:
df['professor'].value_counts()

Unnamed: 0_level_0,count
professor,Unnamed: 1_level_1
신미영,270
양은샘,245
김은주,228
이정근,217
김유섭,134
김선정,113


In [11]:
df['professor'] = pd.Categorical(
    df['professor'],
    categories=professor_names,
    ordered=True
)

# Sort and reset index
df = df.sort_values('professor').reset_index(drop=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['professor'] = pd.Categorical(


In [12]:
df.head()

Unnamed: 0,professor,name,year,semester,review
0,김유섭,데이터마이닝,2025,1,실습 위주로 수업하는데 정말 좋어요 다음에고 듣고 싶어요
1,김유섭,이산구조론,2021,1,매우 좋은 수업이긴하지만 공부할때 답안지가 없어서 친구와 같이 공부하는걸 선호합니디...
2,김유섭,이산구조론,2021,1,2주에 한 번씩 쪽지시험있어서 공부 미리 할 수 있고 교재에 나오는 예제 문제 풀어...
3,김유섭,이산구조론,2021,1,비대면이라 강의는 빠르게 듣고 어려운 부분만 집중적으로 다시 들을 수 있어서 좋았음...
4,김유섭,이산구조론,2021,1,화 목을 신입생이랑 기존재학생으로 나누어서 수업해서그런지 진도 꿑까지 못빼서 아쉬웠...


### Save

In [13]:
df.to_csv('everytime_reviews.csv', index=False, encoding='utf-8-sig')