In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Professors whose lecture reviews are collected. The same list is used later
# to filter rows and to define the professor sort order.
professor_names = ['김유섭', '김은주', '이정근', '양은샘', '신미영', '김선정']

# Everytime session token, sent as the `etsid` cookie on every request.
# Read from the EVERYTIME_TOKEN environment variable so the secret does not
# have to be pasted into (and saved with) the notebook; falls back to ''.
everytime_token = os.environ.get('EVERYTIME_TOKEN', '')

### Fetch lecture reviews from Everytime

In [None]:
import asyncio
import aiohttp
import urllib.parse

class Everytime:
    """Small async client for the Everytime lecture-review endpoints.

    Use it as an async context manager so the ``aiohttp`` session is opened on
    entry and closed on exit::

        async with Everytime(token) as et:
            lectures = await et.fetch_lectures('some name')

    Every request sends ``token`` as the ``etsid`` cookie. A non-200 response
    is reported with ``print`` and produces an empty result instead of raising.
    """

    LECTURE_URL = 'https://api.everytime.kr/find/lecture/list/keyword'
    ARTICLE_URL = 'https://api.everytime.kr/find/lecture/article/list'

    def __init__(self, token):
        """Store the session token; the HTTP session is created in ``__aenter__``."""
        self.token = token
        self.session = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, exc_type, exc, tb):
        await self.session.close()

    def _headers(self):
        """Return the request headers shared by every API call."""
        return {
            'User-Agent': 'Chrome/137.0.0.0',
            'Accept': 'application/json, text/plain, */*',
            'Content-Type': 'application/x-www-form-urlencoded',
            'Cookie': f'etsid={self.token}'
        }

    async def _post_result(self, url, form, label, ident):
        """POST ``form`` (url-encoded) to ``url`` and return its ``result`` dict.

        Returns an empty dict when the status is not 200 (after printing a
        ``[label] ident failed ...`` message) or when the decoded body has no
        dict under ``'result'`` (missing key, ``null``, or another type).
        """
        body = urllib.parse.urlencode(form)
        async with self.session.post(url, headers=self._headers(), data=body) as response:
            if response.status != 200:
                print(f'[{label}] {ident} failed with status {response.status}')
                return {}
            json_data = await response.json()
        # `.get('result', {})` alone is not enough: a present-but-null value
        # would come back as None and break the chained lookup.
        result = json_data.get('result') if isinstance(json_data, dict) else None
        return result if isinstance(result, dict) else {}

    async def fetch_lectures(self, name):
        """Search lectures by professor keyword ``name``.

        Returns the list found under ``result.lectures`` in the response, or
        ``[]`` when the request fails or that list is missing/null.
        """
        result = await self._post_result(
            self.LECTURE_URL,
            {
                'campusId': '0',
                'field': 'professor',
                'keyword': name,
                'limit': '10000',
                'offset': '0'
            },
            'Lecture',
            name,
        )
        return result.get('lectures') or []

    async def fetch_articles(self, lecture):
        """Fetch the articles for ``lecture`` and attach them to it.

        ``lecture`` must be a dict with an ``'id'`` key. The dict is mutated in
        place: ``lecture['articles']`` is set to the list found under
        ``result.articles`` (``[]`` on failure or when missing/null), and the
        same dict is returned.
        """
        result = await self._post_result(
            self.ARTICLE_URL,
            {
                'lectureId': lecture['id'],
                'limit': '10000',
                'offset': '0',
                'sort': 'id'
            },
            'Article',
            lecture['id'],
        )
        lecture['articles'] = result.get('articles') or []
        return lecture

In [None]:
from tqdm.asyncio import tqdm_asyncio

async def get_everytime_reviews(professors):
    """Collect every lecture (with its reviews) for the given professor names.

    Opens one ``Everytime`` client using the notebook-level ``everytime_token``,
    searches lectures for all professors concurrently, then fetches the
    articles of all found lectures concurrently. Both stages show a tqdm
    progress bar.

    Returns a DataFrame built from the lecture dicts, each carrying the
    ``'articles'`` list attached by ``fetch_articles``.
    """
    async with Everytime(everytime_token) as client:
        # Stage 1: one lecture search per professor.
        per_professor = await tqdm_asyncio.gather(
            *(client.fetch_lectures(professor) for professor in professors)
        )

        # Merge the per-professor result lists into a single flat list.
        all_lectures = []
        for found in per_professor:
            all_lectures.extend(found)

        # Stage 2: one article request per lecture.
        enriched = await tqdm_asyncio.gather(
            *(client.fetch_articles(item) for item in all_lectures)
        )

        return pd.DataFrame(enriched)

# Run the scraper for the configured professors. The top-level `await` needs an
# environment that supports it (e.g. a notebook cell); it is not valid in a
# plain Python script.
df = await get_everytime_reviews(professor_names)

In [None]:
# Preview the raw result: one row per lecture, with its reviews still nested as
# a list in the 'articles' column.
df.head()

In [None]:
# Unnest the per-lecture review lists so each row holds a single review.
# The index is reset so it lines up with the normalized frame built below.
df = df.explode('articles').reset_index(drop=True)

# Expand every review dict into flat columns.
articles = pd.json_normalize(df['articles'])

# Swap the nested column for the flattened review columns, keep only the fields
# of interest, let pandas infer nullable dtypes, and rename 'text' to 'review'.
df = (
    pd.concat([df.drop(columns=['articles']), articles], axis=1)
    .loc[:, ['professor', 'name', 'year', 'semester', 'text']]
    .convert_dtypes(convert_integer=True)
    .rename(columns={'text': 'review'})
)

In [None]:
# Discard rows with any missing field, then remove exact duplicate rows.
df = df.dropna().drop_duplicates()

In [None]:
# Row count per professor value (each remaining row is one review).
df['professor'].value_counts()

In [None]:
# Row count per value of the 'name' column.
df['name'].value_counts()

### Handle namesakes and non-major courses (동명이인 및 전공 외 과목 처리)

In [None]:
# Rows whose professor exactly matches one of the target names.
is_target_professor = df['professor'].isin(professor_names)

# Course names to drop. The commented-out names are further candidates that are
# currently kept.
excluded_courses = [
    '음성학과발음연습', '영어문법',
    # '오디세이세미나1', '오디세이세미나2(리더십과 기업가정신)', '오디세이세미나3', '오디세이세미나4',
    # '글로벌취업전략', '직무및기업탐색', '취업성공전략', '취업설계', '진로설계', '여대생커리어개발과취업전략', '해외취업및인턴준비과정'
]
is_excluded_course = df['name'].isin(excluded_courses)

# Apply both conditions in a single selection.
df = df[is_target_professor & ~is_excluded_course]

In [None]:
# Row count per professor value, to inspect the effect of the filtering.
df['professor'].value_counts()

In [None]:
# Make 'professor' an ordered categorical so sorting follows the order of
# `professor_names` rather than alphabetical order.
# `assign` returns a new frame instead of writing a column into `df`, which is
# the result of boolean filtering; in-place column writes on such frames can
# emit SettingWithCopyWarning.
df = df.assign(
    professor=pd.Categorical(
        df['professor'],
        categories=professor_names,
        ordered=True
    )
)

# Sort by professor with a stable algorithm so rows keep their existing
# relative order within each professor, then renumber the index.
df = df.sort_values('professor', kind='mergesort').reset_index(drop=True)

In [None]:
# Preview the first rows of the sorted table.
df.head()

### Save the cleaned reviews to CSV

In [None]:
# Write the table to 'everytime_reviews.csv' without the index column.
# 'utf-8-sig' prepends a UTF-8 byte-order mark — presumably so spreadsheet
# applications detect the encoding of the Korean text; confirm if it matters.
df.to_csv('everytime_reviews.csv', index=False, encoding='utf-8-sig')