Source endpoint scraped by this notebook (example request for page 3, 60 items per page): https://www.inflearn.com/courses/client/api/v1/course/search?isDiscounted=false&isNew=false&pageNumber=3&pageSize=60&types=ONLINE

In [1]:
import requests
import sqlite3
import time
from datetime import datetime

# Date-stamp the SQLite file name so each day's crawl is written to its own file.
today = f"{datetime.today():%Y-%m-%d}"
db_file = "inflearn_courses-" + today + ".db"
db_file

'inflearn_courses-2024-07-01.db'

In [2]:


def create_database(path=None):
    """Open the course database and make sure the ``courses`` table exists.

    Args:
        path: SQLite database location. When omitted, the module-level
            ``db_file`` is used, which is the behavior of the original
            zero-argument call.

    Returns:
        An open ``sqlite3.Connection``. The caller is responsible for
        closing it.

    Raises:
        sqlite3.Error: If the table cannot be created. The connection is
            closed before the error propagates.
    """
    conn = sqlite3.connect(db_file if path is None else path)
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS courses
                 (id INTEGER PRIMARY KEY,
                 title TEXT,
                 instructor TEXT,
                 price INTEGER,
                 regular_price INTEGER,
                 discount_rate REAL,
                 student_count INTEGER,
                 review_count INTEGER,
                 rating REAL,
                 last_updated TEXT)''')
        conn.commit()
    except Exception:
        # Do not leak a connection the caller never receives.
        conn.close()
        raise
    return conn

def fetch_courses(page_number, page_size=60, timeout=10):
    """Fetch one page of the Inflearn course-search API and return the decoded JSON.

    Args:
        page_number: Value sent as the ``pageNumber`` query parameter.
        page_size: Value sent as the ``pageSize`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection, which would hang the whole crawl.

    Returns:
        The response body as parsed by ``response.json()``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        requests.exceptions.HTTPError: If the server answers with a 4xx/5xx
            status.
    """
    url = "https://www.inflearn.com/courses/client/api/v1/course/search"
    params = {
        "isDiscounted": "false",
        "isNew": "false",
        "pageNumber": page_number,
        "pageSize": page_size,
        "types": "ONLINE"
    }
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()

In [3]:
# Probe the first result page and echo the fields of its first item to
# sanity-check the field mapping used by the scraper.
data = fetch_courses(page_number=1, page_size=60)
course_inst = data["data"]["items"][0]  # e.g. data["data"]["items"][0]["instructor"]["name"]
course, instructor = course_inst["course"], course_inst["instructor"]

# NOTE(review): the recorded output of this cell shows None for price,
# regularPrice and discountRate — confirm where the payload carries them.
preview = (
    course.get('id'),
    course.get('title'),
    instructor.get('name'),
    course.get('price'),
    'regularPrice : ', course.get('regularPrice'),
    course.get('discountRate'),
    course.get('studentCount'),
    course.get('reviewCount'),
    course.get('star'),
    datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
)
print(*preview)

332188 토비의 스프링 6 - 이해와 원리 토비 None regularPrice :  None None 716 11 5.0 2024-07-01 09:52:52


In [4]:
def _course_row(course_inst):
    """Flatten one search-result item into a tuple in ``courses`` column order."""
    course = course_inst["course"]
    # Tolerate an item whose instructor is missing or null instead of aborting the crawl.
    instructor = course_inst.get("instructor") or {}
    # NOTE(review): in the run recorded in this notebook, price, regular_price
    # and discount_rate were NULL for every stored row, so these three keys are
    # apparently not present on `course` — confirm where the API exposes them.
    return (course.get('id'),
            course.get('title'),
            instructor.get('name'),
            course.get('price'),
            course.get('regularPrice'),
            course.get('discountRate'),
            course.get('studentCount'),
            course.get('reviewCount'),
            course.get('star'),
            datetime.now().strftime('%Y-%m-%d %H:%M:%S'))


def scrape_inflearn():
    """Crawl every page of the course search API into the ``courses`` table.

    Pages are requested starting at 1 until a page comes back with no items.
    Each page is upserted (``INSERT OR REPLACE`` keyed on ``id``) and committed
    before the next request, with a one-second pause between requests.

    The database connection is closed even when a request or insert raises,
    so an interrupted crawl does not leave the SQLite file open.

    Returns:
        None. Progress and the final total are printed.
    """
    # Naming the columns keeps the insert correct even if the table's column
    # order ever differs from the order of the values built in _course_row.
    insert_sql = '''INSERT OR REPLACE INTO courses
                    (id, title, instructor, price, regular_price, discount_rate,
                     student_count, review_count, rating, last_updated)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'''

    conn = create_database()
    total_courses = 0
    try:
        c = conn.cursor()
        page_number = 1

        while True:
            data = fetch_courses(page_number)
            # `or []` also covers an explicit null "items" value.
            courses = data["data"].get('items') or []

            if not courses:
                break

            c.executemany(insert_sql, [_course_row(item) for item in courses])
            conn.commit()

            total_courses += len(courses)
            print(f"Collected {len(courses)} courses from page {page_number}. Total: {total_courses}")

            page_number += 1
            time.sleep(1)  # delay between requests so the server is not overloaded
    finally:
        conn.close()

    print(f"Scraping completed. Total courses collected: {total_courses}")

# Start the full crawl when this code is executed directly rather than imported.
if __name__ == "__main__":
    scrape_inflearn()

Collected 60 courses from page 1. Total: 60
Collected 60 courses from page 2. Total: 120
Collected 60 courses from page 3. Total: 180
Collected 60 courses from page 4. Total: 240
Collected 60 courses from page 5. Total: 300
Collected 60 courses from page 6. Total: 360
Collected 60 courses from page 7. Total: 420
Collected 60 courses from page 8. Total: 480
Collected 60 courses from page 9. Total: 540
Collected 60 courses from page 10. Total: 600
Collected 60 courses from page 11. Total: 660
Collected 60 courses from page 12. Total: 720
Collected 60 courses from page 13. Total: 780
Collected 60 courses from page 14. Total: 840
Collected 60 courses from page 15. Total: 900
Collected 60 courses from page 16. Total: 960
Collected 60 courses from page 17. Total: 1020
Collected 60 courses from page 18. Total: 1080
Collected 60 courses from page 19. Total: 1140
Collected 60 courses from page 20. Total: 1200
Collected 60 courses from page 21. Total: 1260
Collected 60 courses from page 22. Tota

In [5]:
def test_database():
    # 데이터베이스 연결
    conn = sqlite3.connect(db_file)
    cursor = conn.cursor()

    # 총 레코드 수 확인
    cursor.execute("SELECT COUNT(*) FROM courses")
    total_records = cursor.fetchone()[0]
    print(f"총 레코드 수: {total_records}")

    # 무작위로 5개 레코드 출력 (5개 미만이면 전체 출력)
    sample_size = min(5, total_records)
    cursor.execute(f"SELECT * FROM courses ORDER BY RANDOM() LIMIT {sample_size}")
    sample_records = cursor.fetchall()
    
    print("\n무작위 샘플 레코드:")
    for record in sample_records:
        print(record)

    # 각 컬럼의 기본 통계 계산 (수치형 컬럼에 대해)
    numeric_columns = ['price', 'regularPrice', 'discountRate', 'studentCount', 'reviewCount', 'star']
    # print("\n수치형 컬럼의 기본 통계:")
    # for column in numeric_columns:
    #     cursor.execute(f"SELECT MIN({column}), MAX({column}), AVG({column}) FROM courses")
    #     min_val, max_val, avg_val = cursor.fetchone()
    #     print(f"{column}: Min = {min_val}, Max = {max_val}, Avg = {avg_val:.2f}")

    # 각 컬럼의 null 값 개수 확인
    cursor.execute("PRAGMA table_info(courses)")
    columns = [column[1] for column in cursor.fetchall()]
    
    print("\n각 컬럼의 null 값 개수:")
    for column in columns:
        cursor.execute(f"SELECT COUNT(*) FROM courses WHERE {column} IS NULL")
        null_count = cursor.fetchone()[0]
        print(f"{column}: {null_count}")

    # 가장 최근에 업데이트된 레코드와 가장 오래된 레코드 확인
    cursor.execute("SELECT * FROM courses ORDER BY last_updated DESC LIMIT 1")
    print("\n가장 최근에 업데이트된 레코드:")
    print(cursor.fetchone())
    
    cursor.execute("SELECT * FROM courses ORDER BY last_updated ASC LIMIT 1")
    print("\n가장 오래된 레코드:")
    print(cursor.fetchone())

    # 연결 종료
    conn.close()

# Print the database sanity report when this code is executed directly rather than imported.
if __name__ == "__main__":
    test_database()

총 레코드 수: 3457

무작위 샘플 레코드:
(331619, '퇴근은 빠르게, 코드는 클린하게, 인텔리J를 활용한 리팩토링 시작하기(with 단축키) - 기초편', '겸손하게 익은 벼', None, None, None, 62, 3, 5.0, '2024-07-01 09:53:38')
(329541, '외워서 끝내는 네트워크 핵심이론 - 응용', '널널한 개발자', None, None, None, 3071, 84, 4.9, '2024-07-01 09:52:53')
(332258, 'Verilog FPGA Program 4 (MCU Porting, HIL-A35T)', 'alex', None, None, None, 17, 0, 0.0, '2024-07-01 09:53:31')
(334104, 'Amazing JavaScript - 입문', '장기효(캡틴판교)', None, None, None, 588, 7, 5.0, '2024-07-01 09:52:54')
(324841, '[애플 공식 교재] iOS 앱 만들기 Part1', '코더스하이', None, None, None, 273, 6, 5.0, '2024-07-01 09:53:52')

각 컬럼의 null 값 개수:
id: 0
title: 0
instructor: 0
price: 3457
regular_price: 3457
discount_rate: 3457
student_count: 0
review_count: 0
rating: 0
last_updated: 0

가장 최근에 업데이트된 레코드:
(23026, '테레비보다 재미있는 제이쿼리(jQuery) 강좌', '샵투월드', None, None, None, 4973, 84, 4.3, '2024-07-01 09:54:06')

가장 오래된 레코드:
(172458, 'IntelliJ를 시작하시는 분들을 위한 IntelliJ 가이드', '향로', None, None, None, 2291, 165, 4.7, '2024-07-01 09:52:53')


In [6]:
# Open the day's SQLite file for an inspection pass.
conn = sqlite3.connect(db_file)
cursor = conn.cursor()

# Load every stored course row, then echo each one.
rows = cursor.execute("SELECT * FROM courses").fetchall()
for row in rows:
    print(row)

# Ask SQLite for the row count and report it.
(count,) = cursor.execute("SELECT COUNT(*) FROM courses").fetchone()
print(f"총 {count}개의 강의가 저장되어 있습니다.")

# Release the database handle.
conn.close()

(18968, '따라하면서 배우는 웹애플리케이션 만들기', 'Egoing Lee', None, None, None, 6385, 82, 5.0, '2024-07-01 09:53:47')
(19174, '생활코딩 - HTML 기초', 'Egoing Lee', None, None, None, 9217, 165, 4.9, '2024-07-01 09:53:53')
(19271, '생활코딩 - jQuery', 'Egoing Lee', None, None, None, 6227, 155, 4.9, '2024-07-01 09:53:47')
(19671, 'TED – 자기계발', '유용한IT학습', None, None, None, 1627, 44, 4.5, '2024-07-01 09:54:02')
(19687, '안드로이드 기초부터 창업까지 - 실무 강좌', 'Seoul Wiz', None, None, None, 4986, 25, 4.8, '2024-07-01 09:54:05')
(19812, '생활코딩 - MySQL', 'Egoing Lee', None, None, None, 8834, 101, 4.9, '2024-07-01 09:53:53')
(19885, '생활코딩 - PHP 기본 A 부터 Z 까지', 'Egoing Lee', None, None, None, 6114, 63, 4.8, '2024-07-01 09:53:34')
(19889, '아이폰 앱 제작기 – 원트소프트 꼬마마녀 샐리', 'Choong Ji Kim', None, None, None, 778, 10, 4.6, '2024-07-01 09:54:05')
(20117, '웹브라우저 Javascript (자바스크립트)', 'Egoing Lee', None, None, None, 9412, 170, 4.9, '2024-07-01 09:53:42')
(20275, '생활코딩 - 정규표현식', 'Egoing Lee', None, None, None, 3481, 132, 4.9, '2024-07-01 09:53:53')